Skip to content

Commit

Permalink
Add '--checksum' attribute to maintain the exported model data
Browse files Browse the repository at this point in the history
  • Loading branch information
nok committed Dec 3, 2017
1 parent 0669645 commit cd12827
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 39 deletions.
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ First of all have a quick view on the available arguments:
$ python -m sklearn_porter [-h] --input <PICKLE_FILE> [--output <DEST_DIR>] \
[--class_name <CLASS_NAME>] [--method_name <METHOD_NAME>] \
[--c] [--java] [--js] [--go] [--php] [--ruby] \
[--export] [--pipe]
[--export] [--checksum] [--pipe]
```

The following example shows how you can save a trained estimator to the [pickle format](http://scikit-learn.org/stable/modules/model_persistence.html#persistence-example):
Expand Down
13 changes: 11 additions & 2 deletions sklearn_porter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ def parse_args(args):
default=False,
action='store_true',
help='Whether to export the model data or not.')
optional.add_argument(
'--checksum',
required=False,
default=False,
action='store_true',
help='Whether to append the checksum to the filename or not.')
optional.add_argument(
'--pipe', '-p',
required=False,
Expand Down Expand Up @@ -106,13 +112,16 @@ def main():

# Port estimator:
try:
porter = Porter(estimator, language=language)
class_name = args.get('class_name')
method_name = args.get('method_name')
with_export = bool(args.get('export'))
with_checksum = bool(args.get('checksum'))
porter = Porter(estimator, language=language)
output = porter.export(class_name=class_name,
method_name=method_name,
export_dir=dest_dir,
export_data=bool(args.get('export')),
export_data=with_export,
export_append_checksum=with_checksum,
details=True)
except Exception as e:
sys.exit('Error: {}'.format(str(e)))
Expand Down
18 changes: 14 additions & 4 deletions sklearn_porter/estimator/classifier/AdaBoostClassifier/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(self, estimator, target_language='java',

def export(self, class_name, method_name,
export_data=False, export_dir='.', export_filename='data.json',
embed_data=True, **kwargs):
export_append_checksum=False, embed_data=True, **kwargs):
"""
Port a trained estimator to the syntax of a chosen programming language.
Expand All @@ -101,6 +101,8 @@ def export(self, class_name, method_name,
The directory where the model data should be saved.
:param export_filename : string
The filename of the exported model data.
:param export_append_checksum : bool
Whether to append the checksum to the filename or not.
:param embed_data : bool
Whether the model data should be embedded in the template or not.
Expand Down Expand Up @@ -128,7 +130,8 @@ def export(self, class_name, method_name,
if self.target_method == 'predict':
# Exported:
if export_data and os.path.isdir(export_dir):
self.export_data(export_dir, export_filename)
self.export_data(export_dir, export_filename,
export_append_checksum)
return self.predict('exported')
# Embedded:
return self.predict('embedded')
Expand Down Expand Up @@ -158,7 +161,7 @@ def predict(self, temp_type):
meth = self.create_embedded_meth()
return self.create_embedded_class(meth)

def export_data(self, directory, filename):
def export_data(self, directory, filename, with_md5_hash=False):
"""
Save model data in a JSON file.
Expand All @@ -168,6 +171,8 @@ def export_data(self, directory, filename):
The directory.
:param filename : string
The filename.
:param with_md5_hash : bool
Whether to append the checksum to the filename or not.
"""
model_data = []
for est in self.estimators:
Expand All @@ -179,9 +184,14 @@ def export_data(self, directory, filename):
'indices': est.tree_.feature.tolist()
})
encoder.FLOAT_REPR = lambda o: self.repr(o)
json_data = json.dumps(model_data, sort_keys=True)
if with_md5_hash:
import hashlib
json_hash = hashlib.md5(json_data).hexdigest()
filename = filename.split('.json')[0] + '_' + json_hash + '.json'
path = os.path.join(directory, filename)
with open(path, 'w') as fp:
json.dump(model_data, fp)
fp.write(json_data)

def create_branches(self, left_nodes, right_nodes, threshold,
value, features, node, depth, init=False):
Expand Down
18 changes: 14 additions & 4 deletions sklearn_porter/estimator/classifier/BernoulliNB/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def __init__(self, estimator, target_language='java',

def export(self, class_name, method_name,
export_data=False, export_dir='.', export_filename='data.json',
**kwargs):
export_append_checksum=False, **kwargs):
"""
Port a trained estimator to the syntax of a chosen programming language.
Expand All @@ -75,6 +75,8 @@ def export(self, class_name, method_name,
The directory where the model data should be saved.
:param export_filename : string
The filename of the exported model data.
:param export_append_checksum : bool
Whether to append the checksum to the filename or not.
Returns
-------
Expand Down Expand Up @@ -127,7 +129,8 @@ def export(self, class_name, method_name,
if self.target_method == 'predict':
# Exported:
if export_data and os.path.isdir(export_dir):
self.export_data(export_dir, export_filename)
self.export_data(export_dir, export_filename,
export_append_checksum)
return self.predict('exported')
# Separated:
return self.predict('separated')
Expand Down Expand Up @@ -156,7 +159,7 @@ def predict(self, temp_type):
method = self.create_method()
return self.create_class(method)

def export_data(self, directory, filename):
def export_data(self, directory, filename, with_md5_hash=False):
"""
Save model data in a JSON file.
Expand All @@ -166,6 +169,8 @@ def export_data(self, directory, filename):
The directory.
:param filename : string
The filename.
:param with_md5_hash : bool
Whether to append the checksum to the filename or not.
"""
neg_prob = np.log(1 - np.exp(self.estimator.feature_log_prob_))
delta_probs = (self.estimator.feature_log_prob_ - neg_prob).T
Expand All @@ -175,9 +180,14 @@ def export_data(self, directory, filename):
'delProbs': delta_probs.tolist()
}
encoder.FLOAT_REPR = lambda o: self.repr(o)
json_data = json.dumps(model_data, sort_keys=True)
if with_md5_hash:
import hashlib
json_hash = hashlib.md5(json_data).hexdigest()
filename = filename.split('.json')[0] + '_' + json_hash + '.json'
path = os.path.join(directory, filename)
with open(path, 'w') as fp:
json.dump(model_data, fp)
fp.write(json_data)

def create_method(self):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __init__(self, estimator, target_language='java',

def export(self, class_name, method_name,
export_data=False, export_dir='.', export_filename='data.json',
embed_data=False, **kwargs):
export_append_checksum=False, embed_data=False, **kwargs):
"""
Port a trained estimator to the syntax of a chosen programming language.
Expand All @@ -127,6 +127,8 @@ def export(self, class_name, method_name,
The directory where the model data should be saved.
:param export_filename : string
The filename of the exported model data.
:param export_append_checksum : bool
Whether to append the checksum to the filename or not.
:param embed_data : bool
Whether the model data should be embedded in the template or not.
Expand Down Expand Up @@ -185,15 +187,16 @@ def export(self, class_name, method_name,
if self.target_method == 'predict':
# Exported:
if export_data and os.path.isdir(export_dir):
self.export_data(export_dir, export_filename)
self.export_data(export_dir, export_filename,
export_append_checksum)
return self.predict('exported')
# Embedded:
if embed_data:
return self.predict('embedded')
# Separated:
return self.predict('separated')

def export_data(self, directory, filename):
def export_data(self, directory, filename, with_md5_hash=False):
"""
Save model data in a JSON file.
Expand All @@ -203,6 +206,8 @@ def export_data(self, directory, filename):
The directory.
:param filename : string
The filename.
:param with_md5_hash : bool
Whether to append the checksum to the filename or not.
"""
model_data = {
'leftChilds': self.estimator.tree_.children_left.tolist(),
Expand All @@ -212,9 +217,14 @@ def export_data(self, directory, filename):
'classes': [c[0] for c in self.estimator.tree_.value.tolist()]
}
encoder.FLOAT_REPR = lambda o: self.repr(o)
json_data = json.dumps(model_data, sort_keys=True)
if with_md5_hash:
import hashlib
json_hash = hashlib.md5(json_data).hexdigest()
filename = filename.split('.json')[0] + '_' + json_hash + '.json'
path = os.path.join(directory, filename)
with open(path, 'w') as fp:
json.dump(model_data, fp)
fp.write(json_data)

def predict(self, temp_type='separated'):
"""
Expand Down
18 changes: 14 additions & 4 deletions sklearn_porter/estimator/classifier/GaussianNB/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, estimator, target_language='java',

def export(self, class_name, method_name,
export_data=False, export_dir='.', export_filename='data.json',
**kwargs):
export_append_checksum=False, **kwargs):
"""
Port a trained estimator to the syntax of a chosen programming language.
Expand All @@ -74,6 +74,8 @@ def export(self, class_name, method_name,
The directory where the model data should be saved.
:param export_filename : string
The filename of the exported model data.
:param export_append_checksum : bool
Whether to append the checksum to the filename or not.
Returns
-------
Expand Down Expand Up @@ -124,7 +126,8 @@ def export(self, class_name, method_name,
if self.target_method == 'predict':
# Exported:
if export_data and os.path.isdir(export_dir):
self.export_data(export_dir, export_filename)
self.export_data(export_dir, export_filename,
export_append_checksum)
return self.predict('exported')
# Separated:
return self.predict('separated')
Expand Down Expand Up @@ -152,7 +155,7 @@ def predict(self, temp_type):
method = self.create_method()
return self.create_class(method)

def export_data(self, directory, filename):
def export_data(self, directory, filename, with_md5_hash=False):
"""
Save model data in a JSON file.
Expand All @@ -162,16 +165,23 @@ def export_data(self, directory, filename):
The directory.
:param filename : string
The filename.
:param with_md5_hash : bool
Whether to append the checksum to the filename or not.
"""
model_data = {
'priors': self.estimator.class_prior_.tolist(),
'sigmas': self.estimator.sigma_.tolist(),
'thetas': self.estimator.theta_.tolist()
}
encoder.FLOAT_REPR = lambda o: self.repr(o)
json_data = json.dumps(model_data, sort_keys=True)
if with_md5_hash:
import hashlib
json_hash = hashlib.md5(json_data).hexdigest()
filename = filename.split('.json')[0] + '_' + json_hash + '.json'
path = os.path.join(directory, filename)
with open(path, 'w') as fp:
json.dump(model_data, fp)
fp.write(json_data)

def create_method(self):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def __init__(self, estimator, target_language='java',

def export(self, class_name, method_name,
export_data=False, export_dir='.', export_filename='data.json',
**kwargs):
export_append_checksum=False, **kwargs):
"""
Port a trained estimator to the syntax of a chosen programming language.
Expand All @@ -78,6 +78,8 @@ def export(self, class_name, method_name,
The directory where the model data should be saved.
:param export_filename : string
The filename of the exported model data.
:param export_append_checksum : bool
Whether to append the checksum to the filename or not.
Returns
-------
Expand Down Expand Up @@ -110,12 +112,13 @@ def export(self, class_name, method_name,
if self.target_method == 'predict':
# Exported:
if export_data and os.path.isdir(export_dir):
self.export_data(export_dir, export_filename)
self.export_data(export_dir, export_filename,
export_append_checksum)
return self.predict('exported')
# Separated:
return self.predict('separated')

def export_data(self, directory, filename):
def export_data(self, directory, filename, with_md5_hash=False):
"""
Save model data in a JSON file.
Expand All @@ -125,6 +128,8 @@ def export_data(self, directory, filename):
The directory.
:param filename : string
The filename.
:param with_md5_hash : bool
Whether to append the checksum to the filename or not.
"""
model_data = {
'X': self.estimator._fit_X.tolist(), # pylint: disable=W0212
Expand All @@ -134,9 +139,14 @@ def export_data(self, directory, filename):
'power': self.power_param
}
encoder.FLOAT_REPR = lambda o: self.repr(o)
json_data = json.dumps(model_data, sort_keys=True)
if with_md5_hash:
import hashlib
json_hash = hashlib.md5(json_data).hexdigest()
filename = filename.split('.json')[0] + '_' + json_hash + '.json'
path = os.path.join(directory, filename)
with open(path, 'w') as fp:
json.dump(model_data, fp)
fp.write(json_data)

def predict(self, temp_type):
"""
Expand Down
Loading

0 comments on commit cd12827

Please sign in to comment.