From cd12827cb136d0711c555fe321f72d6aa8326143 Mon Sep 17 00:00:00 2001 From: Darius Morawiec Date: Sun, 3 Dec 2017 18:42:33 +0100 Subject: [PATCH] Add '--checksum' attribute to maintain the exported model data --- readme.md | 2 +- sklearn_porter/__main__.py | 13 +++++++++++-- .../classifier/AdaBoostClassifier/__init__.py | 18 ++++++++++++++---- .../classifier/BernoulliNB/__init__.py | 18 ++++++++++++++---- .../DecisionTreeClassifier/__init__.py | 18 ++++++++++++++---- .../classifier/GaussianNB/__init__.py | 18 ++++++++++++++---- .../KNeighborsClassifier/__init__.py | 18 ++++++++++++++---- .../estimator/classifier/LinearSVC/__init__.py | 18 ++++++++++++++---- .../classifier/MLPClassifier/__init__.py | 18 ++++++++++++++---- .../RandomForestClassifier/__init__.py | 18 ++++++++++++++---- .../estimator/classifier/SVC/__init__.py | 18 ++++++++++++++---- 11 files changed, 138 insertions(+), 39 deletions(-) diff --git a/readme.md b/readme.md index 38b89285..279a6174 100644 --- a/readme.md +++ b/readme.md @@ -233,7 +233,7 @@ First of all have a quick view on the available arguments: $ python -m sklearn_porter [-h] --input [--output ] \ [--class_name ] [--method_name ] \ [--c] [--java] [--js] [--go] [--php] [--ruby] \ - [--export] [--pipe] + [--export] [--checksum] [--pipe] ``` The following example shows how you can save an trained estimator to the [pickle format](http://scikit-learn.org/stable/modules/model_persistence.html#persistence-example): diff --git a/sklearn_porter/__main__.py b/sklearn_porter/__main__.py index cc72c722..c29c19ff 100644 --- a/sklearn_porter/__main__.py +++ b/sklearn_porter/__main__.py @@ -47,6 +47,12 @@ def parse_args(args): default=False, action='store_true', help='Whether to export the model data or not.') + optional.add_argument( + '--checksum', + required=False, + default=False, + action='store_true', + help='Whether to append the checksum to the filename or not.') optional.add_argument( '--pipe', '-p', required=False, @@ -106,13 +112,16 @@ def main(): # Port estimator: try: - porter = Porter(estimator, language=language) class_name = args.get('class_name') method_name = args.get('method_name') + with_export = bool(args.get('export')) + with_checksum = bool(args.get('checksum')) + porter = Porter(estimator, language=language) output = porter.export(class_name=class_name, method_name=method_name, export_dir=dest_dir, - export_data=bool(args.get('export')), + export_data=with_export, + export_append_checksum=with_checksum, details=True) except Exception as e: sys.exit('Error: {}'.format(str(e))) diff --git a/sklearn_porter/estimator/classifier/AdaBoostClassifier/__init__.py b/sklearn_porter/estimator/classifier/AdaBoostClassifier/__init__.py index 0a1a49cf..2394fdc5 100644 --- a/sklearn_porter/estimator/classifier/AdaBoostClassifier/__init__.py +++ b/sklearn_porter/estimator/classifier/AdaBoostClassifier/__init__.py @@ -85,7 +85,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - embed_data=True, **kwargs): + export_append_checksum=False, embed_data=True, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -101,6 +101,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. :param embed_data : bool Whether the model data should be embedded in the template or not. @@ -128,7 +130,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Embedded: return self.predict('embedded') @@ -158,7 +161,7 @@ def predict(self, temp_type): meth = self.create_embedded_meth() return self.create_embedded_class(meth) - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -168,6 +171,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = [] for est in self.estimators: @@ -179,9 +184,14 @@ def export_data(self, directory, filename): 'indices': est.tree_.feature.tolist() }) encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def create_branches(self, left_nodes, right_nodes, threshold, value, features, node, depth, init=False): diff --git a/sklearn_porter/estimator/classifier/BernoulliNB/__init__.py b/sklearn_porter/estimator/classifier/BernoulliNB/__init__.py index 20fbc26c..a1b830c7 100644 --- a/sklearn_porter/estimator/classifier/BernoulliNB/__init__.py +++ b/sklearn_porter/estimator/classifier/BernoulliNB/__init__.py @@ -59,7 +59,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - **kwargs): + export_append_checksum=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -75,6 +75,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. Returns ------- @@ -127,7 +129,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Separated: return self.predict('separated') @@ -156,7 +159,7 @@ def predict(self, temp_type): method = self.create_method() return self.create_class(method) - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -166,6 +169,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ neg_prob = np.log(1 - np.exp(self.estimator.feature_log_prob_)) delta_probs = (self.estimator.feature_log_prob_ - neg_prob).T @@ -175,9 +180,14 @@ def export_data(self, directory, filename): 'delProbs': delta_probs.tolist() } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def create_method(self): """ diff --git a/sklearn_porter/estimator/classifier/DecisionTreeClassifier/__init__.py b/sklearn_porter/estimator/classifier/DecisionTreeClassifier/__init__.py index 430ff6f7..abc7d2a7 100644 --- a/sklearn_porter/estimator/classifier/DecisionTreeClassifier/__init__.py +++ b/sklearn_porter/estimator/classifier/DecisionTreeClassifier/__init__.py @@ -111,7 +111,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - embed_data=False, **kwargs): + export_append_checksum=False, embed_data=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -127,6 +127,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. :param embed_data : bool Whether the model data should be embedded in the template or not. @@ -185,7 +187,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Embedded: if embed_data: @@ -193,7 +196,7 @@ def export(self, class_name, method_name, # Separated: return self.predict('separated') - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -203,6 +206,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = { 'leftChilds': self.estimator.tree_.children_left.tolist(), @@ -212,9 +217,14 @@ def export_data(self, directory, filename): 'classes': [c[0] for c in self.estimator.tree_.value.tolist()] } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def predict(self, temp_type='separated'): """ diff --git a/sklearn_porter/estimator/classifier/GaussianNB/__init__.py b/sklearn_porter/estimator/classifier/GaussianNB/__init__.py index 9213a04f..a47f8ced 100644 --- a/sklearn_porter/estimator/classifier/GaussianNB/__init__.py +++ b/sklearn_porter/estimator/classifier/GaussianNB/__init__.py @@ -58,7 +58,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - **kwargs): + export_append_checksum=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -74,6 +74,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. Returns ------- @@ -124,7 +126,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Separated: return self.predict('separated') @@ -152,7 +155,7 @@ def predict(self, temp_type): method = self.create_method() return self.create_class(method) - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -162,6 +165,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = { 'priors': self.estimator.class_prior_.tolist(), @@ -169,9 +174,14 @@ def export_data(self, directory, filename): 'thetas': self.estimator.theta_.tolist() } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def create_method(self): """ diff --git a/sklearn_porter/estimator/classifier/KNeighborsClassifier/__init__.py b/sklearn_porter/estimator/classifier/KNeighborsClassifier/__init__.py index 9778dac7..53e9a548 100644 --- a/sklearn_porter/estimator/classifier/KNeighborsClassifier/__init__.py +++ b/sklearn_porter/estimator/classifier/KNeighborsClassifier/__init__.py @@ -62,7 +62,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - **kwargs): + export_append_checksum=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -78,6 +78,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. Returns ------- @@ -110,12 +112,13 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Separated: return self.predict('separated') - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -125,6 +128,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = { 'X': self.estimator._fit_X.tolist(), # pylint: disable=W0212 @@ -134,9 +139,14 @@ def export_data(self, directory, filename): 'power': self.power_param } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def predict(self, temp_type): """ diff --git a/sklearn_porter/estimator/classifier/LinearSVC/__init__.py b/sklearn_porter/estimator/classifier/LinearSVC/__init__.py index 54692e18..d553806f 100644 --- a/sklearn_porter/estimator/classifier/LinearSVC/__init__.py +++ b/sklearn_porter/estimator/classifier/LinearSVC/__init__.py @@ -91,7 +91,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - **kwargs): + export_append_checksum=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -107,6 +107,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. Returns ------- @@ -166,7 +168,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Separated: return self.predict('separated') @@ -195,7 +198,7 @@ def predict(self, temp_type): self.method = self.create_method() return self.create_class() - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -205,6 +208,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ est = self.estimator coefs = est.coef_[0] if self.is_binary else est.coef_ @@ -214,9 +219,14 @@ def export_data(self, directory, filename): 'intercepts': inters.tolist(), } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def create_method(self): """ diff --git a/sklearn_porter/estimator/classifier/MLPClassifier/__init__.py b/sklearn_porter/estimator/classifier/MLPClassifier/__init__.py index 7e748b3f..96e7ecbd 100644 --- a/sklearn_porter/estimator/classifier/MLPClassifier/__init__.py +++ b/sklearn_porter/estimator/classifier/MLPClassifier/__init__.py @@ -87,7 +87,7 @@ def output_activation_functions(self): def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - **kwargs): + export_append_checksum=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -103,6 +103,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. Returns ------- @@ -146,7 +148,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Separated: return self.predict('separated') @@ -214,7 +217,7 @@ def predict(self, temp_type): layers=layers, file_name=file_name) - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -224,6 +227,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = { 'layers': [int(l) for l in list(self._get_activations())], @@ -233,9 +238,14 @@ def export_data(self, directory, filename): 'output_activation': self.output_activation } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def _get_intercepts(self): """ diff --git a/sklearn_porter/estimator/classifier/RandomForestClassifier/__init__.py b/sklearn_porter/estimator/classifier/RandomForestClassifier/__init__.py index 35cf2edf..f5c61b43 100644 --- a/sklearn_porter/estimator/classifier/RandomForestClassifier/__init__.py +++ b/sklearn_porter/estimator/classifier/RandomForestClassifier/__init__.py @@ -96,7 +96,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - embed_data=True, **kwargs): + export_append_checksum=False, embed_data=True, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -112,6 +112,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. :param embed_data : bool Whether the model data should be embedded in the template or not. """ @@ -131,7 +133,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Embedded: return self.predict('embedded') @@ -161,7 +164,7 @@ def predict(self, temp_type): method = self.create_method_embedded() return self.create_class_embedded(method) - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -171,6 +174,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = [] for est in self.estimators: @@ -182,9 +187,14 @@ def export_data(self, directory, filename): 'indices': est.tree_.feature.tolist() }) encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def create_branches(self, left_nodes, right_nodes, threshold, value, features, node, depth): diff --git a/sklearn_porter/estimator/classifier/SVC/__init__.py b/sklearn_porter/estimator/classifier/SVC/__init__.py index 9740d738..8bf90e87 100644 --- a/sklearn_porter/estimator/classifier/SVC/__init__.py +++ b/sklearn_porter/estimator/classifier/SVC/__init__.py @@ -79,7 +79,7 @@ def __init__(self, estimator, target_language='java', def export(self, class_name, method_name, export_data=False, export_dir='.', export_filename='data.json', - **kwargs): + export_append_checksum=False, **kwargs): """ Port a trained estimator to the syntax of a chosen programming language. @@ -95,6 +95,8 @@ def export(self, class_name, method_name, The directory where the model data should be saved. :param export_filename : string The filename of the exported model data. + :param export_append_checksum : bool + Whether to append the checksum to the filename or not. Returns ------- @@ -186,7 +188,8 @@ def export(self, class_name, method_name, if self.target_method == 'predict': # Exported: if export_data and os.path.isdir(export_dir): - self.export_data(export_dir, export_filename) + self.export_data(export_dir, export_filename, + export_append_checksum) return self.predict('exported') # Separated: return self.predict('separated') @@ -214,7 +217,7 @@ def predict(self, temp_type): self.method = self.create_method() return self.create_class() - def export_data(self, directory, filename): + def export_data(self, directory, filename, with_md5_hash=False): """ Save model data in a JSON file. @@ -224,6 +227,8 @@ def export_data(self, directory, filename): The directory. :param filename : string The filename. + :param with_md5_hash : bool + Whether to append the checksum to the filename or not. """ model_data = { 'vectors': self.estimator.support_vectors_.tolist(), @@ -238,9 +243,14 @@ def export_data(self, directory, filename): 'nRows': int(self.n_svs_rows) } encoder.FLOAT_REPR = lambda o: self.repr(o) + json_data = json.dumps(model_data, sort_keys=True) + if with_md5_hash: + import hashlib + json_hash = hashlib.md5(json_data).hexdigest() + filename = filename.split('.json')[0] + '_' + json_hash + '.json' path = os.path.join(directory, filename) with open(path, 'w') as fp: - json.dump(model_data, fp) + fp.write(json_data) def create_method(self): """