biocore · mortonjt · Jan 8, 2020 · Dec 13, 2019 · Dec 13, 2019 · Dec 13, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,8 @@
 # songbird changelog
 
 ## Version 1.0.2-dev
+Added the ability to set a random seed for the CLI and set fixed random seeds for qiime2 [#101](https://github.com/biocore/songbird/pull/101)
+
 Correcting matching between metadata and biom table and clarifying the min-feature-count parameter [#99](https://github.com/biocore/songbird/pull/99)
 
 Added Tensorboard's HParams functionality to standalone [#95](https://github.com/biocore/songbird/pull/95)

diff --git a/scripts/songbird b/scripts/songbird
@@ -99,6 +99,13 @@ def songbird():
     show_default=True,
     help=DESCS["summary-dir"],
 )
+@click.option(
+    "--random-seed",
+    default=DEFAULTS["random-seed"],
+    show_default=True,
+    help=DESCS["random-seed"],
+    type=int,
+)
 def multinomial(
     input_biom,
     metadata_file,
@@ -115,6 +122,7 @@ def multinomial(
     checkpoint_interval,
     summary_interval,
     summary_dir,
+    random_seed,
 ):
     # load metadata and tables
     metadata = read_metadata(metadata_file)
@@ -139,16 +147,22 @@ def multinomial(
                'min_sample_count': min_sample_count,
                'min_feature_count': min_feature_count,
                }
+    if random_seed is not None:
+        hparams.update({
+            'random_seed': random_seed,
+        })
 
+    # split up training and testing
     trainX, testX, trainY, testY = split_training(
         dense_table,
         metadata,
         design,
         training_column,
         num_random_test_examples,
+        seed=random_seed,
     )
 
-    # split up training and testing
+    # initialize and train the model
     model = MultRegression(
         learning_rate=learning_rate,
         clipnorm=clipnorm,
@@ -157,6 +171,10 @@ def multinomial(
         save_path=summary_dir,
     )
     with tf.Graph().as_default(), tf.Session() as session:
+        # set the tf random seed
+        if random_seed is not None:
+            tf.set_random_seed(random_seed)
+
         model(session, trainX, trainY, testX, testY)
 
         model.fit(

diff --git a/scripts/test_songbird_cli.py b/scripts/test_songbird_cli.py
@@ -22,7 +22,7 @@ def setUp(self) -> None:
     def tearDown(self) -> None:
         shutil.rmtree(self.path)
 
-    def test_cli(self):
+    def test_cli_no_seed_set(self):
         runner = CliRunner()
         test_args = ['--input-biom', 'data/redsea/redsea.biom',
                      '--metadata-file', 'data/redsea/redsea_metadata.txt',
@@ -42,6 +42,50 @@ def test_cli(self):
             error = Exception('Command failed with non-zero exit code')
             raise error .with_traceback(ex.__traceback__)
 
+    def test_cli_set_set_seed_int(self):
+        runner = CliRunner()
+        test_args = ['--input-biom', 'data/redsea/redsea.biom',
+                     '--metadata-file', 'data/redsea/redsea_metadata.txt',
+                     '--formula',
+                     'Depth+Temperature+Salinity+Oxygen+Fluorescence'
+                     '+Nitrate',
+                     '--epochs', '100',
+                     '--differential-prior', '0.5',
+                     '--summary-interval', '1',
+                     '--summary-dir', self.path,
+                     '--random-seed', 42,
+                     ]
+
+        result = runner.invoke(songbird.multinomial, test_args)
+        try:
+            self.assertEqual(0, result.exit_code)
+        except AssertionError:
+            ex = result.exception
+            error = Exception('Command failed with non-zero exit code')
+            raise error .with_traceback(ex.__traceback__)
+
+    def test_cli_set_random_seed_None(self):
+        runner = CliRunner()
+        test_args = ['--input-biom', 'data/redsea/redsea.biom',
+                     '--metadata-file', 'data/redsea/redsea_metadata.txt',
+                     '--formula',
+                     'Depth+Temperature+Salinity+Oxygen+Fluorescence'
+                     '+Nitrate',
+                     '--epochs', '100',
+                     '--differential-prior', '0.5',
+                     '--summary-interval', '1',
+                     '--summary-dir', self.path,
+                     '--random-seed', None,
+                     ]
+
+        result = runner.invoke(songbird.multinomial, test_args)
+        try:
+            self.assertEqual(0, result.exit_code)
+        except AssertionError:
+            ex = result.exception
+            error = Exception('Command failed with non-zero exit code')
+            raise error .with_traceback(ex.__traceback__)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/songbird/parameter_info.py b/songbird/parameter_info.py
@@ -45,6 +45,10 @@
         'to summaries that can be loaded into Tensorboard and '
         'checkpoints for recovering parameters during runtime.'
     ),
+    "random-seed": (
+        'The number used to obtain consistent results for the random '
+        'processes in the fitting procedure.'
+    ),
 }
 
 DEFAULTS = {
@@ -60,4 +64,5 @@
     "checkpoint-interval": 3600,
     "summary-interval": 10,
     "summary-dir": "summarydir",
+    "random-seed": 0,
-    "random-seed": 0,
+    "random-seed": None,
-    "random-seed": 0,
+    "random-seed": None,
 }
diff --git a/songbird/q2/_method.py b/songbird/q2/_method.py
@@ -43,14 +43,16 @@ def multinomial(table: biom.Table,
     # split up training and testing
     trainX, testX, trainY, testY = split_training(
         dense_table, metadata, design,
-        training_column, num_random_test_examples
+        training_column, num_random_test_examples,
+        seed=0,
     )
 
     model = MultRegression(learning_rate=learning_rate, clipnorm=clipnorm,
                            beta_mean=differential_prior,
                            batch_size=batch_size,
                            save_path=None)
     with tf.Graph().as_default(), tf.Session() as session:
+        tf.set_random_seed(0)
-        tf.set_random_seed(0)
+        tf.set_random_seed(seed)
-        tf.set_random_seed(0)
+        tf.set_random_seed(seed)
         model(session, trainX, trainY, testX, testY)
 
         loss, cv, its = model.fit(

diff --git a/songbird/q2/tests/test_method.py b/songbird/q2/tests/test_method.py
@@ -49,6 +49,30 @@ def test_fit(self):
         npt.assert_allclose(exp_beta, res_beta.T, atol=0.6, rtol=0.6)
         self.assertGreater(len(res_stats.to_dataframe().index), 1)
 
+    def test_fit_consistency(self):
+        md = self.md
+
+        md.name = 'sampleid'
+        md = qiime2.Metadata(md)
+
+        res_beta1, res_stats1, res_biplot1 = multinomial(
+            table=self.table, metadata=md,
+            min_sample_count=0, min_feature_count=0,
+            formula="X", epochs=1000)
+
+        res_beta2, res_stats2, res_biplot2 = multinomial(
+            table=self.table, metadata=md,
+            min_sample_count=0, min_feature_count=0,
+            formula="X", epochs=1000)
+
+        npt.assert_array_equal(res_beta1, res_beta2)
+        end_res_stats1 = res_stats1.to_dataframe().iloc[-1]
+        end_res_stats2 = res_stats2.to_dataframe().iloc[-1]
+        npt.assert_array_equal(end_res_stats1, end_res_stats2)
+        npt.assert_array_equal(res_biplot1.eigvals, res_biplot2.eigvals)
+        npt.assert_array_equal(res_biplot1.samples, res_biplot2.samples)
+        npt.assert_array_equal(res_biplot1.features, res_biplot2.features)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/songbird/util.py b/songbird/util.py
@@ -176,9 +176,10 @@ def design_filter(val, id_, md):
 
 
 def split_training(dense_table, metadata, design, training_column=None,
-                   num_random_test_examples=10):
+                   num_random_test_examples=10, seed=None):
 
     if training_column is None:
+        np.random.seed(seed)
         idx = np.random.random(design.shape[0])
         i = np.argsort(idx)[num_random_test_examples]