diff --git a/design_bench/__init__.py b/design_bench/__init__.py index fd36d32..1a27ff6 100644 --- a/design_bench/__init__.py +++ b/design_bench/__init__.py @@ -1,7 +1,10 @@ from design_bench.registration import registry, register, make, spec from design_bench.oracles.sklearn.kernels import ProteinKernel from design_bench.oracles.sklearn.kernels import DefaultSequenceKernel +from design_bench.oracles.feature_extractors.\ + morgan_fingerprint_features import MorganFingerprintFeatures from sklearn.gaussian_process.kernels import ConstantKernel, RBF +import numpy as np register('ToyDiscrete-Exact-v0', @@ -470,7 +473,7 @@ register('TFBind10-Exact-v0', 'design_bench.datasets.discrete.tf_bind_10_dataset:TFBind10Dataset', - 'design_bench.oracles.exact:TFBind10Oracle', + 'design_bench.oracles.exact.tf_bind_10_oracle:TFBind10Oracle', # keyword arguments for building the dataset dataset_kwargs=dict( @@ -731,6 +734,43 @@ is_absolute=None))) +register('ChEMBLMorganFingerprint-GP-v0', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', + 'design_bench.oracles.sklearn:GaussianProcessOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + distribution=None, + max_percentile=40, + min_percentile=0, + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), + + # keyword arguments for building GP oracle + oracle_kwargs=dict( + noise_std=0.0, + max_samples=2000, + distribution=None, + max_percentile=53, + min_percentile=0, + + # process the data into morgan fingerprints + feature_extractor=MorganFingerprintFeatures(dtype=np.int32), + + # parameters used for building the model + model_kwargs=dict(kernel=DefaultSequenceKernel(size=2), + alpha=0.01), + + # parameters used for building the validation set + split_kwargs=dict(val_fraction=0.5, + subset=None, + shard_size=50000, + to_disk=True, + disk_target="chembl-Potency-CHEMBL1794345/split", + is_absolute=False))) + + register('ChEMBL-GP-v0', 
'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', @@ -741,8 +781,8 @@ distribution=None, max_percentile=40, min_percentile=0, - assay_chembl_id="CHEMBL1964047", - standard_type="GI50"), + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), # keyword arguments for building GP oracle oracle_kwargs=dict( @@ -759,9 +799,48 @@ # parameters used for building the validation set split_kwargs=dict(val_fraction=0.5, subset=None, - shard_size=5000, + shard_size=50000, + to_disk=True, + disk_target="chembl-Potency-CHEMBL1794345/split", + is_absolute=False))) + + +register('ChEMBLMorganFingerprint-RandomForest-v0', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', + 'design_bench.oracles.sklearn:RandomForestOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + distribution=None, + max_percentile=40, + min_percentile=0, + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), + + # keyword arguments for building RandomForest oracle + oracle_kwargs=dict( + noise_std=0.0, + max_samples=2000, + distribution=None, + max_percentile=53, + min_percentile=0, + + # process the data into morgan fingerprints + override_input_spec=True, + feature_extractor=MorganFingerprintFeatures(dtype=np.float32), + + # parameters used for building the model + model_kwargs=dict(n_estimators=100, + max_depth=100, + max_features="auto"), + + # parameters used for building the validation set + split_kwargs=dict(val_fraction=0.5, + subset=None, + shard_size=50000, to_disk=True, - disk_target="chembl-GI50-CHEMBL1964047/split", + disk_target="chembl-Potency-CHEMBL1794345/split", is_absolute=False))) @@ -775,8 +854,8 @@ distribution=None, max_percentile=40, min_percentile=0, - assay_chembl_id="CHEMBL1964047", - standard_type="GI50"), + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), # keyword arguments for building RandomForest oracle oracle_kwargs=dict( 
@@ -794,13 +873,13 @@ # parameters used for building the validation set split_kwargs=dict(val_fraction=0.5, subset=None, - shard_size=5000, + shard_size=50000, to_disk=True, - disk_target="chembl-GI50-CHEMBL1964047/split", + disk_target="chembl-Potency-CHEMBL1794345/split", is_absolute=False))) -register('ChEMBL-FullyConnected-v0', +register('ChEMBLMorganFingerprint-FullyConnected-v0', 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', @@ -810,8 +889,8 @@ distribution=None, max_percentile=40, min_percentile=0, - assay_chembl_id="CHEMBL1964047", - standard_type="GI50"), + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), # keyword arguments for training FullyConnected oracle oracle_kwargs=dict( @@ -821,21 +900,103 @@ max_percentile=53, min_percentile=0, + # process the data into morgan fingerprints + feature_extractor=MorganFingerprintFeatures(dtype=np.float32), + # parameters used for building the model - model_kwargs=dict(embedding_size=64, + model_kwargs=dict(embedding_size=32, hidden_size=512, activation='relu', num_layers=2, epochs=5, shuffle_buffer=5000, + learning_rate=0.0001), + + # parameters used for building the validation set + split_kwargs=dict(val_fraction=0.1, + subset=None, + shard_size=50000, + to_disk=True, + disk_target="chembl-Potency-CHEMBL1794345/split", + is_absolute=False))) + + +register('ChEMBL-FullyConnected-v0', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', + 'design_bench.oracles.tensorflow:FullyConnectedOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + distribution=None, + max_percentile=40, + min_percentile=0, + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), + + # keyword arguments for training FullyConnected oracle + oracle_kwargs=dict( + noise_std=0.0, + max_samples=None, + distribution=None, + max_percentile=53, + min_percentile=0, + + # parameters used for building the 
model + model_kwargs=dict(embedding_size=32, + hidden_size=512, + activation='relu', + num_layers=2, + epochs=20, + shuffle_buffer=5000, + learning_rate=0.0001), + + # parameters used for building the validation set + split_kwargs=dict(val_fraction=0.1, + subset=None, + shard_size=50000, + to_disk=True, + disk_target="chembl-Potency-CHEMBL1794345/split", + is_absolute=False))) + + +register('ChEMBLMorganFingerprint-LSTM-v0', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', + 'design_bench.oracles.tensorflow:LSTMOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + distribution=None, + max_percentile=40, + min_percentile=0, + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), + + # keyword arguments for training LSTM oracle + oracle_kwargs=dict( + noise_std=0.0, + max_samples=None, + distribution=None, + max_percentile=53, + min_percentile=0, + + # process the data into morgan fingerprints + feature_extractor=MorganFingerprintFeatures(dtype=np.int32), + + # parameters used for building the model + model_kwargs=dict(hidden_size=64, + num_layers=2, + epochs=20, + shuffle_buffer=5000, learning_rate=0.001), # parameters used for building the validation set split_kwargs=dict(val_fraction=0.1, subset=None, - shard_size=5000, + shard_size=50000, to_disk=True, - disk_target="chembl-GI50-CHEMBL1964047/split", + disk_target="chembl-Potency-CHEMBL1794345/split", is_absolute=False))) @@ -849,8 +1010,8 @@ distribution=None, max_percentile=40, min_percentile=0, - assay_chembl_id="CHEMBL1964047", - standard_type="GI50"), + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), # keyword arguments for training LSTM oracle oracle_kwargs=dict( @@ -870,9 +1031,51 @@ # parameters used for building the validation set split_kwargs=dict(val_fraction=0.1, subset=None, - shard_size=5000, + shard_size=50000, to_disk=True, - disk_target="chembl-GI50-CHEMBL1964047/split", + 
disk_target="chembl-Potency-CHEMBL1794345/split", + is_absolute=False))) + + +register('ChEMBLMorganFingerprint-ResNet-v0', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', + 'design_bench.oracles.tensorflow:ResNetOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + distribution=None, + max_percentile=40, + min_percentile=0, + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), + + # keyword arguments for training ResNet oracle + oracle_kwargs=dict( + noise_std=0.0, + max_samples=None, + distribution=None, + max_percentile=53, + min_percentile=0, + + # process the data into morgan fingerprints + feature_extractor=MorganFingerprintFeatures(dtype=np.int32), + + # parameters used for building the model + model_kwargs=dict(hidden_size=64, + activation='relu', + kernel_size=3, + num_blocks=4, + epochs=20, + shuffle_buffer=5000, + learning_rate=0.001), + + # parameters used for building the validation set + split_kwargs=dict(val_fraction=0.1, + subset=None, + shard_size=50000, + to_disk=True, + disk_target="chembl-Potency-CHEMBL1794345/split", is_absolute=False))) @@ -886,8 +1089,8 @@ distribution=None, max_percentile=40, min_percentile=0, - assay_chembl_id="CHEMBL1964047", - standard_type="GI50"), + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), # keyword arguments for training ResNet oracle oracle_kwargs=dict( @@ -909,9 +1112,54 @@ # parameters used for building the validation set split_kwargs=dict(val_fraction=0.1, subset=None, - shard_size=5000, + shard_size=50000, to_disk=True, - disk_target="chembl-GI50-CHEMBL1964047/split", + disk_target="chembl-Potency-CHEMBL1794345/split", + is_absolute=False))) + + +register('ChEMBLMorganFingerprint-Transformer-v0', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', + 'design_bench.oracles.tensorflow:TransformerOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + 
distribution=None, + max_percentile=40, + min_percentile=0, + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), + + # keyword arguments for training Transformer oracle + oracle_kwargs=dict( + noise_std=0.0, + internal_batch_size=32, + max_samples=None, + distribution=None, + max_percentile=53, + min_percentile=0, + + # process the data into morgan fingerprints + feature_extractor=MorganFingerprintFeatures(dtype=np.int32), + + # parameters used for building the model + model_kwargs=dict(hidden_size=128, + feed_forward_size=512, + activation='relu', + num_heads=4, + num_blocks=4, + epochs=20, + shuffle_buffer=20000, + learning_rate=0.0001, + dropout_rate=0.2), + + # parameters used for building the validation set + split_kwargs=dict(val_fraction=0.1, + subset=None, + shard_size=50000, + to_disk=True, + disk_target="chembl-Potency-CHEMBL1794345/split", is_absolute=False))) @@ -925,8 +1173,8 @@ distribution=None, max_percentile=40, min_percentile=0, - assay_chembl_id="CHEMBL1964047", - standard_type="GI50"), + assay_chembl_id="CHEMBL1794345", + standard_type="Potency"), # keyword arguments for training Transformer oracle oracle_kwargs=dict( @@ -951,9 +1199,9 @@ # parameters used for building the validation set split_kwargs=dict(val_fraction=0.1, subset=None, - shard_size=5000, + shard_size=50000, to_disk=True, - disk_target="chembl-GI50-CHEMBL1964047/split", + disk_target="chembl-Potency-CHEMBL1794345/split", is_absolute=False))) diff --git a/design_bench/datasets/discrete/chembl_dataset.py b/design_bench/datasets/discrete/chembl_dataset.py index 27c73dd..0627fcd 100644 --- a/design_bench/datasets/discrete/chembl_dataset.py +++ b/design_bench/datasets/discrete/chembl_dataset.py @@ -2,7 +2,404 @@ from design_bench.disk_resource import DiskResource, SERVER_URL -CHEMBL_FILES = ['chembl-GI50-CHEMBL1963844/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-0.npy', 
'chembl-GI50-CHEMBL1963868/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963882/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963885/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963900/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963921/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963945/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963976/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964017/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964045/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964063/chembl-x-0.npy', 
'chembl-GI50-CHEMBL1964065/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-0.npy', 'chembl-GI50-CHEMBL1964099/chembl-x-0.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-0.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-0.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-0.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-0.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-0.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-0.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-0.npy', 'chembl-Inhibition-CHEMBL4296187/chembl-x-0.npy', 'chembl-Inhibition-CHEMBL4296188/chembl-x-0.npy', 'chembl-Potency-CHEMBL1613769/chembl-x-0.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-0.npy', 'chembl-Potency-CHEMBL1613838/chembl-x-0.npy', 'chembl-Potency-CHEMBL1613842/chembl-x-0.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614038/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614079/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614161/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614174/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614250/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614502/chembl-x-0.npy', 
'chembl-Potency-CHEMBL1614530/chembl-x-0.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-0.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-0.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-0.npy', 'chembl-Potency-CHEMBL1738132/chembl-x-0.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-0.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-0.npy', 'chembl-Potency-CHEMBL1738317/chembl-x-0.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-0.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794311/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794359/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794461/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-0.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114713/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114861/chembl-x-0.npy', 'chembl-Potency-CHEMBL2114913/chembl-x-0.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-0.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-0.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-0.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-0.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-0.npy', 'chembl-Potency-CHEMBL3214953/chembl-x-0.npy', 'chembl-Potency-CHEMBL3215017/chembl-x-0.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-0.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-0.npy', 
'chembl-Potency-CHEMBL3215270/chembl-x-0.npy', 'chembl-Potency-CHEMBL3215278/chembl-x-0.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-0.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963882/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963885/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963900/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963921/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963945/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963976/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964017/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-1.npy', 
'chembl-GI50-CHEMBL1964043/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964045/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964063/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964065/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-1.npy', 'chembl-GI50-CHEMBL1964099/chembl-x-1.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-1.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-1.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-1.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-1.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-1.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-1.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-1.npy', 'chembl-Inhibition-CHEMBL4296187/chembl-x-1.npy', 'chembl-Inhibition-CHEMBL4296188/chembl-x-1.npy', 'chembl-Potency-CHEMBL1613769/chembl-x-1.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-1.npy', 'chembl-Potency-CHEMBL1613838/chembl-x-1.npy', 'chembl-Potency-CHEMBL1613842/chembl-x-1.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614038/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614079/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614161/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614174/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614250/chembl-x-1.npy', 
'chembl-Potency-CHEMBL1614257/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614502/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-1.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-1.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-1.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-1.npy', 'chembl-Potency-CHEMBL1738132/chembl-x-1.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-1.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-1.npy', 'chembl-Potency-CHEMBL1738317/chembl-x-1.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-1.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794311/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794359/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794461/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-1.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114713/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114861/chembl-x-1.npy', 'chembl-Potency-CHEMBL2114913/chembl-x-1.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-1.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-1.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-1.npy', 
'chembl-Potency-CHEMBL2354287/chembl-x-1.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-1.npy', 'chembl-Potency-CHEMBL3214953/chembl-x-1.npy', 'chembl-Potency-CHEMBL3215017/chembl-x-1.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-1.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-1.npy', 'chembl-Potency-CHEMBL3215270/chembl-x-1.npy', 'chembl-Potency-CHEMBL3215278/chembl-x-1.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-1.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963882/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963885/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963900/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963921/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963945/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963976/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964017/chembl-x-2.npy', 
'chembl-GI50-CHEMBL1964018/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964045/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964063/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964065/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-2.npy', 'chembl-GI50-CHEMBL1964099/chembl-x-2.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-2.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-2.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-2.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-2.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-2.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-2.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-2.npy', 'chembl-Inhibition-CHEMBL4296187/chembl-x-2.npy', 'chembl-Inhibition-CHEMBL4296188/chembl-x-2.npy', 'chembl-Potency-CHEMBL1613769/chembl-x-2.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-2.npy', 'chembl-Potency-CHEMBL1613838/chembl-x-2.npy', 'chembl-Potency-CHEMBL1613842/chembl-x-2.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614038/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614079/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-2.npy', 
'chembl-Potency-CHEMBL1614161/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614174/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614250/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614502/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-2.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-2.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-2.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-2.npy', 'chembl-Potency-CHEMBL1738132/chembl-x-2.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-2.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-2.npy', 'chembl-Potency-CHEMBL1738317/chembl-x-2.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-2.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794311/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794359/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794461/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-2.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114713/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-2.npy', 
'chembl-Potency-CHEMBL2114810/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114861/chembl-x-2.npy', 'chembl-Potency-CHEMBL2114913/chembl-x-2.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-2.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-2.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-2.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-2.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-2.npy', 'chembl-Potency-CHEMBL3214953/chembl-x-2.npy', 'chembl-Potency-CHEMBL3215017/chembl-x-2.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-2.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-2.npy', 'chembl-Potency-CHEMBL3215270/chembl-x-2.npy', 'chembl-Potency-CHEMBL3215278/chembl-x-2.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-2.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963882/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963885/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963900/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963921/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963945/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963976/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-3.npy', 
'chembl-GI50-CHEMBL1963994/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964017/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964045/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964063/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964065/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-3.npy', 'chembl-GI50-CHEMBL1964099/chembl-x-3.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-3.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-3.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-3.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-3.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-3.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-3.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-3.npy', 'chembl-Inhibition-CHEMBL4296187/chembl-x-3.npy', 'chembl-Inhibition-CHEMBL4296188/chembl-x-3.npy', 'chembl-Potency-CHEMBL1613769/chembl-x-3.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-3.npy', 'chembl-Potency-CHEMBL1613838/chembl-x-3.npy', 
'chembl-Potency-CHEMBL1613842/chembl-x-3.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614038/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614079/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614161/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614174/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614250/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614502/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-3.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-3.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-3.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-3.npy', 'chembl-Potency-CHEMBL1738132/chembl-x-3.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-3.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-3.npy', 'chembl-Potency-CHEMBL1738317/chembl-x-3.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-3.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794311/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794359/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794461/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-3.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-3.npy', 
'chembl-Potency-CHEMBL1794585/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114713/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114861/chembl-x-3.npy', 'chembl-Potency-CHEMBL2114913/chembl-x-3.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-3.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-3.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-3.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-3.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-3.npy', 'chembl-Potency-CHEMBL3214953/chembl-x-3.npy', 'chembl-Potency-CHEMBL3215017/chembl-x-3.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-3.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-3.npy', 'chembl-Potency-CHEMBL3215270/chembl-x-3.npy', 'chembl-Potency-CHEMBL3215278/chembl-x-3.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-3.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963882/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963885/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963900/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963921/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963945/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-4.npy', 
'chembl-GI50-CHEMBL1963961/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963976/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964017/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964045/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964063/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964065/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-4.npy', 'chembl-GI50-CHEMBL1964099/chembl-x-4.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-4.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-4.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-4.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-4.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-4.npy', 
'chembl-Inhibition-CHEMBL3507681/chembl-x-4.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-4.npy', 'chembl-Inhibition-CHEMBL4296187/chembl-x-4.npy', 'chembl-Inhibition-CHEMBL4296188/chembl-x-4.npy', 'chembl-Potency-CHEMBL1613769/chembl-x-4.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-4.npy', 'chembl-Potency-CHEMBL1613838/chembl-x-4.npy', 'chembl-Potency-CHEMBL1613842/chembl-x-4.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614038/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614079/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614161/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614174/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614250/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614502/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-4.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-4.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-4.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-4.npy', 'chembl-Potency-CHEMBL1738132/chembl-x-4.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-4.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-4.npy', 'chembl-Potency-CHEMBL1738317/chembl-x-4.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-4.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794311/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794359/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-4.npy', 
'chembl-Potency-CHEMBL1794401/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794461/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-4.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114713/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114861/chembl-x-4.npy', 'chembl-Potency-CHEMBL2114913/chembl-x-4.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-4.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-4.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-4.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-4.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-4.npy', 'chembl-Potency-CHEMBL3214953/chembl-x-4.npy', 'chembl-Potency-CHEMBL3215017/chembl-x-4.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-4.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-4.npy', 'chembl-Potency-CHEMBL3215270/chembl-x-4.npy', 'chembl-Potency-CHEMBL3215278/chembl-x-4.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-4.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963882/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963885/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963900/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-5.npy', 
'chembl-GI50-CHEMBL1963911/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963921/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963945/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963976/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964017/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964045/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964063/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964065/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-5.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-5.npy', 
'chembl-GI50-CHEMBL1964099/chembl-x-5.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-5.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-5.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-5.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-5.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-5.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-5.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-5.npy', 'chembl-Potency-CHEMBL1613769/chembl-x-5.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-5.npy', 'chembl-Potency-CHEMBL1613842/chembl-x-5.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614161/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-5.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-5.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-5.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-5.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-5.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-5.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-5.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794359/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-5.npy', 
'chembl-Potency-CHEMBL1794584/chembl-x-5.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114861/chembl-x-5.npy', 'chembl-Potency-CHEMBL2114913/chembl-x-5.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-5.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-5.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-5.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-5.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-5.npy', 'chembl-Potency-CHEMBL3215017/chembl-x-5.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-5.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-5.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-5.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963880/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-6.npy', 
'chembl-GI50-CHEMBL1964007/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964034/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964065/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-6.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-6.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-6.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-6.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-6.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-6.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-6.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-6.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-6.npy', 'chembl-Potency-CHEMBL1613836/chembl-x-6.npy', 'chembl-Potency-CHEMBL1613842/chembl-x-6.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614146/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614166/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614236/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-6.npy', 
'chembl-Potency-CHEMBL1614441/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-6.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-6.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-6.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-6.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-6.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-6.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-6.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794424/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-6.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-6.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-6.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-6.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-6.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-6.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-6.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-6.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-6.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-6.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-6.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-6.npy', 'chembl-Potency-CHEMBL2354311/chembl-x-6.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-6.npy', 'chembl-Potency-CHEMBL3215181/chembl-x-6.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-6.npy', 'chembl-GI50-CHEMBL1963844/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963860/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963866/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-7.npy', 
'chembl-GI50-CHEMBL1963880/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963895/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963901/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963903/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963911/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963953/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963961/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963989/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963990/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963991/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963994/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964004/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964007/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964009/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964018/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964021/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964025/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964030/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964037/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964040/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964043/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964049/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964066/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964075/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964085/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964087/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964088/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964091/chembl-x-7.npy', 'chembl-GI50-CHEMBL1964092/chembl-x-7.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-7.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-7.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-7.npy', 
'chembl-INHIBITION-CHEMBL4513220/chembl-x-7.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-7.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-7.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-7.npy', 'chembl-Potency-CHEMBL1613914/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614211/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614257/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614441/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-7.npy', 'chembl-Potency-CHEMBL1614544/chembl-x-7.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-7.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-7.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-7.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-7.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-7.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-7.npy', 'chembl-Potency-CHEMBL1794585/chembl-x-7.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-7.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-7.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-7.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-7.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-7.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-7.npy', 'chembl-Potency-CHEMBL2354211/chembl-x-7.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-7.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-7.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-7.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-7.npy', 
'chembl-Potency-CHEMBL3215181/chembl-x-7.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-7.npy', 'chembl-GI50-CHEMBL1963854/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963868/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963874/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963876/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963922/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963929/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963960/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963981/chembl-x-8.npy', 'chembl-GI50-CHEMBL1963985/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964006/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964012/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964047/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964048/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964059/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964072/chembl-x-8.npy', 'chembl-GI50-CHEMBL1964077/chembl-x-8.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-8.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-8.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-8.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-8.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-8.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-8.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-8.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-8.npy', 'chembl-Potency-CHEMBL1614249/chembl-x-8.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-8.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-8.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-8.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-8.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-8.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-8.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-8.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-8.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-8.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-8.npy', 
'chembl-Potency-CHEMBL1794553/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-8.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-8.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-8.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-8.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-8.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-8.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-8.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-8.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-8.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-8.npy', 'chembl-Potency-CHEMBL2354287/chembl-x-8.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-8.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-8.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-9.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-9.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-9.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-9.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-9.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-9.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-9.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-9.npy', 'chembl-Potency-CHEMBL1614421/chembl-x-9.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-9.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-9.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-9.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-9.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-9.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-9.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-9.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-9.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-9.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-9.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-9.npy', 
'chembl-Potency-CHEMBL2114780/chembl-x-9.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-9.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-9.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-9.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-9.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-9.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-9.npy', 'chembl-Potency-CHEMBL3215106/chembl-x-9.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-9.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-10.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-10.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-10.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-10.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-10.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-10.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-10.npy', 'chembl-Potency-CHEMBL1614087/chembl-x-10.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-10.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-10.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-10.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-10.npy', 'chembl-Potency-CHEMBL1737991/chembl-x-10.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-10.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-10.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-10.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-10.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-10.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-10.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-10.npy', 'chembl-Potency-CHEMBL2114784/chembl-x-10.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-10.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-10.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-10.npy', 
'chembl-Potency-CHEMBL2354221/chembl-x-10.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-10.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-10.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-11.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-11.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-11.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-11.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-11.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-11.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-11.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-11.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-11.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-11.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-11.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-11.npy', 'chembl-Potency-CHEMBL1738312/chembl-x-11.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-11.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794553/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-11.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-11.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-11.npy', 'chembl-Potency-CHEMBL2114780/chembl-x-11.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-11.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-11.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-11.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-11.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-11.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-11.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-12.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-12.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-12.npy', 'chembl-INHIBITION-CHEMBL4513220/chembl-x-12.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-12.npy', 
'chembl-Inhibition-CHEMBL3507681/chembl-x-12.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-12.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-12.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-12.npy', 'chembl-Potency-CHEMBL1614530/chembl-x-12.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-12.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-12.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-12.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794308/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794352/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-12.npy', 'chembl-Potency-CHEMBL1794584/chembl-x-12.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-12.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-12.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-12.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-12.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-12.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-12.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-12.npy', 'chembl-INHIBITION-CHEMBL4513217/chembl-x-13.npy', 'chembl-INHIBITION-CHEMBL4513218/chembl-x-13.npy', 'chembl-INHIBITION-CHEMBL4513219/chembl-x-13.npy', 'chembl-INHIBITION-CHEMBL4513221/chembl-x-13.npy', 'chembl-Inhibition-CHEMBL3507681/chembl-x-13.npy', 'chembl-Inhibition-CHEMBL3988443/chembl-x-13.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-13.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-13.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-13.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-13.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-13.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-13.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-13.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-13.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-13.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-13.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-13.npy', 
'chembl-Potency-CHEMBL2114775/chembl-x-13.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-13.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-13.npy', 'chembl-Potency-CHEMBL2114843/chembl-x-13.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-13.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-13.npy', 'chembl-Potency-CHEMBL3562077/chembl-x-13.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-14.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-14.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-14.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-14.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-14.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-14.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-14.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-14.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-14.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-14.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-14.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-14.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-14.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-14.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-14.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-14.npy', 'chembl-Potency-CHEMBL1614458/chembl-x-15.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-15.npy', 'chembl-Potency-CHEMBL1737902/chembl-x-15.npy', 'chembl-Potency-CHEMBL1738184/chembl-x-15.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-15.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-15.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-15.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-15.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-15.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-15.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-15.npy', 'chembl-Potency-CHEMBL2114775/chembl-x-15.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-15.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-15.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-15.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-15.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-16.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-16.npy', 
'chembl-Potency-CHEMBL1738588/chembl-x-16.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-16.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-16.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-16.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-16.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-16.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-16.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-16.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-16.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-16.npy', 'chembl-Potency-CHEMBL1614459/chembl-x-17.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-17.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-17.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-17.npy', 'chembl-Potency-CHEMBL1794375/chembl-x-17.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-17.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-17.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-17.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-17.npy', 'chembl-Potency-CHEMBL2114810/chembl-x-17.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-17.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-17.npy', 'chembl-Potency-CHEMBL1738442/chembl-x-18.npy', 'chembl-Potency-CHEMBL1738588/chembl-x-18.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-18.npy', 'chembl-Potency-CHEMBL1794401/chembl-x-18.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-18.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-18.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-18.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-18.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-18.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-19.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-19.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-19.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-19.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-19.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-19.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-20.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-20.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-20.npy', 'chembl-Potency-CHEMBL2114788/chembl-x-20.npy', 
'chembl-Potency-CHEMBL2354221/chembl-x-20.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-20.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-21.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-21.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-21.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-21.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-21.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-22.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-22.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-22.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-22.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-22.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-23.npy', 'chembl-Potency-CHEMBL1794483/chembl-x-23.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-23.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-23.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-23.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-24.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-24.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-24.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-24.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-25.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-25.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-25.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-25.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-26.npy', 'chembl-Potency-CHEMBL1794580/chembl-x-26.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-26.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-26.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-27.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-27.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-27.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-28.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-28.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-28.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-29.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-29.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-29.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-30.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-30.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-30.npy', 
'chembl-Potency-CHEMBL1794345/chembl-x-31.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-31.npy', 'chembl-Potency-CHEMBL2354254/chembl-x-31.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-32.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-32.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-33.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-33.npy', 'chembl-Potency-CHEMBL1794345/chembl-x-34.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-34.npy', 'chembl-Potency-CHEMBL2354221/chembl-x-35.npy'] +""" + +chembl-AC50-CHEMBL1741322/chembl-x-0.npy 1190 +chembl-ALB-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-ALP-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-ALT-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-AST-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-BASOLE-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-BILI-CHEMBL3885882/chembl-x-0.npy 1093 +chembl-BUN-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-CHLORIDE-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-CHOL-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-CK-CHEMBL3885882/chembl-x-0.npy 1014 +chembl-CREAT-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-EOSLE-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-GI50-CHEMBL1963844/chembl-x-0.npy 5049 +chembl-GI50-CHEMBL1963848/chembl-x-0.npy 1900 +chembl-GI50-CHEMBL1963854/chembl-x-0.npy 5503 +chembl-GI50-CHEMBL1963860/chembl-x-0.npy 4757 +chembl-GI50-CHEMBL1963866/chembl-x-0.npy 5068 +chembl-GI50-CHEMBL1963868/chembl-x-0.npy 5344 +chembl-GI50-CHEMBL1963874/chembl-x-0.npy 5419 +chembl-GI50-CHEMBL1963876/chembl-x-0.npy 5377 +chembl-GI50-CHEMBL1963880/chembl-x-0.npy 5340 +chembl-GI50-CHEMBL1963882/chembl-x-0.npy 3596 +chembl-GI50-CHEMBL1963885/chembl-x-0.npy 3578 +chembl-GI50-CHEMBL1963887/chembl-x-0.npy 2020 +chembl-GI50-CHEMBL1963889/chembl-x-0.npy 1859 +chembl-GI50-CHEMBL1963895/chembl-x-0.npy 5167 +chembl-GI50-CHEMBL1963900/chembl-x-0.npy 3311 +chembl-GI50-CHEMBL1963901/chembl-x-0.npy 5282 +chembl-GI50-CHEMBL1963903/chembl-x-0.npy 5344 +chembl-GI50-CHEMBL1963911/chembl-x-0.npy 5004 +chembl-GI50-CHEMBL1963921/chembl-x-0.npy 3570 
+chembl-GI50-CHEMBL1963922/chembl-x-0.npy 5438 +chembl-GI50-CHEMBL1963929/chembl-x-0.npy 5404 +chembl-GI50-CHEMBL1963935/chembl-x-0.npy 1541 +chembl-GI50-CHEMBL1963945/chembl-x-0.npy 3327 +chembl-GI50-CHEMBL1963953/chembl-x-0.npy 5249 +chembl-GI50-CHEMBL1963954/chembl-x-0.npy 1925 +chembl-GI50-CHEMBL1963960/chembl-x-0.npy 5379 +chembl-GI50-CHEMBL1963961/chembl-x-0.npy 5187 +chembl-GI50-CHEMBL1963963/chembl-x-0.npy 1988 +chembl-GI50-CHEMBL1963976/chembl-x-0.npy 3484 +chembl-GI50-CHEMBL1963981/chembl-x-0.npy 5430 +chembl-GI50-CHEMBL1963985/chembl-x-0.npy 5458 +chembl-GI50-CHEMBL1963989/chembl-x-0.npy 5332 +chembl-GI50-CHEMBL1963990/chembl-x-0.npy 5364 +chembl-GI50-CHEMBL1963991/chembl-x-0.npy 4645 +chembl-GI50-CHEMBL1963994/chembl-x-0.npy 4968 +chembl-GI50-CHEMBL1964004/chembl-x-0.npy 4985 +chembl-GI50-CHEMBL1964006/chembl-x-0.npy 5460 +chembl-GI50-CHEMBL1964007/chembl-x-0.npy 5377 +chembl-GI50-CHEMBL1964009/chembl-x-0.npy 4883 +chembl-GI50-CHEMBL1964012/chembl-x-0.npy 5351 +chembl-GI50-CHEMBL1964014/chembl-x-0.npy 1561 +chembl-GI50-CHEMBL1964017/chembl-x-0.npy 3558 +chembl-GI50-CHEMBL1964018/chembl-x-0.npy 5020 +chembl-GI50-CHEMBL1964021/chembl-x-0.npy 5356 +chembl-GI50-CHEMBL1964025/chembl-x-0.npy 5053 +chembl-GI50-CHEMBL1964030/chembl-x-0.npy 5379 +chembl-GI50-CHEMBL1964034/chembl-x-0.npy 4679 +chembl-GI50-CHEMBL1964037/chembl-x-0.npy 5222 +chembl-GI50-CHEMBL1964040/chembl-x-0.npy 5354 +chembl-GI50-CHEMBL1964043/chembl-x-0.npy 5271 +chembl-GI50-CHEMBL1964045/chembl-x-0.npy 3639 +chembl-GI50-CHEMBL1964047/chembl-x-0.npy 5487 +chembl-GI50-CHEMBL1964048/chembl-x-0.npy 5460 +chembl-GI50-CHEMBL1964049/chembl-x-0.npy 5266 +chembl-GI50-CHEMBL1964059/chembl-x-0.npy 5396 +chembl-GI50-CHEMBL1964062/chembl-x-0.npy 1905 +chembl-GI50-CHEMBL1964063/chembl-x-0.npy 3494 +chembl-GI50-CHEMBL1964065/chembl-x-0.npy 4421 +chembl-GI50-CHEMBL1964066/chembl-x-0.npy 5309 +chembl-GI50-CHEMBL1964072/chembl-x-0.npy 5503 +chembl-GI50-CHEMBL1964074/chembl-x-0.npy 2025 
+chembl-GI50-CHEMBL1964075/chembl-x-0.npy 4893 +chembl-GI50-CHEMBL1964077/chembl-x-0.npy 5423 +chembl-GI50-CHEMBL1964085/chembl-x-0.npy 5306 +chembl-GI50-CHEMBL1964086/chembl-x-0.npy 1659 +chembl-GI50-CHEMBL1964087/chembl-x-0.npy 5325 +chembl-GI50-CHEMBL1964088/chembl-x-0.npy 4984 +chembl-GI50-CHEMBL1964091/chembl-x-0.npy 5150 +chembl-GI50-CHEMBL1964092/chembl-x-0.npy 5080 +chembl-GI50-CHEMBL1964099/chembl-x-0.npy 3056 +chembl-GLUC-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-HCT-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-HGB-CHEMBL3885882/chembl-x-0.npy 1093 +chembl-INHIBITION-CHEMBL4513217/chembl-x-0.npy 4807 +chembl-INHIBITION-CHEMBL4513218/chembl-x-0.npy 4815 +chembl-INHIBITION-CHEMBL4513219/chembl-x-0.npy 4815 +chembl-INHIBITION-CHEMBL4513220/chembl-x-0.npy 4589 +chembl-INHIBITION-CHEMBL4513221/chembl-x-0.npy 4815 +chembl-Inhibition-CHEMBL3507681/chembl-x-0.npy 4574 +chembl-Inhibition-CHEMBL3988443/chembl-x-0.npy 4574 +chembl-Inhibition-CHEMBL4296187/chembl-x-0.npy 10334 +chembl-Inhibition-CHEMBL4296188/chembl-x-0.npy 9777 +chembl-Inhibition-CHEMBL4296802/chembl-x-0.npy 9464 +chembl-Inhibition-CHEMBL4495582/chembl-x-0.npy 1433 +chembl-Inhibition-CHEMBL4513082/chembl-x-0.npy 1433 +chembl-LYMLE-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-MCH-CHEMBL3885882/chembl-x-0.npy 1093 +chembl-MCHC-CHEMBL3885882/chembl-x-0.npy 1093 +chembl-MCV-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-MONOLE-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-NEUTLE-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-PHOS-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-PLAT-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-POTASSIUM-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-PROT-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-Potency-CHEMBL1613836/chembl-x-0.npy 4118 +chembl-Potency-CHEMBL1613838/chembl-x-0.npy 3004 +chembl-Potency-CHEMBL1613842/chembl-x-0.npy 3090 +chembl-Potency-CHEMBL1613910/chembl-x-0.npy 1107 +chembl-Potency-CHEMBL1613914/chembl-x-0.npy 3668 +chembl-Potency-CHEMBL1613918/chembl-x-0.npy 1278 
+chembl-Potency-CHEMBL1613970/chembl-x-0.npy 1009 +chembl-Potency-CHEMBL1614038/chembl-x-0.npy 2568 +chembl-Potency-CHEMBL1614076/chembl-x-0.npy 1239 +chembl-Potency-CHEMBL1614079/chembl-x-0.npy 1402 +chembl-Potency-CHEMBL1614087/chembl-x-0.npy 4682 +chembl-Potency-CHEMBL1614146/chembl-x-0.npy 4118 +chembl-Potency-CHEMBL1614161/chembl-x-0.npy 2131 +chembl-Potency-CHEMBL1614166/chembl-x-0.npy 2027 +chembl-Potency-CHEMBL1614174/chembl-x-0.npy 2482 +chembl-Potency-CHEMBL1614211/chembl-x-0.npy 3493 +chembl-Potency-CHEMBL1614227/chembl-x-0.npy 1200 +chembl-Potency-CHEMBL1614236/chembl-x-0.npy 3023 +chembl-Potency-CHEMBL1614249/chembl-x-0.npy 3282 +chembl-Potency-CHEMBL1614250/chembl-x-0.npy 1111 +chembl-Potency-CHEMBL1614257/chembl-x-0.npy 1739 +chembl-Potency-CHEMBL1614275/chembl-x-0.npy 1887 +chembl-Potency-CHEMBL1614280/chembl-x-0.npy 1240 +chembl-Potency-CHEMBL1614281/chembl-x-0.npy 1155 +chembl-Potency-CHEMBL1614342/chembl-x-0.npy 2517 +chembl-Potency-CHEMBL1614361/chembl-x-0.npy 1531 +chembl-Potency-CHEMBL1614364/chembl-x-0.npy 1195 +chembl-Potency-CHEMBL1614410/chembl-x-0.npy 1093 +chembl-Potency-CHEMBL1614421/chembl-x-0.npy 2639 +chembl-Potency-CHEMBL1614441/chembl-x-0.npy 3371 +chembl-Potency-CHEMBL1614458/chembl-x-0.npy 6304 +chembl-Potency-CHEMBL1614459/chembl-x-0.npy 7096 +chembl-Potency-CHEMBL1614530/chembl-x-0.npy 4849 +chembl-Potency-CHEMBL1614544/chembl-x-0.npy 2218 +chembl-Potency-CHEMBL1737902/chembl-x-0.npy 6480 +chembl-Potency-CHEMBL1737991/chembl-x-0.npy 4048 +chembl-Potency-CHEMBL1738132/chembl-x-0.npy 2610 +chembl-Potency-CHEMBL1738184/chembl-x-0.npy 4994 +chembl-Potency-CHEMBL1738312/chembl-x-0.npy 3919 +chembl-Potency-CHEMBL1738317/chembl-x-0.npy 3331 +chembl-Potency-CHEMBL1738442/chembl-x-0.npy 7331 +chembl-Potency-CHEMBL1738588/chembl-x-0.npy 7848 +chembl-Potency-CHEMBL1738606/chembl-x-0.npy 1208 +chembl-Potency-CHEMBL1794308/chembl-x-0.npy 3654 +chembl-Potency-CHEMBL1794311/chembl-x-0.npy 1799 +chembl-Potency-CHEMBL1794345/chembl-x-0.npy 
13702 +chembl-Potency-CHEMBL1794352/chembl-x-0.npy 5877 +chembl-Potency-CHEMBL1794359/chembl-x-0.npy 1537 +chembl-Potency-CHEMBL1794375/chembl-x-0.npy 6607 +chembl-Potency-CHEMBL1794401/chembl-x-0.npy 8322 +chembl-Potency-CHEMBL1794424/chembl-x-0.npy 2448 +chembl-Potency-CHEMBL1794440/chembl-x-0.npy 1002 +chembl-Potency-CHEMBL1794461/chembl-x-0.npy 1596 +chembl-Potency-CHEMBL1794483/chembl-x-0.npy 9530 +chembl-Potency-CHEMBL1794499/chembl-x-0.npy 1290 +chembl-Potency-CHEMBL1794553/chembl-x-0.npy 3546 +chembl-Potency-CHEMBL1794580/chembl-x-0.npy 9915 +chembl-Potency-CHEMBL1794584/chembl-x-0.npy 3014 +chembl-Potency-CHEMBL1794585/chembl-x-0.npy 1200 +chembl-Potency-CHEMBL2114713/chembl-x-0.npy 1250 +chembl-Potency-CHEMBL2114738/chembl-x-0.npy 1580 +chembl-Potency-CHEMBL2114775/chembl-x-0.npy 3350 +chembl-Potency-CHEMBL2114780/chembl-x-0.npy 4186 +chembl-Potency-CHEMBL2114784/chembl-x-0.npy 2479 +chembl-Potency-CHEMBL2114788/chembl-x-0.npy 5101 +chembl-Potency-CHEMBL2114807/chembl-x-0.npy 2307 +chembl-Potency-CHEMBL2114810/chembl-x-0.npy 7269 +chembl-Potency-CHEMBL2114836/chembl-x-0.npy 1881 +chembl-Potency-CHEMBL2114843/chembl-x-0.npy 4050 +chembl-Potency-CHEMBL2114861/chembl-x-0.npy 1798 +chembl-Potency-CHEMBL2114908/chembl-x-0.npy 1139 +chembl-Potency-CHEMBL2114913/chembl-x-0.npy 2068 +chembl-Potency-CHEMBL2354211/chembl-x-0.npy 1594 +chembl-Potency-CHEMBL2354221/chembl-x-0.npy 6549 +chembl-Potency-CHEMBL2354254/chembl-x-0.npy 5683 +chembl-Potency-CHEMBL2354287/chembl-x-0.npy 3305 +chembl-Potency-CHEMBL2354311/chembl-x-0.npy 1538 +chembl-Potency-CHEMBL3214953/chembl-x-0.npy 1102 +chembl-Potency-CHEMBL3215017/chembl-x-0.npy 1197 +chembl-Potency-CHEMBL3215106/chembl-x-0.npy 2289 +chembl-Potency-CHEMBL3215181/chembl-x-0.npy 3089 +chembl-Potency-CHEMBL3215278/chembl-x-0.npy 2233 +chembl-Potency-CHEMBL3562077/chembl-x-0.npy 5146 +chembl-RBC-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-SODIUM-CHEMBL3885882/chembl-x-0.npy 1096 +chembl-WBC-CHEMBL3885882/chembl-x-0.npy 1095 
+chembl-WEIGHT-CHEMBL3885862/chembl-x-0.npy 3196 +chembl-WEIGHT-CHEMBL3885863/chembl-x-0.npy 3360 + +""" + + +CHEMBL_FILES = ['chembl-AC50-CHEMBL1741322/chembl-x-0.npy', + 'chembl-ALB-CHEMBL3885882/chembl-x-0.npy', + 'chembl-ALP-CHEMBL3885882/chembl-x-0.npy', + 'chembl-ALT-CHEMBL3885882/chembl-x-0.npy', + 'chembl-AST-CHEMBL3885882/chembl-x-0.npy', + 'chembl-BASOLE-CHEMBL3885882/chembl-x-0.npy', + 'chembl-BILI-CHEMBL3885882/chembl-x-0.npy', + 'chembl-BUN-CHEMBL3885882/chembl-x-0.npy', + 'chembl-CHLORIDE-CHEMBL3885882/chembl-x-0.npy', + 'chembl-CHOL-CHEMBL3885882/chembl-x-0.npy', + 'chembl-CK-CHEMBL3885882/chembl-x-0.npy', + 'chembl-CREAT-CHEMBL3885882/chembl-x-0.npy', + 'chembl-EOSLE-CHEMBL3885882/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963844/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963848/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963854/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963860/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963866/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963868/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963874/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963876/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963880/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963882/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963885/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963887/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963889/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963895/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963900/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963901/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963903/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963911/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963921/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963922/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963929/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963935/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963945/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963953/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963954/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963960/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963961/chembl-x-0.npy', + 
'chembl-GI50-CHEMBL1963963/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963976/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963981/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963985/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963989/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963990/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963991/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1963994/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964004/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964006/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964007/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964009/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964012/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964014/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964017/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964018/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964021/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964025/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964030/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964034/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964037/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964040/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964043/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964045/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964047/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964048/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964049/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964059/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964062/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964063/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964065/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964066/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964072/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964074/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964075/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964077/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964085/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964086/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964087/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964088/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964091/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964092/chembl-x-0.npy', + 'chembl-GI50-CHEMBL1964099/chembl-x-0.npy', + 
'chembl-GLUC-CHEMBL3885882/chembl-x-0.npy', + 'chembl-HCT-CHEMBL3885882/chembl-x-0.npy', + 'chembl-HGB-CHEMBL3885882/chembl-x-0.npy', + 'chembl-INHIBITION-CHEMBL4513217/chembl-x-0.npy', + 'chembl-INHIBITION-CHEMBL4513218/chembl-x-0.npy', + 'chembl-INHIBITION-CHEMBL4513219/chembl-x-0.npy', + 'chembl-INHIBITION-CHEMBL4513220/chembl-x-0.npy', + 'chembl-INHIBITION-CHEMBL4513221/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL3507681/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL3988443/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL4296187/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL4296188/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL4296802/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL4495582/chembl-x-0.npy', + 'chembl-Inhibition-CHEMBL4513082/chembl-x-0.npy', + 'chembl-LYMLE-CHEMBL3885882/chembl-x-0.npy', + 'chembl-MCH-CHEMBL3885882/chembl-x-0.npy', + 'chembl-MCHC-CHEMBL3885882/chembl-x-0.npy', + 'chembl-MCV-CHEMBL3885882/chembl-x-0.npy', + 'chembl-MONOLE-CHEMBL3885882/chembl-x-0.npy', + 'chembl-NEUTLE-CHEMBL3885882/chembl-x-0.npy', + 'chembl-PHOS-CHEMBL3885882/chembl-x-0.npy', + 'chembl-PLAT-CHEMBL3885882/chembl-x-0.npy', + 'chembl-POTASSIUM-CHEMBL3885882/chembl-x-0.npy', + 'chembl-PROT-CHEMBL3885882/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613836/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613838/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613842/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613910/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613914/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613918/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1613970/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614038/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614076/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614079/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614087/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614146/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614161/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614166/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614174/chembl-x-0.npy', + 
'chembl-Potency-CHEMBL1614211/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614227/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614236/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614249/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614250/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614257/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614275/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614280/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614281/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614342/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614361/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614364/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614410/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614421/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614441/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614458/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614459/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614530/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1614544/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1737902/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1737991/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738132/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738184/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738312/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738317/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738442/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738588/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1738606/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794308/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794311/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794345/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794352/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794359/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794375/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794401/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794424/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794440/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794461/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794483/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794499/chembl-x-0.npy', + 
'chembl-Potency-CHEMBL1794553/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794580/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794584/chembl-x-0.npy', + 'chembl-Potency-CHEMBL1794585/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114713/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114738/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114775/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114780/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114784/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114788/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114807/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114810/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114836/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114843/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114861/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114908/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2114913/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2354211/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2354221/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2354254/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2354287/chembl-x-0.npy', + 'chembl-Potency-CHEMBL2354311/chembl-x-0.npy', + 'chembl-Potency-CHEMBL3214953/chembl-x-0.npy', + 'chembl-Potency-CHEMBL3215017/chembl-x-0.npy', + 'chembl-Potency-CHEMBL3215106/chembl-x-0.npy', + 'chembl-Potency-CHEMBL3215181/chembl-x-0.npy', + 'chembl-Potency-CHEMBL3215278/chembl-x-0.npy', + 'chembl-Potency-CHEMBL3562077/chembl-x-0.npy', + 'chembl-RBC-CHEMBL3885882/chembl-x-0.npy', + 'chembl-SODIUM-CHEMBL3885882/chembl-x-0.npy', + 'chembl-WBC-CHEMBL3885882/chembl-x-0.npy', + 'chembl-WEIGHT-CHEMBL3885862/chembl-x-0.npy', + 'chembl-WEIGHT-CHEMBL3885863/chembl-x-0.npy'] class ChEMBLDataset(DiscreteDataset): @@ -202,8 +599,8 @@ class ChEMBLDataset(DiscreteDataset): x_name = "smiles" @staticmethod - def register_x_shards(assay_chembl_id="CHEMBL1964047", - standard_type="GI50"): # max percentile 53 works well + def register_x_shards(assay_chembl_id="CHEMBL1794345", + standard_type="Potency"): # max percentile 53 works well """Registers a remote file for 
download that contains design values in a format compatible with the dataset builder class; these files are downloaded all at once in the dataset initialization @@ -235,8 +632,8 @@ def register_x_shards(assay_chembl_id="CHEMBL1964047", if f"{standard_type}-{assay_chembl_id}" in file] @staticmethod - def register_y_shards(assay_chembl_id="CHEMBL1964047", - standard_type="GI50"): + def register_y_shards(assay_chembl_id="CHEMBL1794345", + standard_type="Potency"): """Registers a remote file for download that contains prediction values in a format compatible with the dataset builder class; these files are downloaded all at once in the dataset initialization @@ -267,8 +664,8 @@ def register_y_shards(assay_chembl_id="CHEMBL1964047", download_method="direct") for file in CHEMBL_FILES if f"{standard_type}-{assay_chembl_id}" in file] - def __init__(self, assay_chembl_id="CHEMBL1964047", - standard_type="GI50", + def __init__(self, assay_chembl_id="CHEMBL1794345", + standard_type="Potency", soft_interpolation=0.6, **kwargs): """Initialize a model-based optimization dataset and prepare that dataset by loading that dataset from disk and modifying diff --git a/design_bench/oracles/feature_extractors/__init__.py b/design_bench/oracles/feature_extractors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/design_bench/oracles/feature_extractors/feature_extractor.py b/design_bench/oracles/feature_extractors/feature_extractor.py new file mode 100644 index 0000000..525656e --- /dev/null +++ b/design_bench/oracles/feature_extractors/feature_extractor.py @@ -0,0 +1,230 @@ +import abc + + +class FeatureExtractor(abc.ABC): + """An abstract class for managing transformations applied to model-based + optimization datasets when constructing the oracle; for example, if the + oracle is intended to learn from molecule fingerprints + + max_x { y = f(x) } + + Public Methods: + + dataset_to_oracle_x(np.ndarray) -> np.ndarray + Helper function for converting from designs 
contained in the + dataset format into a format the oracle is expecting to process, + such as from integers to logits of a categorical distribution + + dataset_to_oracle_y(np.ndarray) -> np.ndarray + Helper function for converting from predictions contained in the + dataset format into a format the oracle is expecting to process, + such as from normalized to denormalized predictions + + oracle_to_dataset_x(np.ndarray) -> np.ndarray + Helper function for converting from designs in the format of the + oracle into the design format the dataset contains, such as + from categorical logits to integers + + oracle_to_dataset_y(np.ndarray) -> np.ndarray + Helper function for converting from predictions in the + format of the oracle into a format the dataset contains, + such as from normalized to denormalized predictions + + """ + + name = "feature_extractor" + + @abc.abstractmethod + def dataset_to_oracle_x(self, x_batch, dataset): + """Helper function for converting from designs contained in the + dataset format into a format the oracle is expecting to process, + such as from integers to logits of a categorical distribution + + Arguments: + + x_batch: np.ndarray + a batch of design values 'x' that will be given as input to the + oracle model in order to obtain a prediction value 'y' for + each 'x' which is then returned + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + x_batch: np.ndarray + a batch of design values 'x' that have been converted from the + format of designs contained in the dataset to the format + expected by the oracle score function + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def dataset_to_oracle_y(self, y_batch, dataset): + """Helper function for converting from predictions contained in the + dataset format into a format the oracle is expecting to process, + such as from normalized to denormalized 
predictions + + Arguments: + + y_batch: np.ndarray + a batch of prediction values 'y' that are from the dataset and + will be processed into a format expected by the oracle score + function, which is useful when training the oracle + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + y_batch: np.ndarray + a batch of prediction values 'y' that have been converted from + the format of predictions contained in the dataset to the + format expected by the oracle score function + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def oracle_to_dataset_x(self, x_batch, dataset): + """Helper function for converting from designs in the format of the + oracle into the design format the dataset contains, such as + from categorical logits to integers + + Arguments: + + x_batch: np.ndarray + a batch of design values 'x' that have been converted from + the format of designs contained in the dataset to the + format expected by the oracle score function + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + x_batch: np.ndarray + a batch of design values 'x' that have been converted from + the format of the oracle to the format of designs + contained in the dataset + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def oracle_to_dataset_y(self, y_batch, dataset): + """Helper function for converting from predictions in the format + of the oracle into a format the dataset contains, such as + from normalized to denormalized predictions + + Arguments: + + y_batch: np.ndarray + a batch of prediction values 'y' that have been converted from + the format of predictions contained in the dataset to the + format expected by the oracle score function + dataset: DatasetBuilder + an instance of a subclass of the 
DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + y_batch: np.ndarray + a batch of prediction values 'y' that have been converted from + the format of the oracle to the format of predictions + contained in the dataset + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def input_shape(self, dataset): + """Helper function that returns the shape of input tensors that were + sampled from the dataset and are transformed into features using + subclasses of this class + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + input_shape: List[int] + the shape of input tensors that were sampled from the dataset and + are transformed into features using subclasses of this class + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def input_dtype(self, dataset): + """Helper function that returns the data type of the features returned + by running the feature extractor from dataset to oracle + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + input_dtype: np.dtype + the type of input tensors that were sampled from the dataset and + are transformed into features using subclasses of this class + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def is_discrete(self, dataset): + """Helper function that specifies whether the transformation applied + by the feature extractor returns a discrete or continuous set of + features, which is required for building predictive models + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + is_discrete: bool + a boolean that indicates 
whether the dataset has been transformed + into a discrete or continuous representation + + """ + + raise NotImplementedError("cannot run base class") + + @abc.abstractmethod + def num_classes(self, dataset): + """Helper function for determining the number of classes in the discrete + representation intended for the oracle, if it is discrete, otherwise + this function may not be implemented and will raise an error + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + num_classes: int + the number of classes in the discrete representation for the model + based optimization dataset used for training the oracle + + """ + + raise NotImplementedError("cannot run base class") diff --git a/design_bench/oracles/feature_extractors/morgan_fingerprint_features.py b/design_bench/oracles/feature_extractors/morgan_fingerprint_features.py new file mode 100644 index 0000000..a2c295c --- /dev/null +++ b/design_bench/oracles/feature_extractors/morgan_fingerprint_features.py @@ -0,0 +1,275 @@ +from design_bench.oracles.feature_extractors.feature_extractor import FeatureExtractor +from design_bench.disk_resource import direct_download +from design_bench.disk_resource import DATA_DIR +from design_bench.disk_resource import SERVER_URL +from deepchem.feat.smiles_tokenizer import SmilesTokenizer +import deepchem.feat as feat +import os +import numpy as np + + +class MorganFingerprintFeatures(FeatureExtractor): + """An abstract class for managing transformations applied to model-based + optimization datasets when constructing the oracle; for example, if the + oracle is intended to learn from molecule fingerprints + + max_x { y = f(x) } + + Public Methods: + + dataset_to_oracle_x(np.ndarray) -> np.ndarray + Helper function for converting from designs contained in the + dataset format into a format the oracle is expecting to process, + such as from integers to logits of a 
categorical distribution + + dataset_to_oracle_y(np.ndarray) -> np.ndarray + Helper function for converting from predictions contained in the + dataset format into a format the oracle is expecting to process, + such as from normalized to denormalized predictions + + oracle_to_dataset_x(np.ndarray) -> np.ndarray + Helper function for converting from designs in the format of the + oracle into the design format the dataset contains, such as + from categorical logits to integers + + oracle_to_dataset_y(np.ndarray) -> np.ndarray + Helper function for converting from predictions in the + format of the oracle into a format the dataset contains, + such as from normalized to denormalized predictions + + """ + + name = "morgan_fingerprint" + + def __init__(self, size=2048, radius=4, dtype=np.int32): + """Create a feature extractor that converts the tokenized smiles + designs of a model-based optimization dataset into morgan fingerprints + of the given numpy data type, which the oracle then learns from + + Arguments: + + size: int + the number of bits in the morgan fingerprint returned by RDKit, + controls the vector size of the molecule embedding + radius: int + the substructure radius passed to RDKit that controls how local + the information encoded in the molecule embedding is + + """ + + # wrap the deepchem featurizer that relies on rdkit + self.featurizer = feat.CircularFingerprint(size=size, radius=radius) + self.size = size + self.radius = radius + self.dtype = dtype + + # download the smiles vocabulary if not already + direct_download(f'{SERVER_URL}/smiles_vocab.txt', + os.path.join(DATA_DIR, 'smiles_vocab.txt')) + self.tokenizer = SmilesTokenizer( + os.path.join(DATA_DIR, 'smiles_vocab.txt')) + + def dataset_to_oracle_x(self, x_batch, dataset): + """Helper function for converting from designs contained in the + dataset format into a format the oracle is expecting to process, + such as from integers to logits of a
batch of design values 'x' that will be given as input to the + oracle model in order to obtain a prediction value 'y' for + each 'x' which is then returned + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + x_batch: np.ndarray + a batch of design values 'x' that have been converted from the + format of designs contained in the dataset to the format + expected by the oracle score function + + """ + + x_out = [] + for xi in x_batch: + + # drop the start token and everything from the first stop token (id 13) on + stop_tokens = np.where(xi == 13)[0] + tokens = xi[1:stop_tokens[0]] if stop_tokens.size > 0 else xi[1:] + + # apply morgan fingerprint featurization using rdkit + value = self.featurizer.featurize( + self.tokenizer.decode(tokens).replace(" ", ""))[0] + + # fall back to an all-zero fingerprint when the featurizer returns None + x_out.append(np.zeros([self.size], dtype=self.dtype) + if value is None + else np.array(value, dtype=self.dtype)) + + return np.asarray(x_out) + + def dataset_to_oracle_y(self, y_batch, dataset): + """Helper function for converting from predictions contained in the + dataset format into a format the oracle is expecting to process, + such as from normalized to denormalized predictions + + Arguments: + + y_batch: np.ndarray + a batch of prediction values 'y' that are from the dataset and + will be processed into a format expected by the oracle score + function, which is useful when training the oracle + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + y_batch: np.ndarray + a batch of prediction values 'y' that have been converted from + the format of predictions contained in the dataset to the + format expected by the oracle score function + + """ + + return y_batch + + def oracle_to_dataset_x(self, x_batch, dataset): + """Helper function for converting from designs in the
format of the + oracle into the design format the dataset contains, such as + from categorical logits to integers + + Arguments: + + x_batch: np.ndarray + a batch of design values 'x' that have been converted from + the format of designs contained in the dataset to the + format expected by the oracle score function + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + x_batch: np.ndarray + a batch of design values 'x' that have been converted from + the format of the oracle to the format of designs + contained in the dataset + + """ + + raise NotImplementedError("features are not invertible") + + def oracle_to_dataset_y(self, y_batch, dataset): + """Helper function for converting from predictions in the format + of the oracle into a format the dataset contains, such as + from normalized to denormalized predictions + + Arguments: + + y_batch: np.ndarray + a batch of prediction values 'y' that have been converted from + the format of predictions contained in the dataset to the + format expected by the oracle score function + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + y_batch: np.ndarray + a batch of prediction values 'y' that have been converted from + the format of the oracle to the format of predictions + contained in the dataset + + """ + + return y_batch + + def input_shape(self, dataset): + """Helper function that returns the shape of the features returned + by running the feature extractor from dataset to oracle, which is + a single vector with one entry per bit of the morgan fingerprint + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + input_shape: List[int] + the shape of input tensors that were sampled from the dataset and + are transformed into features
using subclasses of this class + + """ + + return [self.size] + + def input_dtype(self, dataset): + """Helper function that returns the data type of the features returned + by running the feature extractor from dataset to oracle + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + input_dtype: np.dtype + the numpy data type passed to the constructor, which the morgan + fingerprint features are cast to before they are returned + + """ + + return self.dtype + + def is_discrete(self, dataset): + """Helper function that specifies whether the transformation applied + by the feature extractor returns a discrete or continuous set of + features, which is required for building predictive models + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + is_discrete: bool + a boolean that is True when the features have an integer data type + and are discrete, and False when they are continuous + + """ + + return np.issubdtype(self.dtype, np.integer) + + def num_classes(self, dataset): + """Helper function for determining the number of classes in the discrete + representation intended for the oracle, if it is discrete, otherwise + this function may not be implemented and will raise an error + + Arguments: + + dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class representing + the source of the batch, must be provided + + Returns: + + num_classes: int + the number of classes in the discrete representation for the model + based optimization dataset used for training the oracle + + """ + + if not self.is_discrete(dataset): + raise NotImplementedError("continuous features do not have ids") + return 2 diff --git a/design_bench/oracles/oracle_builder.py b/design_bench/oracles/oracle_builder.py index 0b4489b..318f7ce 100644 ---
a/design_bench/oracles/oracle_builder.py +++ b/design_bench/oracles/oracle_builder.py @@ -1,5 +1,6 @@ from design_bench.datasets.dataset_builder import DatasetBuilder from design_bench.datasets.discrete_dataset import DiscreteDataset +from design_bench.oracles.feature_extractors.feature_extractor import FeatureExtractor import abc import numpy as np import math @@ -146,7 +147,8 @@ def __init__(self, dataset: DatasetBuilder, is_batched=True, noise_std=0.0, expect_normalized_y=False, expect_normalized_x=False, expect_logits=None, max_samples=None, distribution=None, - min_percentile=0.0, max_percentile=100.0): + min_percentile=0.0, max_percentile=100.0, + feature_extractor: FeatureExtractor = None): """Initialize the ground truth score function f(x) for a model-based optimization problem, which involves loading the parameters of an oracle model and estimating its computational cost @@ -196,6 +198,10 @@ def __init__(self, dataset: DatasetBuilder, is_batched=True, min_percentile: float the percentile between 0 and 100 of prediction values 'y' below which are hidden from access by members outside the class + feature_extractor: FeatureExtractor + an optional feature extraction module that encodes designs and + scores from a model-based optimization dataset into different + feature spaces for training oracle models """ @@ -205,10 +211,6 @@ def __init__(self, dataset: DatasetBuilder, is_batched=True, raise ValueError("is_logits is only defined " "for use with discrete datasets") - # check the given dataset is compatible with this oracle - if not self.check_input_format(dataset): - raise ValueError("the given dataset is not compatible") - # keep the dataset in case it is needed for normalization self.external_dataset = dataset @@ -237,11 +239,20 @@ def __init__(self, dataset: DatasetBuilder, is_batched=True, self.is_batched = is_batched self.internal_batch_size = internal_batch_size self.internal_measurements = internal_measurements + self.feature_extractor = 
feature_extractor # attributes that describe model predictions self.noise_std = noise_std self.num_evaluations = self.external_dataset.dataset_size + # check the given dataset is compatible with this oracle + if not self.check_input_format(dataset): + raise ValueError("the given dataset is not compatible") + + # update the name of this oracle + if self.feature_extractor is not None: + self.name = self.name + "_" + self.feature_extractor.name + def dataset_to_oracle_x(self, x_batch, dataset=None): """Helper function for converting from designs contained in the dataset format into a format the oracle is expecting to process, @@ -290,6 +301,10 @@ def dataset_to_oracle_x(self, x_batch, dataset=None): if self.expect_normalized_x: x_batch = self.internal_dataset.normalize_x(x_batch) + # apply a feature extraction process unique to the oracle + if self.feature_extractor is not None: + x_batch = self.feature_extractor\ + .dataset_to_oracle_x(x_batch, self.internal_dataset) return x_batch def dataset_to_oracle_y(self, y_batch, dataset=None): @@ -330,6 +345,10 @@ def dataset_to_oracle_y(self, y_batch, dataset=None): if self.expect_normalized_y: y_batch = self.internal_dataset.normalize_y(y_batch) + # apply a feature extraction process unique to the oracle + if self.feature_extractor is not None: + y_batch = self.feature_extractor\ + .dataset_to_oracle_y(y_batch, self.internal_dataset) return y_batch def oracle_to_dataset_x(self, x_batch, dataset=None): @@ -360,6 +379,11 @@ def oracle_to_dataset_x(self, x_batch, dataset=None): if dataset is None: dataset = self.external_dataset + # apply a feature extraction process unique to the oracle + if self.feature_extractor is not None: + x_batch = self.feature_extractor\ + .oracle_to_dataset_x(x_batch, self.internal_dataset) + # handle when the dataset is normalized and the normalization # statistics expected by the oracle are different if self.expect_normalized_x: @@ -410,6 +434,11 @@ def oracle_to_dataset_y(self, y_batch, 
dataset=None): if dataset is None: dataset = self.external_dataset + # apply a feature extraction process unique to the oracle + if self.feature_extractor is not None: + y_batch = self.feature_extractor\ + .oracle_to_dataset_y(y_batch, self.internal_dataset) + # handle when the oracle expects normalized predictions but # the dataset is not currently normalized if self.expect_normalized_y: diff --git a/design_bench/oracles/sklearn/random_forest_oracle.py b/design_bench/oracles/sklearn/random_forest_oracle.py index 1c43aa1..764b941 100644 --- a/design_bench/oracles/sklearn/random_forest_oracle.py +++ b/design_bench/oracles/sklearn/random_forest_oracle.py @@ -72,7 +72,8 @@ class RandomForestOracle(SKLearnOracle): name = "random_forest" - def __init__(self, dataset: DiscreteDataset, **kwargs): + def __init__(self, dataset: DiscreteDataset, + override_input_spec=False, **kwargs): """Initialize the ground truth score function f(x) for a model-based optimization problem, which involves loading the parameters of an oracle model and estimating its computational cost @@ -90,9 +91,9 @@ def __init__(self, dataset: DiscreteDataset, **kwargs): super(RandomForestOracle, self).__init__( dataset, is_batched=True, internal_measurements=1, expect_normalized_y=True, - expect_normalized_x=True, - expect_logits=True if isinstance( - dataset, DiscreteDataset) else None, **kwargs) + expect_normalized_x=not override_input_spec, + expect_logits=True if isinstance(dataset, DiscreteDataset) + and not override_input_spec else None, **kwargs) @classmethod def check_input_format(cls, dataset): diff --git a/design_bench/oracles/tensorflow/fully_connected_oracle.py b/design_bench/oracles/tensorflow/fully_connected_oracle.py index 0ce1c53..240872d 100644 --- a/design_bench/oracles/tensorflow/fully_connected_oracle.py +++ b/design_bench/oracles/tensorflow/fully_connected_oracle.py @@ -225,11 +225,23 @@ def protected_fit(self, training, validation, model_kwargs=None): if isinstance(training, 
DiscreteDataset) and training.is_logits: input_shape = input_shape[:-1] + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + # the input layer of a keras model x = input_layer = keras.Input(shape=input_shape) + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + if self.feature_extractor.is_discrete(self.internal_dataset): + x = layers.Embedding( + self.feature_extractor.num_classes( + self.internal_dataset), embedding_size)(x) + # build a model with an input layer and optional embedding - if isinstance(training, DiscreteDataset): + elif isinstance(training, DiscreteDataset): x = layers.Embedding(training.num_classes, embedding_size)(x) # flatten all sequence dimensions into the channels diff --git a/design_bench/oracles/tensorflow/lstm_oracle.py b/design_bench/oracles/tensorflow/lstm_oracle.py index 446e243..79940ec 100644 --- a/design_bench/oracles/tensorflow/lstm_oracle.py +++ b/design_bench/oracles/tensorflow/lstm_oracle.py @@ -99,8 +99,7 @@ def __init__(self, dataset, **kwargs): expect_logits=False if isinstance( dataset, DiscreteDataset) else None, **kwargs) - @classmethod - def check_input_format(cls, dataset): + def check_input_format(self, dataset): """a function that accepts a model-based optimization dataset as input and determines whether the provided dataset is compatible with this oracle score function (is this oracle a correct one) @@ -120,10 +119,22 @@ def check_input_format(cls, dataset): """ + # handle when a feature extractor is used + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + + # ensure that the data has exactly one sequence dimension + if self.feature_extractor.is_discrete(self.internal_dataset): + return len(input_shape) == 1 + else: + return len(input_shape) == 2 + # ensure 
that the data has exactly one sequence dimension if isinstance(dataset, DiscreteDataset) and not dataset.is_logits: return len(dataset.input_shape) == 1 - return len(dataset.input_shape) == 2 + else: + return len(dataset.input_shape) == 2 def save_model_to_zip(self, model, zip_archive): """a function that serializes a machine learning model and stores @@ -226,11 +237,26 @@ def protected_fit(self, training, validation, model_kwargs=None): if isinstance(training, DiscreteDataset) and training.is_logits: input_shape = input_shape[:-1] + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + # the input layer of a keras model x = input_layer = keras.Input(shape=input_shape) + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + if self.feature_extractor.is_discrete(self.internal_dataset): + x = layers.Embedding( + self.feature_extractor.num_classes( + self.internal_dataset), hidden_size)(x) + else: + x = layers.Dense(hidden_size, + activation=None, use_bias=False)(x) + # build a model with an input layer and optional embedding - if isinstance(training, DiscreteDataset): + elif isinstance(training, DiscreteDataset): x = layers.Embedding(training.num_classes, hidden_size)(x) else: x = layers.Dense(hidden_size, diff --git a/design_bench/oracles/tensorflow/resnet_oracle.py b/design_bench/oracles/tensorflow/resnet_oracle.py index f3ce20a..c9f145b 100644 --- a/design_bench/oracles/tensorflow/resnet_oracle.py +++ b/design_bench/oracles/tensorflow/resnet_oracle.py @@ -100,8 +100,7 @@ def __init__(self, dataset, **kwargs): expect_logits=False if isinstance( dataset, DiscreteDataset) else None, **kwargs) - @classmethod - def check_input_format(cls, dataset): + def check_input_format(self, dataset): """a function that accepts a model-based optimization dataset as input and determines whether 
the provided dataset is compatible with this oracle score function (is this oracle a correct one) @@ -121,10 +120,22 @@ def check_input_format(cls, dataset): """ + # handle when a feature extractor is used + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + + # ensure that the data has exactly one sequence dimension + if self.feature_extractor.is_discrete(self.internal_dataset): + return len(input_shape) == 1 + else: + return len(input_shape) == 2 + # ensure that the data has exactly one sequence dimension if isinstance(dataset, DiscreteDataset) and not dataset.is_logits: return len(dataset.input_shape) == 1 - return len(dataset.input_shape) == 2 + else: + return len(dataset.input_shape) == 2 def save_model_to_zip(self, model, zip_archive): """a function that serializes a machine learning model and stores @@ -229,11 +240,26 @@ def protected_fit(self, training, validation, model_kwargs=None): if isinstance(training, DiscreteDataset) and training.is_logits: input_shape = input_shape[:-1] + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + # the input layer of a keras model x = input_layer = keras.Input(shape=input_shape) + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + if self.feature_extractor.is_discrete(self.internal_dataset): + x = layers.Embedding( + self.feature_extractor.num_classes( + self.internal_dataset), hidden_size)(x) + else: + x = layers.Dense(hidden_size, + activation=None, use_bias=False)(x) + # build a model with an input layer and optional embedding - if isinstance(training, DiscreteDataset): + elif isinstance(training, DiscreteDataset): x = layers.Embedding(training.num_classes, hidden_size)(x) else: x = layers.Dense(hidden_size, diff --git 
a/design_bench/oracles/tensorflow/tensorflow_oracle.py b/design_bench/oracles/tensorflow/tensorflow_oracle.py index 5dfd013..c8d945c 100644 --- a/design_bench/oracles/tensorflow/tensorflow_oracle.py +++ b/design_bench/oracles/tensorflow/tensorflow_oracle.py @@ -106,6 +106,11 @@ def create_tensorflow_dataset(self, dataset, batch_size=32, DiscreteDataset) and dataset.is_logits: input_shape = input_shape[:-1] + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + # map from dataset format to oracle format using numpy def dataset_to_oracle_numpy(x, y): return self.dataset_to_oracle_x(x, dataset=dataset), \ @@ -113,8 +118,14 @@ def dataset_to_oracle_numpy(x, y): # map from dataset format to oracle format using tensorflow def dataset_to_oracle_tensorflow(x, y): - dtype = tf.int32 if isinstance( - dataset, DiscreteDataset) else tf.float32 + + # identify the data type of the designs (or their features) + dtype = tf.float32 + if self.feature_extractor is not None: + if self.feature_extractor.is_discrete(self.internal_dataset): + dtype = tf.int32 + elif isinstance(dataset, DiscreteDataset): + dtype = tf.int32 # process the input tensors using numpy x, y = tf.numpy_function(dataset_to_oracle_numpy, diff --git a/design_bench/oracles/tensorflow/transformer_oracle.py b/design_bench/oracles/tensorflow/transformer_oracle.py index 7f515be..4d4e6fc 100644 --- a/design_bench/oracles/tensorflow/transformer_oracle.py +++ b/design_bench/oracles/tensorflow/transformer_oracle.py @@ -106,8 +106,7 @@ def __init__(self, dataset, **kwargs): expect_logits=False if isinstance( dataset, DiscreteDataset) else None, **kwargs) - @classmethod - def check_input_format(cls, dataset): + def check_input_format(self, dataset): """a function that accepts a model-based optimization dataset as input and determines whether the provided dataset is compatible with this oracle 
score function (is this oracle a correct one) @@ -127,10 +126,22 @@ def check_input_format(cls, dataset): """ + # handle when a feature extractor is used + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + + # ensure that the data has exactly one sequence dimension + if self.feature_extractor.is_discrete(self.internal_dataset): + return len(input_shape) == 1 + else: + return len(input_shape) == 2 + # ensure that the data has exactly one sequence dimension if isinstance(dataset, DiscreteDataset) and not dataset.is_logits: return len(dataset.input_shape) == 1 - return len(dataset.input_shape) == 2 + else: + return len(dataset.input_shape) == 2 def save_model_to_zip(self, model, zip_archive): """a function that serializes a machine learning model and stores @@ -230,9 +241,28 @@ def protected_fit(self, training, validation, model_kwargs=None): shuffle_buffer = model_kwargs["shuffle_buffer"] learning_rate = model_kwargs["learning_rate"] + # determine the vocab size to initialize the model with + num_classes = 1 + if self.feature_extractor is not None: + if self.feature_extractor.is_discrete(self.internal_dataset): + num_classes = self.feature_extractor\ + .num_classes(self.internal_dataset) + elif isinstance(training, DiscreteDataset): + num_classes = training.num_classes + + # obtain the expected shape of inputs to the model + input_shape = training.input_shape + if isinstance(training, DiscreteDataset) and training.is_logits: + input_shape = input_shape[:-1] + + # if the feature extraction model is given, assume its input shape + if self.feature_extractor is not None: + input_shape = self.feature_extractor\ + .input_shape(self.internal_dataset) + # build the hugging face model from a configuration model = TFBert(transformers.BertConfig( - vocab_size=training.num_classes, + vocab_size=num_classes, num_labels=1, hidden_size=hidden_size, num_hidden_layers=num_blocks, @@ -241,7 +271,7 @@ def 
protected_fit(self, training, validation, model_kwargs=None): hidden_act=activation, hidden_dropout_prob=dropout_rate, attention_probs_dropout_prob=dropout_rate, - max_position_embeddings=training.input_shape[0], + max_position_embeddings=input_shape[0], initializer_range=0.02, layer_norm_eps=1e-12, position_embedding_type='absolute')) @@ -257,9 +287,15 @@ def protected_fit(self, training, validation, model_kwargs=None): model.compile(optimizer=optimizer, loss=tf.keras.losses.MeanSquaredError()) - # an input key for the huggingface transformer api - input_key = "input_ids" if isinstance( - training, DiscreteDataset) else "inputs_embeds" + # if the feature extraction model is given, assume its format + if self.feature_extractor is not None: + input_key = ("input_ids" if self.feature_extractor + .is_discrete(self.internal_dataset) + else "inputs_embeds") + elif isinstance(training, DiscreteDataset): + input_key = "input_ids" + else: + input_key = "inputs_embeds" # create a tensorflow dataset generator for training training = self.create_tensorflow_dataset( @@ -311,8 +347,15 @@ def protected_predict(self, x, model=None): """ - input_key = "input_ids" if isinstance( - self.internal_dataset, DiscreteDataset) else "inputs_embeds" + # if the feature extraction model is given, assume its format + if self.feature_extractor is not None: + input_key = ("input_ids" if self.feature_extractor + .is_discrete(self.internal_dataset) + else "inputs_embeds") + elif isinstance(self.internal_dataset, DiscreteDataset): + input_key = "input_ids" + else: + input_key = "inputs_embeds" # call the model's predict function to generate predictions return (model if model else self.params["model"])\ diff --git a/process/process_raw_chembl.py b/process/process_raw_chembl.py index 171187e..3567547 100644 --- a/process/process_raw_chembl.py +++ b/process/process_raw_chembl.py @@ -30,8 +30,9 @@ parser = argparse.ArgumentParser("Process Raw ChEMBL") parser.add_argument("--dir", type=str,
default="data/chembl_activities") parser.add_argument("--shard-folder", type=str, default="./") - parser.add_argument("--min-samples", type=int, default=20000) - parser.add_argument("--samples-per-shard", type=int, default=5000) + parser.add_argument("--max-smiles-len", type=int, default=30) + parser.add_argument("--min-samples", type=int, default=1000) + parser.add_argument("--samples-per-shard", type=int, default=50000) args = parser.parse_args() file_matches = glob.glob(os.path.join(args.dir, "*.csv")) @@ -51,20 +52,23 @@ data = data.dropna(subset=['Standard Value']) data = data.groupby(["Standard Type", "Assay ChEMBL ID"])\ - .filter(lambda x: len(x) >= args.min_samples) + .filter(lambda x: sum([1 if len(sm) < args.max_smiles_len + else 0 for sm in x['Smiles'].tolist()]) >= args.min_samples) group_sizes = data.groupby(["Standard Type", "Assay ChEMBL ID"]).size() group = list(zip(*list( data.groupby(["Standard Type", "Assay ChEMBL ID"]))))[0] + # download the molecule dataset if not already + google_drive_download('1u5wQVwVSK7PG6dxGL2p_6pXf8gvsfUAk', + os.path.join(DATA_DIR, 'smiles_vocab.txt')) + tokenizer = SmilesTokenizer( + os.path.join(DATA_DIR, 'smiles_vocab.txt')) + os.makedirs(args.shard_folder, exist_ok=True) files_list = [] for standard_type, assay_chembl_id in group: - # download the molecule dataset if not already - google_drive_download('1u5wQVwVSK7PG6dxGL2p_6pXf8gvsfUAk', - os.path.join(DATA_DIR, 'smiles_vocab.txt')) - # load the static dataset df = data @@ -72,16 +76,17 @@ df = df[df["Standard Type"] == standard_type][df["Assay ChEMBL ID"] == assay_chembl_id] + x = df['Smiles'].to_list() + y = df['Standard Value'].to_list() + x, y = zip(*[(xi, yi) for xi, yi in + zip(x, y) if len(xi) < args.max_smiles_len]) + # build an integer encoder for smiles sequences - tokenizer = SmilesTokenizer( - os.path.join(DATA_DIR, 'smiles_vocab.txt')) - x = tokenizer(df['Smiles'].to_list(), - padding="longest")["input_ids"] + x = tokenizer(x, 
padding="longest")["input_ids"] x = np.array(x).astype(np.int32) # extract the prediction property of interest - y = df['Standard Value'] \ - .to_numpy().astype(np.float32).reshape([-1, 1]) + y = np.array(y).astype(np.float32).reshape([-1, 1]) # calculate the number of batches per single shard batch_per_shard = int(math.ceil(