diff --git a/README.md b/README.md
index 905d770..d02deb6 100644
--- a/README.md
+++ b/README.md
@@ -15,14 +15,14 @@ The goal of model-based optimization is to find an input **x** that maximizes an
 Design-Bench can be installed with the complete set of benchmarks via our pip package.
 
 ```bash
-pip install design-bench[all]>=2.0.10
-pip install morphing-agents==1.5
+pip install design-bench[all]>=2.0.11
+pip install morphing-agents==1.5.1
 ```
 
 Alternatively, if you do not have MuJoCo, you may opt for a minimal install.
 
 ```bash
-pip install design-bench>=2.0.10
+pip install design-bench>=2.0.11
 ```
 
 ## Available Tasks
@@ -50,8 +50,8 @@ ChEMBL-ResNet-v0 | ChEMBL | ResNet | 40516 | 0.3208
 UTR-ResNet-v0 | UTR | Transformer | 280000 | 0.8617
 HopperController-Exact-v0 | Hopper Controller | Exact | 3200 |
 Superconductor-FullyConnected-v0 | Superconductor | Fully Connected | 21263 | 0.9210
-AntMorphology-Exact-v0 | Ant Morphology | Exact | 12300 |
-DKittyMorphology-Exact-v0 | D'Kitty Morphology | Exact | 9546 |
+AntMorphology-Exact-v0 | Ant Morphology | Exact | 25009 |
+DKittyMorphology-Exact-v0 | D'Kitty Morphology | Exact | 25009 |
 
 ## Task API
 
diff --git a/design_bench/__init__.py b/design_bench/__init__.py
index f4e0934..eaadb4b 100644
--- a/design_bench/__init__.py
+++ b/design_bench/__init__.py
@@ -1120,7 +1120,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for building the exact oracle
@@ -1135,7 +1135,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for building GP oracle
@@ -1154,10 +1154,10 @@
          # parameters used for building the validation set
          split_kwargs=dict(val_fraction=0.5,
                            subset=None,
-                           shard_size=5000,
-                           to_disk=True,
-                           disk_target="ant_morphology/split",
-                           is_absolute=False)))
+                           shard_size=25000,
+                           to_disk=False,
+                           disk_target=None,
+                           is_absolute=None)))
 
 
 register('AntMorphology-RandomForest-v0',
@@ -1167,7 +1167,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for building RandomForest oracle
@@ -1185,10 +1185,10 @@
          # parameters used for building the validation set
          split_kwargs=dict(val_fraction=0.5,
                            subset=None,
-                           shard_size=5000,
-                           to_disk=True,
-                           disk_target="ant_morphology/split",
-                           is_absolute=False)))
+                           shard_size=25000,
+                           to_disk=False,
+                           disk_target=None,
+                           is_absolute=None)))
 
 
 register('AntMorphology-FullyConnected-v0',
@@ -1198,7 +1198,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for training FullyConnected oracle
@@ -1219,10 +1219,10 @@
          # parameters used for building the validation set
          split_kwargs=dict(val_fraction=0.1,
                            subset=None,
-                           shard_size=5000,
-                           to_disk=True,
-                           disk_target="ant_morphology/split",
-                           is_absolute=False)))
+                           shard_size=25000,
+                           to_disk=False,
+                           disk_target=None,
+                           is_absolute=None)))
 
 
 register('DKittyMorphology-Exact-v0',
@@ -1232,7 +1232,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for building the exact oracle
@@ -1247,7 +1247,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for building GP oracle
@@ -1266,10 +1266,10 @@
          # parameters used for building the validation set
          split_kwargs=dict(val_fraction=0.5,
                            subset=None,
-                           shard_size=5000,
-                           to_disk=True,
-                           disk_target="dkitty_morphology/split",
-                           is_absolute=False)))
+                           shard_size=25000,
+                           to_disk=False,
+                           disk_target=None,
+                           is_absolute=None)))
 
 
 register('DKittyMorphology-RandomForest-v0',
@@ -1279,7 +1279,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for building RandomForest oracle
@@ -1297,10 +1297,10 @@
          # parameters used for building the validation set
          split_kwargs=dict(val_fraction=0.5,
                            subset=None,
-                           shard_size=5000,
-                           to_disk=True,
-                           disk_target="dkitty_morphology/split",
-                           is_absolute=False)))
+                           shard_size=25000,
+                           to_disk=False,
+                           disk_target=None,
+                           is_absolute=None)))
 
 
 register('DKittyMorphology-FullyConnected-v0',
@@ -1310,7 +1310,7 @@
          # keyword arguments for building the dataset
          dataset_kwargs=dict(
              max_samples=None,
-             max_percentile=20,
+             max_percentile=40,
              min_percentile=0),
 
          # keyword arguments for training FullyConnected oracle
@@ -1331,7 +1331,7 @@
          # parameters used for building the validation set
          split_kwargs=dict(val_fraction=0.1,
                            subset=None,
-                           shard_size=5000,
-                           to_disk=True,
-                           disk_target="dkitty_morphology/split",
-                           is_absolute=False)))
+                           shard_size=25000,
+                           to_disk=False,
+                           disk_target=None,
+                           is_absolute=None)))
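(Not part of the patch.) The registrations above change how the morphology datasets are subsampled (`max_percentile` 20 to 40) and build the validation split as a single in-memory 25000-sample shard. Below is a minimal sketch of how one of the re-registered tasks would be loaded downstream, assuming the standard `design_bench.make` task API; the percentile comment is my reading of `max_percentile` and is not stated by the patch itself.

```python
import design_bench

# 'AntMorphology-Exact-v0' is registered above with max_percentile=40,
# which (as I read it) keeps only designs scoring below the dataset's
# 40th percentile, instead of the previous 20th-percentile cutoff
task = design_bench.make("AntMorphology-Exact-v0")

print(task.x.shape)  # continuous morphology design vectors
print(task.y.shape)  # returns measured by the exact MuJoCo oracle

# evaluate a batch of candidate designs with the registered oracle
scores = task.predict(task.x[:10])
```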
diff --git a/design_bench/datasets/dataset_builder.py b/design_bench/datasets/dataset_builder.py
index 0358396..345f933 100644
--- a/design_bench/datasets/dataset_builder.py
+++ b/design_bench/datasets/dataset_builder.py
@@ -267,9 +267,11 @@ def __init__(self, x_shards, y_shards, internal_batch_size=32,
 
         # save the provided dataset shards to be loaded into batches
         self.x_shards = (x_shards,) if \
-            not isinstance(x_shards, Iterable) else x_shards
+            isinstance(x_shards, np.ndarray) or \
+            isinstance(x_shards, DiskResource) else x_shards
         self.y_shards = (y_shards,) if \
-            not isinstance(y_shards, Iterable) else y_shards
+            isinstance(y_shards, np.ndarray) or \
+            isinstance(y_shards, DiskResource) else y_shards
 
         # download the remote resources if they are given
         self.num_shards = 0
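(Not part of the patch.) The `dataset_builder.py` change stops testing against `Iterable`, which a bare `np.ndarray` also satisfies, and instead wraps only a single array or a single `DiskResource` into a one-element tuple. A standalone sketch of the new behaviour follows, assuming `DiskResource` is importable from `design_bench.disk_resource` as in this module's existing imports.

```python
import numpy as np
from design_bench.disk_resource import DiskResource  # assumed import path


def as_shard_tuple(shards):
    # mirrors the updated conditional: a single array or DiskResource
    # becomes a one-element shard tuple, while lists or tuples of
    # shards are passed through unchanged
    return (shards,) if isinstance(shards, np.ndarray) \
        or isinstance(shards, DiskResource) else shards


print(as_shard_tuple(np.zeros([4, 2])))        # -> one-element tuple
print(as_shard_tuple([np.zeros([4, 2])] * 3))  # -> the original list
```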
diff --git a/process/process_raw_ant.py b/process/process_raw_ant.py
new file mode 100644
index 0000000..a63d9a2
--- /dev/null
+++ b/process/process_raw_ant.py
@@ -0,0 +1,89 @@
+import cma
+import numpy as np
+import multiprocessing
+import os
+import argparse
+import itertools
+from design_bench.oracles.exact.ant_morphology_oracle import AntMorphologyOracle
+from design_bench.datasets.continuous.ant_morphology_dataset import AntMorphologyDataset
+from morphing_agents.mujoco.ant.designs import DEFAULT_DESIGN
+from morphing_agents.mujoco.ant.designs import normalize_design_vector
+from morphing_agents.mujoco.ant.designs import denormalize_design_vector
+
+
+def single_evaluate(design):
+    placeholder_dataset = AntMorphologyDataset()
+    oracle = AntMorphologyOracle(placeholder_dataset)
+    return oracle.predict(
+        denormalize_design_vector(design)[np.newaxis].astype(np.float32))[0]
+
+
+pool = multiprocessing.Pool()
+
+
+def many_evaluate(designs):
+    return pool.map(single_evaluate, designs)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser("Process Raw Ant Morphologies")
+    parser.add_argument("--shard-folder", type=str, default="./")
+    parser.add_argument("--samples", type=int, default=25000)
+    args = parser.parse_args()
+
+    os.makedirs(os.path.join(
+        args.shard_folder, f"ant_morphology/"), exist_ok=True)
+
+    golden_design = normalize_design_vector(
+        np.concatenate(DEFAULT_DESIGN, axis=0))
+
+    sigma = 0.02
+    max_iterations = 250
+    save_every = 1
+
+    designs = [denormalize_design_vector(golden_design)]
+    predictions = [single_evaluate(golden_design)]
+
+    for i in itertools.count():
+
+        initial_design = golden_design + \
+            np.random.normal(0, 0.075, golden_design.shape)
+
+        es = cma.CMAEvolutionStrategy(initial_design, sigma)
+
+        step = 0
+        while not es.stop() and step < max_iterations:
+
+            if len(designs) >= args.samples:
+                break
+
+            xs = es.ask()
+            ys = many_evaluate(xs)
+
+            es.tell(xs, [-yi[0] for yi in ys])
+
+            step += 1
+
+            if step % save_every == 0:
+
+                designs.extend([denormalize_design_vector(xi) for xi in xs])
+                predictions.extend(ys)
+
+                print(f"CMA-ES ({len(designs)} samples)"
+                      f" - Restart {i+1}"
+                      f" - Step {step+1}/{max_iterations}"
+                      f" - Current Objective Value = {np.mean(ys)}")
+
+                np.save(os.path.join(
+                    args.shard_folder,
+                    f"ant_morphology/ant_morphology-x-0.npy"),
+                    np.array(designs).astype(np.float32))
+
+                np.save(os.path.join(
+                    args.shard_folder,
+                    f"ant_morphology/ant_morphology-y-0.npy"),
+                    np.array(predictions).astype(np.float32).reshape([-1, 1]))
+
+        if len(designs) >= args.samples:
+            break
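(Not part of the patch.) A possible invocation of the new collection script, using only the flags it defines above; the output folder name is illustrative, and the `cma` package must be installed (it is declared as an optional extra in `setup.py` below).

```bash
# regenerate the Ant Morphology shards with 25000 CMA-ES samples
python process/process_raw_ant.py --shard-folder ./new_shards --samples 25000
```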
diff --git a/process/process_raw_dkitty.py b/process/process_raw_dkitty.py
new file mode 100644
index 0000000..d7d041d
--- /dev/null
+++ b/process/process_raw_dkitty.py
@@ -0,0 +1,89 @@
+import cma
+import numpy as np
+import multiprocessing
+import os
+import argparse
+import itertools
+from design_bench.oracles.exact.dkitty_morphology_oracle import DKittyMorphologyOracle
+from design_bench.datasets.continuous.dkitty_morphology_dataset import DKittyMorphologyDataset
+from morphing_agents.mujoco.dkitty.designs import DEFAULT_DESIGN
+from morphing_agents.mujoco.dkitty.designs import normalize_design_vector
+from morphing_agents.mujoco.dkitty.designs import denormalize_design_vector
+
+
+def single_evaluate(design):
+    placeholder_dataset = DKittyMorphologyDataset()
+    oracle = DKittyMorphologyOracle(placeholder_dataset)
+    return oracle.predict(
+        denormalize_design_vector(design)[np.newaxis].astype(np.float32))[0]
+
+
+pool = multiprocessing.Pool()
+
+
+def many_evaluate(designs):
+    return pool.map(single_evaluate, designs)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser("Process Raw DKitty Morphologies")
+    parser.add_argument("--shard-folder", type=str, default="./")
+    parser.add_argument("--samples", type=int, default=25000)
+    args = parser.parse_args()
+
+    os.makedirs(os.path.join(
+        args.shard_folder, f"dkitty_morphology/"), exist_ok=True)
+
+    golden_design = normalize_design_vector(
+        np.concatenate(DEFAULT_DESIGN, axis=0))
+
+    sigma = 0.02
+    max_iterations = 250
+    save_every = 1
+
+    designs = [denormalize_design_vector(golden_design)]
+    predictions = [single_evaluate(golden_design)]
+
+    for i in itertools.count():
+
+        initial_design = golden_design + \
+            np.random.normal(0, 0.1, golden_design.shape)
+
+        es = cma.CMAEvolutionStrategy(initial_design, sigma)
+
+        step = 0
+        while not es.stop() and step < max_iterations:
+
+            if len(designs) >= args.samples:
+                break
+
+            xs = es.ask()
+            ys = many_evaluate(xs)
+
+            es.tell(xs, [-yi[0] for yi in ys])
+
+            step += 1
+
+            if step % save_every == 0:
+
+                designs.extend([denormalize_design_vector(xi) for xi in xs])
+                predictions.extend(ys)
+
+                print(f"CMA-ES ({len(designs)} samples)"
+                      f" - Restart {i+1}"
+                      f" - Step {step+1}/{max_iterations}"
+                      f" - Current Objective Value = {np.mean(ys)}")
+
+                np.save(os.path.join(
+                    args.shard_folder,
+                    f"dkitty_morphology/dkitty_morphology-x-0.npy"),
+                    np.array(designs).astype(np.float32))
+
+                np.save(os.path.join(
+                    args.shard_folder,
+                    f"dkitty_morphology/dkitty_morphology-y-0.npy"),
+                    np.array(predictions).astype(np.float32).reshape([-1, 1]))
+
+        if len(designs) >= args.samples:
+            break
diff --git a/setup.py b/setup.py
index 229f6f9..ba4cbe2 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
     LONG_DESCRIPTION = readme.read()
 
 
-setup(name='design-bench', version='2.0.10', license='MIT',
+setup(name='design-bench', version='2.0.11', license='MIT',
       packages=find_packages(include=['design_bench', 'design_bench.*']),
       description='Design-Bench: Benchmarks for '
                   'Data-Driven Offline Model-Based Optimization',
@@ -17,10 +17,10 @@
       author_email='brandon@btrabucco.com',
       url='https://github.com/brandontrabucco/design-bench',
       download_url='https://github.com/'
-                   'brandontrabucco/design-bench/archive/v2_0_10.tar.gz',
+                   'brandontrabucco/design-bench/archive/v2_0_11.tar.gz',
       keywords=['Deep Learning', 'Neural Networks',
                 'Benchmark', 'Model-Based Optimization'],
-      extras_require={'all': ['gym[mujoco]']},
+      extras_require={'all': ['gym[mujoco]'], 'cma': ['cma']},
       install_requires=['pandas', 'requests', 'scikit-learn',
                         'numpy', 'tensorflow>=2.2', 'transformers'],
       classifiers=[
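(Not part of the patch.) With the `cma` extra added to `setup.py`, a full install matching the updated README and the new processing scripts might look like the following; the combined-extras syntax is standard pip usage rather than something the patch documents.

```bash
# MuJoCo-backed tasks plus the optional CMA-ES dependency
pip install "design-bench[all,cma]>=2.0.11"
pip install morphing-agents==1.5.1
```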