diff --git a/README.md b/README.md index 2e83291..e7591de 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,14 @@ The goal of model-based optimization is to find an input **x** that maximizes an Design-Bench can be installed with the complete set of benchmarks via our pip package. ```bash -pip install design-bench[all]>=2.0.7 +pip install design-bench[all]>=2.0.8 pip install morphing-agents==1.4 ``` Alternatively, if you do not have MuJoCo, you may opt for a minimal install. ```bash -pip install design-bench>=2.0.7 +pip install design-bench>=2.0.8 ``` ## Available Tasks diff --git a/design_bench/__init__.py b/design_bench/__init__.py index 0243bbf..ad996b5 100644 --- a/design_bench/__init__.py +++ b/design_bench/__init__.py @@ -4,8 +4,23 @@ from sklearn.gaussian_process.kernels import ConstantKernel, RBF +register('ToyDiscrete-Exact-v0', + 'design_bench.datasets.discrete.toy_discrete_dataset:ToyDiscreteDataset', + 'design_bench.oracles.exact:ToyDiscreteOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + max_percentile=40, + min_percentile=0), + + # keyword arguments for building the exact oracle + oracle_kwargs=dict( + noise_std=0.0)) + + register('GFP-GP-v0', - 'design_bench.datasets.discrete:GFPDataset', + 'design_bench.datasets.discrete.gfp_dataset:GFPDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -34,7 +49,7 @@ register('GFP-RandomForest-v0', - 'design_bench.datasets.discrete:GFPDataset', + 'design_bench.datasets.discrete.gfp_dataset:GFPDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -65,7 +80,7 @@ register('GFP-FullyConnected-v0', - 'design_bench.datasets.discrete:GFPDataset', + 'design_bench.datasets.discrete.gfp_dataset:GFPDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -100,7 +115,7 @@ register('GFP-LSTM-v0', - 'design_bench.datasets.discrete:GFPDataset', + 'design_bench.datasets.discrete.gfp_dataset:GFPDataset', 'design_bench.oracles.tensorflow:LSTMOracle', # keyword arguments for building the dataset @@ -133,7 +148,7 @@ register('GFP-ResNet-v0', - 'design_bench.datasets.discrete:GFPDataset', + 'design_bench.datasets.discrete.gfp_dataset:GFPDataset', 'design_bench.oracles.tensorflow:ResNetOracle', # keyword arguments for building the dataset @@ -168,7 +183,7 @@ register('GFP-Transformer-v0', - 'design_bench.datasets.discrete:GFPDataset', + 'design_bench.datasets.discrete.gfp_dataset:GFPDataset', 'design_bench.oracles.tensorflow:TransformerOracle', # keyword arguments for building the dataset @@ -206,7 +221,7 @@ register('TFBind8-Exact-v0', - 'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.exact:TFBind8Oracle', # keyword arguments for building the dataset @@ -221,7 +236,7 @@ register('TFBind8-GP-v0', - 'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -251,7 +266,7 @@ register('TFBind8-RandomForest-v0', - 'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -283,7 +298,7 @@ register('TFBind8-FullyConnected-v0', - 
'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -319,7 +334,7 @@ register('TFBind8-LSTM-v0', - 'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.tensorflow:LSTMOracle', # keyword arguments for building the dataset @@ -353,7 +368,7 @@ register('TFBind8-ResNet-v0', - 'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.tensorflow:ResNetOracle', # keyword arguments for building the dataset @@ -389,7 +404,7 @@ register('TFBind8-Transformer-v0', - 'design_bench.datasets.discrete:TFBind8Dataset', + 'design_bench.datasets.discrete.tf_bind_8_dataset:TFBind8Dataset', 'design_bench.oracles.tensorflow:TransformerOracle', # keyword arguments for building the dataset @@ -428,7 +443,7 @@ register('TFBind10-Exact-v0', - 'design_bench.datasets.discrete:TFBind10Dataset', + 'design_bench.datasets.discrete.tf_bind_10_dataset:TFBind10Dataset', 'design_bench.oracles.exact:TFBind10Oracle', # keyword arguments for building the dataset @@ -444,7 +459,7 @@ register('NASBench-Exact-v0', - 'design_bench.datasets.discrete:NASBenchDataset', + 'design_bench.datasets.discrete.nas_bench_dataset:NASBenchDataset', 'design_bench.oracles.exact:NASBenchOracle', # keyword arguments for building the dataset @@ -459,7 +474,7 @@ register('UTR-GP-v0', - 'design_bench.datasets.discrete:UTRDataset', + 'design_bench.datasets.discrete.utr_dataset:UTRDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -488,7 +503,7 @@ register('UTR-RandomForest-v0', - 'design_bench.datasets.discrete:UTRDataset', + 'design_bench.datasets.discrete.utr_dataset:UTRDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -519,7 +534,7 @@ register('UTR-FullyConnected-v0', - 'design_bench.datasets.discrete:UTRDataset', + 'design_bench.datasets.discrete.utr_dataset:UTRDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -554,7 +569,7 @@ register('UTR-LSTM-v0', - 'design_bench.datasets.discrete:UTRDataset', + 'design_bench.datasets.discrete.utr_dataset:UTRDataset', 'design_bench.oracles.tensorflow:LSTMOracle', # keyword arguments for building the dataset @@ -587,7 +602,7 @@ register('UTR-ResNet-v0', - 'design_bench.datasets.discrete:UTRDataset', + 'design_bench.datasets.discrete.utr_dataset:UTRDataset', 'design_bench.oracles.tensorflow:ResNetOracle', # keyword arguments for building the dataset @@ -622,7 +637,7 @@ register('UTR-Transformer-v0', - 'design_bench.datasets.discrete:UTRDataset', + 'design_bench.datasets.discrete.utr_dataset:UTRDataset', 'design_bench.oracles.tensorflow:TransformerOracle', # keyword arguments for building the dataset @@ -660,7 +675,7 @@ register('ChEMBL-GP-v0', - 'design_bench.datasets.discrete:ChEMBLDataset', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -692,7 +707,7 @@ register('ChEMBL-RandomForest-v0', - 'design_bench.datasets.discrete:ChEMBLDataset', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building 
the dataset @@ -725,7 +740,7 @@ register('ChEMBL-FullyConnected-v0', - 'design_bench.datasets.discrete:ChEMBLDataset', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -762,7 +777,7 @@ register('ChEMBL-LSTM-v0', - 'design_bench.datasets.discrete:ChEMBLDataset', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.tensorflow:LSTMOracle', # keyword arguments for building the dataset @@ -797,7 +812,7 @@ register('ChEMBL-ResNet-v0', - 'design_bench.datasets.discrete:ChEMBLDataset', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.tensorflow:ResNetOracle', # keyword arguments for building the dataset @@ -834,7 +849,7 @@ register('ChEMBL-Transformer-v0', - 'design_bench.datasets.discrete:ChEMBLDataset', + 'design_bench.datasets.discrete.chembl_dataset:ChEMBLDataset', 'design_bench.oracles.tensorflow:TransformerOracle', # keyword arguments for building the dataset @@ -873,8 +888,23 @@ is_absolute=False))) +register('ToyContinuous-Exact-v0', + 'design_bench.datasets.continuous.toy_continuous_dataset:ToyContinuousDataset', + 'design_bench.oracles.exact:ToyContinuousOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + max_percentile=40, + min_percentile=0), + + # keyword arguments for building the exact oracle + oracle_kwargs=dict( + noise_std=0.0)) + + register('HopperController-Exact-v0', - 'design_bench.datasets.continuous:HopperControllerDataset', + 'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset', 'design_bench.oracles.exact:HopperControllerOracle', # keyword arguments for building the dataset @@ -889,7 +919,7 @@ register('HopperController-GP-v0', - 'design_bench.datasets.continuous:HopperControllerDataset', + 'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -921,7 +951,7 @@ register('HopperController-RandomForest-v0', - 'design_bench.datasets.continuous:HopperControllerDataset', + 'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -952,7 +982,7 @@ register('HopperController-FullyConnected-v0', - 'design_bench.datasets.continuous:HopperControllerDataset', + 'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -986,7 +1016,7 @@ register('Superconductor-GP-v0', - 'design_bench.datasets.continuous:SuperconductorDataset', + 'design_bench.datasets.continuous.superconductor_dataset:SuperconductorDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -1018,7 +1048,7 @@ register('Superconductor-RandomForest-v0', - 'design_bench.datasets.continuous:SuperconductorDataset', + 'design_bench.datasets.continuous.superconductor_dataset:SuperconductorDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -1049,7 +1079,7 @@ register('Superconductor-FullyConnected-v0', - 'design_bench.datasets.continuous:SuperconductorDataset', + 'design_bench.datasets.continuous.superconductor_dataset:SuperconductorDataset', 
'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -1083,7 +1113,7 @@ register('AntMorphology-Exact-v0', - 'design_bench.datasets.continuous:AntMorphologyDataset', + 'design_bench.datasets.continuous.ant_morphology_dataset:AntMorphologyDataset', 'design_bench.oracles.exact:AntMorphologyOracle', # keyword arguments for building the dataset @@ -1098,7 +1128,7 @@ register('AntMorphology-GP-v0', - 'design_bench.datasets.continuous:AntMorphologyDataset', + 'design_bench.datasets.continuous.ant_morphology_dataset:AntMorphologyDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -1130,7 +1160,7 @@ register('AntMorphology-RandomForest-v0', - 'design_bench.datasets.continuous:AntMorphologyDataset', + 'design_bench.datasets.continuous.ant_morphology_dataset:AntMorphologyDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -1161,7 +1191,7 @@ register('AntMorphology-FullyConnected-v0', - 'design_bench.datasets.continuous:AntMorphologyDataset', + 'design_bench.datasets.continuous.ant_morphology_dataset:AntMorphologyDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset @@ -1195,7 +1225,7 @@ register('DKittyMorphology-Exact-v0', - 'design_bench.datasets.continuous:DKittyMorphologyDataset', + 'design_bench.datasets.continuous.dkitty_morphology_dataset:DKittyMorphologyDataset', 'design_bench.oracles.exact:DKittyMorphologyOracle', # keyword arguments for building the dataset @@ -1210,7 +1240,7 @@ register('DKittyMorphology-GP-v0', - 'design_bench.datasets.continuous:DKittyMorphologyDataset', + 'design_bench.datasets.continuous.dkitty_morphology_dataset:DKittyMorphologyDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', # keyword arguments for building the dataset @@ -1242,7 +1272,7 @@ register('DKittyMorphology-RandomForest-v0', - 'design_bench.datasets.continuous:DKittyMorphologyDataset', + 'design_bench.datasets.continuous.dkitty_morphology_dataset:DKittyMorphologyDataset', 'design_bench.oracles.sklearn:RandomForestOracle', # keyword arguments for building the dataset @@ -1273,7 +1303,7 @@ register('DKittyMorphology-FullyConnected-v0', - 'design_bench.datasets.continuous:DKittyMorphologyDataset', + 'design_bench.datasets.continuous.dkitty_morphology_dataset:DKittyMorphologyDataset', 'design_bench.oracles.tensorflow:FullyConnectedOracle', # keyword arguments for building the dataset diff --git a/design_bench/datasets/continuous/__init__.py b/design_bench/datasets/continuous/__init__.py index 1d91bab..8b13789 100644 --- a/design_bench/datasets/continuous/__init__.py +++ b/design_bench/datasets/continuous/__init__.py @@ -1,4 +1 @@ -from .hopper_controller_dataset import HopperControllerDataset -from .superconductor_dataset import SuperconductorDataset -from .ant_morphology_dataset import AntMorphologyDataset -from .dkitty_morphology_dataset import DKittyMorphologyDataset + diff --git a/design_bench/datasets/continuous/toy_continuous_dataset.py b/design_bench/datasets/continuous/toy_continuous_dataset.py new file mode 100644 index 0000000..7e1eff1 --- /dev/null +++ b/design_bench/datasets/continuous/toy_continuous_dataset.py @@ -0,0 +1,249 @@ +from design_bench.datasets.continuous_dataset import ContinuousDataset +from design_bench.disk_resource import DiskResource + + +TOY_CONTINUOUS_FILES = ["toy_continuous/toy_continuous-x-0.npy", + "toy_continuous/toy_continuous-x-1.npy", 
+ "toy_continuous/toy_continuous-x-2.npy", + "toy_continuous/toy_continuous-x-3.npy", + "toy_continuous/toy_continuous-x-4.npy", + "toy_continuous/toy_continuous-x-5.npy", + "toy_continuous/toy_continuous-x-6.npy", + "toy_continuous/toy_continuous-x-7.npy", + "toy_continuous/toy_continuous-x-8.npy", + "toy_continuous/toy_continuous-x-9.npy", + "toy_continuous/toy_continuous-x-10.npy", + "toy_continuous/toy_continuous-x-11.npy", + "toy_continuous/toy_continuous-x-12.npy", + "toy_continuous/toy_continuous-x-13.npy"] + + +class ToyContinuousDataset(ContinuousDataset): + """A toy dataset that defines a common set of functions + and attributes for a model-based optimization dataset, where the + goal is to find a design 'x' that maximizes a prediction 'y': + + max_x { y = f(x) } + + Public Attributes: + + name: str + An attribute that specifies the name of a model-based optimization + dataset, which might be used when labelling plots in a diagram of + performance in a research paper using design-bench + x_name: str + An attribute that specifies the name of designs in a model-based + optimization dataset, which might be used when labelling plots + in a visualization of performance in a research paper + y_name: str + An attribute that specifies the name of predictions in a model-based + optimization dataset, which might be used when labelling plots + in a visualization of performance in a research paper + + x: np.ndarray + the design values 'x' for a model-based optimization problem + represented as a numpy array of arbitrary type + input_shape: Tuple[int] + the shape of a single design values 'x', represented as a list of + integers similar to calling np.ndarray.shape + input_size: int + the total number of components in the design values 'x', represented + as a single integer, the product of its shape entries + input_dtype: np.dtype + the data type of the design values 'x', which is typically either + floating point or integer (np.float32 or np.int32) + + y: np.ndarray + the prediction values 'y' for a model-based optimization problem + represented by a scalar floating point value per 'x' + output_shape: Tuple[int] + the shape of a single prediction value 'y', represented as a list of + integers similar to calling np.ndarray.shape + output_size: int + the total number of components in the prediction values 'y', + represented as a single integer, the product of its shape entries + output_dtype: np.dtype + the data type of the prediction values 'y', which is typically a + type of floating point (np.float32 or np.float16) + + dataset_size: int + the total number of paired design values 'x' and prediction values + 'y' in the dataset, represented as a single integer + dataset_max_percentile: float + the percentile between 0 and 100 of prediction values 'y' above + which are hidden from access by members outside the class + dataset_min_percentile: float + the percentile between 0 and 100 of prediction values 'y' below + which are hidden from access by members outside the class + dataset_max_output: float + the specific cutoff threshold for prediction values 'y' above + which are hidden from access by members outside the class + dataset_min_output: float + the specific cutoff threshold for prediction values 'y' below + which are hidden from access by members outside the class + + internal_batch_size: int + the integer number of samples per batch that is used internally + when processing the dataset and generating samples + freeze_statistics: bool + a boolean indicator that when set to true prevents 
methods from + changing the normalization and subsampling statistics + + is_normalized_x: bool + a boolean indicator that specifies whether the design values + in the dataset are being normalized + x_mean: np.ndarray + a numpy array that is automatically calculated to be the mean + of visible design values in the dataset + x_standard_dev: np.ndarray + a numpy array that is automatically calculated to be the standard + deviation of visible design values in the dataset + + is_normalized_y: bool + a boolean indicator that specifies whether the prediction values + in the dataset are being normalized + y_mean: np.ndarray + a numpy array that is automatically calculated to be the mean + of visible prediction values in the dataset + y_standard_dev: np.ndarray + a numpy array that is automatically calculated to be the standard + deviation of visible prediction values in the dataset + + Public Methods: + + iterate_batches(batch_size: int, return_x: bool, + return_y: bool, drop_remainder: bool) + -> Iterable[Tuple[np.ndarray, np.ndarray]]: + Returns an object that supports iterations, which yields tuples of + design values 'x' and prediction values 'y' from a model-based + optimization data set for training a model + iterate_samples(return_x: bool, return_y: bool): + -> Iterable[Tuple[np.ndarray, np.ndarray]]: + Returns an object that supports iterations, which yields tuples of + design values 'x' and prediction values 'y' from a model-based + optimization data set for training a model + + subsample(max_samples: int, + max_percentile: float, + min_percentile: float): + a function that exposes a subsampled version of a much larger + model-based optimization dataset containing design values 'x' + whose prediction values 'y' are skewed + relabel(relabel_function: + Callable[[np.ndarray, np.ndarray], np.ndarray]): + a function that accepts a function that maps from a dataset of + design values 'x' and prediction values 'y' to a new set of + prediction values 'y' and relabels the model-based optimization dataset + + clone(subset: set, shard_size: int, + to_disk: bool, disk_target: str, is_absolute: bool): + Generate a cloned copy of a model-based optimization dataset + using the provided name and shard generation settings; useful + when relabelling a dataset buffer from the disk + split(fraction: float, subset: set, shard_size: int, + to_disk: bool, disk_target: str, is_absolute: bool): + split a model-based optimization data set into a training set and + a validation set allocating 'fraction' of the data set to the + validation set and the rest to the training set + + normalize_x(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point design values 'x' + as input and standardizes them so that they have zero + empirical mean and unit empirical variance + denormalize_x(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point design values 'x' + as input and undoes standardization so that they have their + original empirical mean and variance + normalize_y(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point prediction values 'y' + as input and standardizes them so that they have zero + empirical mean and unit empirical variance + denormalize_y(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point prediction values 'y' + as input and undoes standardization so that they have their + original empirical mean and variance + + map_normalize_x(): + a destructive function that standardizes the design values 
'x' + in the class dataset in-place so that they have zero empirical + mean and unit variance + map_denormalize_x(): + a destructive function that undoes standardization of the + design values 'x' in the class dataset in-place which are expected + to have zero empirical mean and unit variance + map_normalize_y(): + a destructive function that standardizes the prediction values 'y' + in the class dataset in-place so that they have zero empirical + mean and unit variance + map_denormalize_y(): + a destructive function that undoes standardization of the + prediction values 'y' in the class dataset in-place which are + expected to have zero empirical mean and unit variance + + """ + + name = "toy_continuous/toy_continuous" + x_name = "toy_design" + y_name = "toy_prediction" + + @staticmethod + def register_x_shards(): + """Registers a remote file for download that contains design values + in a format compatible with the dataset builder class; + these files are downloaded all at once in the dataset initialization + + Returns: + + resources: list of RemoteResource + a list of RemoteResource objects specific to this dataset, which + will be automatically downloaded while the dataset is built + and may serve as shards if the dataset is large + + """ + + return [DiskResource( + file, is_absolute=False, + download_target=f"https://design-bench." + f"s3-us-west-1.amazonaws.com/{file}", + download_method="direct") for file in TOY_CONTINUOUS_FILES] + + @staticmethod + def register_y_shards(): + """Registers a remote file for download that contains prediction + values in a format compatible with the dataset builder class; + these files are downloaded all at once in the dataset initialization + + Returns: + + resources: list of RemoteResource + a list of RemoteResource objects specific to this dataset, which + will be automatically downloaded while the dataset is built + and may serve as shards if the dataset is large + + """ + + return [DiskResource( + file.replace("-x-", "-y-"), is_absolute=False, + download_target=f"https://design-bench." 
+ f"s3-us-west-1.amazonaws.com/" + f"{file.replace('-x-', '-y-')}", + download_method="direct") for file in TOY_CONTINUOUS_FILES] + + def __init__(self, **kwargs): + """Initialize a model-based optimization dataset and prepare + that dataset by loading that dataset from disk and modifying + its distribution + + Arguments: + + **kwargs: dict + additional keyword arguments which are used to parameterize the + data set generation process, including which shard files are used + if multiple sets of data set shard files can be loaded + + """ + + # initialize the dataset using the method in the base class + super(ToyContinuousDataset, self).__init__( + self.register_x_shards(), + self.register_y_shards(), **kwargs) diff --git a/design_bench/datasets/discrete/__init__.py b/design_bench/datasets/discrete/__init__.py index af89ce3..8b13789 100644 --- a/design_bench/datasets/discrete/__init__.py +++ b/design_bench/datasets/discrete/__init__.py @@ -1,6 +1 @@ -from .chembl_dataset import ChEMBLDataset -from .gfp_dataset import GFPDataset -from .nas_bench_dataset import NASBenchDataset -from .tf_bind_8_dataset import TFBind8Dataset -from .tf_bind_10_dataset import TFBind10Dataset -from .utr_dataset import UTRDataset + diff --git a/design_bench/datasets/discrete/toy_discrete_dataset.py b/design_bench/datasets/discrete/toy_discrete_dataset.py new file mode 100644 index 0000000..900c04b --- /dev/null +++ b/design_bench/datasets/discrete/toy_discrete_dataset.py @@ -0,0 +1,281 @@ +from design_bench.datasets.discrete_dataset import DiscreteDataset +from design_bench.disk_resource import DiskResource + + +TOY_DISCRETE_FILES = ["toy_discrete/toy_discrete-x-0.npy", + "toy_discrete/toy_discrete-x-1.npy", + "toy_discrete/toy_discrete-x-2.npy", + "toy_discrete/toy_discrete-x-3.npy", + "toy_discrete/toy_discrete-x-4.npy", + "toy_discrete/toy_discrete-x-5.npy", + "toy_discrete/toy_discrete-x-6.npy", + "toy_discrete/toy_discrete-x-7.npy", + "toy_discrete/toy_discrete-x-8.npy", + "toy_discrete/toy_discrete-x-9.npy", + "toy_discrete/toy_discrete-x-10.npy", + "toy_discrete/toy_discrete-x-11.npy", + "toy_discrete/toy_discrete-x-12.npy", + "toy_discrete/toy_discrete-x-13.npy"] + + +class ToyDiscreteDataset(DiscreteDataset): + """A toy dataset that defines a common set of functions + and attributes for a model-based optimization dataset, where the + goal is to find a design 'x' that maximizes a prediction 'y': + + max_x { y = f(x) } + + Public Attributes: + + name: str + An attribute that specifies the name of a model-based optimization + dataset, which might be used when labelling plots in a diagram of + performance in a research paper using design-bench + x_name: str + An attribute that specifies the name of designs in a model-based + optimization dataset, which might be used when labelling plots + in a visualization of performance in a research paper + y_name: str + An attribute that specifies the name of predictions in a model-based + optimization dataset, which might be used when labelling plots + in a visualization of performance in a research paper + + x: np.ndarray + the design values 'x' for a model-based optimization problem + represented as a numpy array of arbitrary type + input_shape: Tuple[int] + the shape of a single design values 'x', represented as a list of + integers similar to calling np.ndarray.shape + input_size: int + the total number of components in the design values 'x', represented + as a single integer, the product of its shape entries + input_dtype: np.dtype + the data type of the design values 
'x', which is typically either + floating point or integer (np.float32 or np.int32) + + y: np.ndarray + the prediction values 'y' for a model-based optimization problem + represented by a scalar floating point value per 'x' + output_shape: Tuple[int] + the shape of a single prediction value 'y', represented as a list of + integers similar to calling np.ndarray.shape + output_size: int + the total number of components in the prediction values 'y', + represented as a single integer, the product of its shape entries + output_dtype: np.dtype + the data type of the prediction values 'y', which is typically a + type of floating point (np.float32 or np.float16) + + dataset_size: int + the total number of paired design values 'x' and prediction values + 'y' in the dataset, represented as a single integer + dataset_max_percentile: float + the percentile between 0 and 100 of prediction values 'y' above + which are hidden from access by members outside the class + dataset_min_percentile: float + the percentile between 0 and 100 of prediction values 'y' below + which are hidden from access by members outside the class + dataset_max_output: float + the specific cutoff threshold for prediction values 'y' above + which are hidden from access by members outside the class + dataset_min_output: float + the specific cutoff threshold for prediction values 'y' below + which are hidden from access by members outside the class + + internal_batch_size: int + the integer number of samples per batch that is used internally + when processing the dataset and generating samples + freeze_statistics: bool + a boolean indicator that when set to true prevents methods from + changing the normalization and sub sampling statistics + + is_normalized_x: bool + a boolean indicator that specifies whether the design values + in the dataset are being normalized + x_mean: np.ndarray + a numpy array that is automatically calculated to be the mean + of visible design values in the dataset + x_standard_dev: np.ndarray + a numpy array that is automatically calculated to be the standard + deviation of visible design values in the dataset + + is_normalized_y: bool + a boolean indicator that specifies whether the prediction values + in the dataset are being normalized + y_mean: np.ndarray + a numpy array that is automatically calculated to be the mean + of visible prediction values in the dataset + y_standard_dev: np.ndarray + a numpy array that is automatically calculated to be the standard + deviation of visible prediction values in the dataset + + is_logits: bool (only supported for a DiscreteDataset) + a value that indicates whether the design values contained in the + model-based optimization dataset have already been converted to + logits and need not be converted again + + Public Methods: + + iterate_batches(batch_size: int, return_x: bool, + return_y: bool, drop_remainder: bool) + -> Iterable[Tuple[np.ndarray, np.ndarray]]: + Returns an object that supports iterations, which yields tuples of + design values 'x' and prediction values 'y' from a model-based + optimization data set for training a model + iterate_samples(return_x: bool, return_y: bool): + -> Iterable[Tuple[np.ndarray, np.ndarray]]: + Returns an object that supports iterations, which yields tuples of + design values 'x' and prediction values 'y' from a model-based + optimization data set for training a model + + subsample(max_samples: int, + max_percentile: float, + min_percentile: float): + a function that exposes a subsampled version of a much larger + model-based 
optimization dataset containing design values 'x' + whose prediction values 'y' are skewed + relabel(relabel_function: + Callable[[np.ndarray, np.ndarray], np.ndarray]): + a function that accepts a function that maps from a dataset of + design values 'x' and prediction values 'y' to a new set of + prediction values 'y' and relabels the model-based optimization dataset + + clone(subset: set, shard_size: int, + to_disk: bool, disk_target: str, is_absolute: bool): + Generate a cloned copy of a model-based optimization dataset + using the provided name and shard generation settings; useful + when relabelling a dataset buffer from the disk + split(fraction: float, subset: set, shard_size: int, + to_disk: bool, disk_target: str, is_absolute: bool): + split a model-based optimization data set into a training set and + a validation set allocating 'fraction' of the data set to the + validation set and the rest to the training set + + normalize_x(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point design values 'x' + as input and standardizes them so that they have zero + empirical mean and unit empirical variance + denormalize_x(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point design values 'x' + as input and undoes standardization so that they have their + original empirical mean and variance + normalize_y(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point prediction values 'y' + as input and standardizes them so that they have zero + empirical mean and unit empirical variance + denormalize_y(new_x: np.ndarray) -> np.ndarray: + a helper function that accepts floating point prediction values 'y' + as input and undoes standardization so that they have their + original empirical mean and variance + + map_normalize_x(): + a destructive function that standardizes the design values 'x' + in the class dataset in-place so that they have zero empirical + mean and unit variance + map_denormalize_x(): + a destructive function that undoes standardization of the + design values 'x' in the class dataset in-place which are expected + to have zero empirical mean and unit variance + map_normalize_y(): + a destructive function that standardizes the prediction values 'y' + in the class dataset in-place so that they have zero empirical + mean and unit variance + map_denormalize_y(): + a destructive function that undoes standardization of the + prediction values 'y' in the class dataset in-place which are + expected to have zero empirical mean and unit variance + + --- for discrete tasks only + + to_logits(np.ndarray) -> np.ndarray: + A helper function that accepts design values represented as a numpy + array of integers as input and converts them to floating point + logits of a certain probability distribution + to_integers(np.ndarray) -> np.ndarray: + A helper function that accepts design values represented as a numpy + array of floating point logits as input and converts them to integers + representing the max of the distribution + + map_to_logits(): + a function that processes the dataset corresponding to this + model-based optimization problem, and converts integers to a + floating point representation as logits + map_to_integers(): + a function that processes the dataset corresponding to this + model-based optimization problem, and converts a floating point + representation as logits to integers + + """ + + name = "toy_discrete/toy_discrete" + x_name = "toy_design" + y_name = "toy_prediction" + + @staticmethod + def 
register_x_shards(): + """Registers a remote file for download that contains design values + in a format compatible with the dataset builder class; + these files are downloaded all at once in the dataset initialization + + Returns: + + resources: list of RemoteResource + a list of RemoteResource objects specific to this dataset, which + will be automatically downloaded while the dataset is built + and may serve as shards if the dataset is large + + """ + + return [DiskResource( + file, is_absolute=False, + download_target=f"https://design-bench." + f"s3-us-west-1.amazonaws.com/{file}", + download_method="direct") for file in TOY_DISCRETE_FILES] + + @staticmethod + def register_y_shards(): + """Registers a remote file for download that contains prediction + values in a format compatible with the dataset builder class; + these files are downloaded all at once in the dataset initialization + + Returns: + + resources: list of RemoteResource + a list of RemoteResource objects specific to this dataset, which + will be automatically downloaded while the dataset is built + and may serve as shards if the dataset is large + + """ + + return [DiskResource( + file.replace("-x-", "-y-"), is_absolute=False, + download_target=f"https://design-bench." + f"s3-us-west-1.amazonaws.com/" + f"{file.replace('-x-', '-y-')}", + download_method="direct") for file in TOY_DISCRETE_FILES] + + def __init__(self, soft_interpolation=0.6, **kwargs): + """Initialize a model-based optimization dataset and prepare + that dataset by loading that dataset from disk and modifying + its distribution + + Arguments: + + soft_interpolation: float + floating point hyper parameter used when converting design values + from integers to a floating point representation as logits, which + interpolates between a uniform and dirac distribution + 1.0 = dirac, 0.0 -> uniform + **kwargs: dict + additional keyword arguments which are used to parameterize the + data set generation process, including which shard files are used + if multiple sets of data set shard files can be loaded + + """ + + # initialize the dataset using the method in the base class + super(ToyDiscreteDataset, self).__init__( + self.register_x_shards(), + self.register_y_shards(), + is_logits=False, num_classes=4, + soft_interpolation=soft_interpolation, **kwargs) diff --git a/design_bench/oracles/exact/__init__.py b/design_bench/oracles/exact/__init__.py index 13c9343..cf023cf 100644 --- a/design_bench/oracles/exact/__init__.py +++ b/design_bench/oracles/exact/__init__.py @@ -1,6 +1,8 @@ from .hopper_controller_oracle import HopperControllerOracle from .ant_morphology_oracle import AntMorphologyOracle from .dkitty_morphology_oracle import DKittyMorphologyOracle +from .toy_continuous_oracle import ToyContinuousOracle from .nas_bench_oracle import NASBenchOracle from .tf_bind_8_oracle import TFBind8Oracle from .tf_bind_10_oracle import TFBind10Oracle +from .toy_discrete_oracle import ToyDiscreteOracle diff --git a/design_bench/oracles/exact/ant_morphology_oracle.py b/design_bench/oracles/exact/ant_morphology_oracle.py index bfd9dd8..56b68fd 100644 --- a/design_bench/oracles/exact/ant_morphology_oracle.py +++ b/design_bench/oracles/exact/ant_morphology_oracle.py @@ -4,7 +4,7 @@ from morphing_agents.mujoco.ant.elements import LEG_UPPER_BOUND from design_bench.oracles.exact_oracle import ExactOracle from design_bench.datasets.continuous_dataset import ContinuousDataset -from design_bench.datasets.continuous import AntMorphologyDataset +from 
design_bench.datasets.continuous.ant_morphology_dataset import AntMorphologyDataset from design_bench.disk_resource import DiskResource import numpy as np import pickle as pkl diff --git a/design_bench/oracles/exact/dkitty_morphology_oracle.py b/design_bench/oracles/exact/dkitty_morphology_oracle.py index 5081624..1931039 100644 --- a/design_bench/oracles/exact/dkitty_morphology_oracle.py +++ b/design_bench/oracles/exact/dkitty_morphology_oracle.py @@ -4,7 +4,7 @@ from morphing_agents.mujoco.dkitty.elements import LEG_UPPER_BOUND from design_bench.oracles.exact_oracle import ExactOracle from design_bench.datasets.continuous_dataset import ContinuousDataset -from design_bench.datasets.continuous import DKittyMorphologyDataset +from design_bench.datasets.continuous.dkitty_morphology_dataset import DKittyMorphologyDataset from design_bench.disk_resource import DiskResource import numpy as np import pickle as pkl diff --git a/design_bench/oracles/exact/toy_continuous_oracle.py b/design_bench/oracles/exact/toy_continuous_oracle.py new file mode 100644 index 0000000..f5fcf15 --- /dev/null +++ b/design_bench/oracles/exact/toy_continuous_oracle.py @@ -0,0 +1,162 @@ +from design_bench.oracles.exact_oracle import ExactOracle +from design_bench.datasets.continuous_dataset import ContinuousDataset +from design_bench.datasets.continuous.toy_continuous_dataset import ToyContinuousDataset +from design_bench.disk_resource import DiskResource +import numpy as np + + +class ToyContinuousOracle(ExactOracle): + """A class for managing the ground truth score function f(x) + for model-based optimization problems, where the + goal is to find a design 'x' that maximizes a prediction 'y': + + max_x { y = f(x) } + + Public Attributes: + + external_dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class which points to + the mutable task dataset for a model-based optimization problem + + internal_dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class which has frozen + statistics and is used for training the oracle + + is_batched: bool + a boolean variable that indicates whether the evaluation function + implemented for a particular oracle is batched, which affects + the scaling coefficient of its computational cost + + internal_batch_size: int + an integer representing the number of design values to process + internally at the same time, if None defaults to the entire + tensor given to the self.score method + internal_measurements: int + an integer representing the number of independent measurements of + the prediction made by the oracle, which are subsequently + averaged, and is useful when the oracle is stochastic + + noise_std: float + the standard deviation of gaussian noise added to the prediction + values 'y' coming out of the ground truth score function f(x) + in order to make the optimization problem difficult + + expect_normalized_y: bool + a boolean indicator that specifies whether the outputs of the oracle + score function are expected to be normalized + expect_normalized_x: bool + a boolean indicator that specifies whether the inputs to the oracle + score function are expected to be normalized + expect_logits: bool + a boolean that specifies whether the oracle score function is + expecting logits when the dataset is discrete + + Public Methods: + + predict(np.ndarray) -> np.ndarray: + a function that accepts a batch of design values 'x' as input and for + each design computes a prediction value 'y' which corresponds + to the score in a 
model-based optimization problem + + check_input_format(DatasetBuilder) -> bool: + a function that accepts a dataset builder as input and returns true + when design values 'x' with the shape used by that dataset are + compatible with this class of approximate oracle + + """ + + name = "toy_prediction" + + @classmethod + def supported_datasets(cls): + """An attribute that defines the set of dataset classes which this + oracle can be applied to, forming a valid ground truth score + function for a model-based optimization problem + + """ + + return {ToyContinuousDataset} + + @classmethod + def fully_characterized(cls): + """An attribute that defines whether all possible inputs to the + model-based optimization problem have been evaluated and + are returned via lookup in self.predict + + """ + + return False + + @classmethod + def is_simulated(cls): + """An attribute that defines whether the values returned by the oracle + were obtained by running a computer simulation rather than + performing physical experiments with real data + + """ + + return True + + def protected_predict(self, x): + """Score function to be implemented by oracle subclasses, where x is + either a batch of designs if self.is_batched is True or is a + single design when self._is_batched is False + + Arguments: + + x_batch: np.ndarray + a batch or single design 'x' that will be given as input to the + oracle model in order to obtain a prediction value 'y' for + each 'x' which is then returned + + Returns: + + y_batch: np.ndarray + a batch or single prediction 'y' made by the oracle model, + corresponding to the ground truth score for each design + value 'x' in a model-based optimization problem + + """ + + return np.square(x - + self.optimum).sum(keepdims=True).astype(np.float32) + + def __init__(self, dataset: ContinuousDataset, **kwargs): + """Initialize the ground truth score function f(x) for a model-based + optimization problem, which involves loading the parameters of an + oracle model and estimating its computational cost + + Arguments: + + dataset: ContinuousDataset + an instance of a subclass of the DatasetBuilder class which has + a set of design values 'x' and prediction values 'y', and defines + batching and sampling methods for those attributes + noise_std: float + the standard deviation of gaussian noise added to the prediction + values 'y' coming out of the ground truth score function f(x) + in order to make the optimization problem difficult + internal_measurements: int + an integer representing the number of independent measurements of + the prediction made by the oracle, which are subsequently + averaged, and is useful when the oracle is stochastic + + """ + + # ensure optimum has been downloaded + optimum = "toy_continuous/optimum.npy" + optimum = DiskResource( + optimum, is_absolute=False, download_method="direct", + download_target=f"https://design-bench." 
+ f"s3-us-west-1.amazonaws.com/{optimum}") + if not optimum.is_downloaded and not optimum.download(): + raise ValueError("unable to download optimum for toy example") + + # load optimum used to calculate y values + self.optimum = np.load(optimum.disk_target) + + # initialize the oracle using the super class + super(ToyContinuousOracle, self).__init__( + dataset, internal_batch_size=1, is_batched=False, + expect_normalized_y=False, + expect_normalized_x=False, expect_logits=None, **kwargs) diff --git a/design_bench/oracles/exact/toy_discrete_oracle.py b/design_bench/oracles/exact/toy_discrete_oracle.py new file mode 100644 index 0000000..f83aa56 --- /dev/null +++ b/design_bench/oracles/exact/toy_discrete_oracle.py @@ -0,0 +1,162 @@ +from design_bench.oracles.exact_oracle import ExactOracle +from design_bench.datasets.discrete_dataset import DiscreteDataset +from design_bench.datasets.discrete.toy_discrete_dataset import ToyDiscreteDataset +from design_bench.disk_resource import DiskResource +import numpy as np + + +class ToyDiscreteOracle(ExactOracle): + """An abstract class for managing the ground truth score functions f(x) + for model-based optimization problems, where the + goal is to find a design 'x' that maximizes a prediction 'y': + + max_x { y = f(x) } + + Public Attributes: + + external_dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class which points to + the mutable task dataset for a model-based optimization problem + + internal_dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class which has frozen + statistics and is used for training the oracle + + is_batched: bool + a boolean variable that indicates whether the evaluation function + implemented for a particular oracle is batched, which effects + the scaling coefficient of its computational cost + + internal_batch_size: int + an integer representing the number of design values to process + internally at the same time, if None defaults to the entire + tensor given to the self.score method + internal_measurements: int + an integer representing the number of independent measurements of + the prediction made by the oracle, which are subsequently + averaged, and is useful when the oracle is stochastic + + noise_std: float + the standard deviation of gaussian noise added to the prediction + values 'y' coming out of the ground truth score function f(x) + in order to make the optimization problem difficult + + expect_normalized_y: bool + a boolean indicator that specifies whether the inputs to the oracle + score function are expected to be normalized + expect_normalized_x: bool + a boolean indicator that specifies whether the outputs of the oracle + score function are expected to be normalized + expect_logits: bool + a boolean that specifies whether the oracle score function is + expecting logits when the dataset is discrete + + Public Methods: + + predict(np.ndarray) -> np.ndarray: + a function that accepts a batch of design values 'x' as input and for + each design computes a prediction value 'y' which corresponds + to the score in a model-based optimization problem + + check_input_format(DatasetBuilder) -> bool: + a function that accepts a list of integers as input and returns true + when design values 'x' with the shape specified by that list are + compatible with this class of approximate oracle + + """ + + name = "toy_prediction" + + @classmethod + def supported_datasets(cls): + """An attribute the defines the set of dataset classes which this + oracle can be applied to 
forming a valid ground truth score + function for a model-based optimization problem + + """ + + return {ToyDiscreteDataset} + + @classmethod + def fully_characterized(cls): + """An attribute the defines whether all possible inputs to the + model-based optimization problem have been evaluated and + are are returned via lookup in self.predict + + """ + + return False + + @classmethod + def is_simulated(cls): + """An attribute the defines whether the values returned by the oracle + were obtained by running a computer simulation rather than + performing physical experiments with real data + + """ + + return True + + def protected_predict(self, x): + """Score function to be implemented by oracle subclasses, where x is + either a batch of designs if self.is_batched is True or is a + single design when self._is_batched is False + + Arguments: + + x_batch: np.ndarray + a batch or single design 'x' that will be given as input to the + oracle model in order to obtain a prediction value 'y' for + each 'x' which is then returned + + Returns: + + y_batch: np.ndarray + a batch or single prediction 'y' made by the oracle model, + corresponding to the ground truth score for each design + value 'x' in a model-based optimization problem + + """ + + return np.square(x.astype(np.float32) - + self.optimum).sum(keepdims=True).astype(np.float32) + + def __init__(self, dataset: DiscreteDataset, **kwargs): + """Initialize the ground truth score function f(x) for a model-based + optimization problem, which involves loading the parameters of an + oracle model and estimating its computational cost + + Arguments: + + dataset: DiscreteDataset + an instance of a subclass of the DatasetBuilder class which has + a set of design values 'x' and prediction values 'y', and defines + batching and sampling methods for those attributes + noise_std: float + the standard deviation of gaussian noise added to the prediction + values 'y' coming out of the ground truth score function f(x) + in order to make the optimization problem difficult + internal_measurements: int + an integer representing the number of independent measurements of + the prediction made by the oracle, which are subsequently + averaged, and is useful when the oracle is stochastic + + """ + + # ensure optimum has been downloaded + optimum = "toy_discrete/optimum.npy" + optimum = DiskResource( + optimum, is_absolute=False, download_method="direct", + download_target=f"https://design-bench." 
+ f"s3-us-west-1.amazonaws.com/{optimum}") + if not optimum.is_downloaded and not optimum.download(): + raise ValueError("unable to download optimum for toy example") + + # load optimum used to calculate y values + self.optimum = np.load(optimum.disk_target) + + # initialize the oracle using the super class + super(ToyDiscreteOracle, self).__init__( + dataset, internal_batch_size=1, is_batched=False, + expect_normalized_y=False, + expect_normalized_x=False, expect_logits=False, **kwargs) diff --git a/process/process_raw_toy_continuous.py b/process/process_raw_toy_continuous.py new file mode 100644 index 0000000..c6e9c03 --- /dev/null +++ b/process/process_raw_toy_continuous.py @@ -0,0 +1,78 @@ +from design_bench.disk_resource import DATA_DIR +from design_bench.disk_resource import google_drive_download +from deepchem.feat.smiles_tokenizer import SmilesTokenizer +import pandas as pd +import numpy as np +import argparse +import glob +import os +import math +import itertools + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser("Process Toy Continuous Dataset") + parser.add_argument("--shard-folder", type=str, default="./") + parser.add_argument("--seq-length", type=int, default=8) + parser.add_argument("--options", type=int, default=4) + parser.add_argument("--samples-per-shard", type=int, default=5000) + args = parser.parse_args() + + optimum = np.random.uniform(0, 1, size=(args.seq_length,)).astype(np.float32) + os.makedirs(os.path.join( + args.shard_folder, f"toy_continuous/"), exist_ok=True) + np.save(os.path.join(args.shard_folder, + "toy_continuous/", "optimum.npy"), optimum) + + xs = [] + ys = [] + files_list = [] + shard_id = 0 + + options = list(range(args.options)) + list_options = [options for i in range(args.seq_length)] + for sample in itertools.product(*list_options): + + x = np.array(sample, dtype=np.int32).astype(np.float32) + x = (x + np.random.uniform(0., 1., size=x.shape)) / args.options + y = np.square(x - optimum).sum(keepdims=True).astype(np.float32) + + xs.append(x) + ys.append(y) + + if len(xs) == args.samples_per_shard: + + np.save(os.path.join( + args.shard_folder, + f"toy_continuous/" + f"toy_continuous-x-{shard_id}.npy"), xs) + + np.save(os.path.join( + args.shard_folder, + f"toy_continuous/" + f"toy_continuous-y-{shard_id}.npy"), ys) + + xs = [] + ys = [] + files_list.append(f"toy_continuous/" + f"toy_continuous-x-{shard_id}.npy") + shard_id += 1 + + if len(xs) > 0: + + np.save(os.path.join( + args.shard_folder, + f"toy_continuous/" + f"toy_continuous-x-{shard_id}.npy"), xs) + + np.save(os.path.join( + args.shard_folder, + f"toy_continuous/" + f"toy_continuous-y-{shard_id}.npy"), ys) + + xs = [] + ys = [] + files_list.append(f"toy_continuous/" + f"toy_continuous-x-{shard_id}.npy") + shard_id += 1 diff --git a/process/process_raw_toy_discrete.py b/process/process_raw_toy_discrete.py new file mode 100644 index 0000000..371d231 --- /dev/null +++ b/process/process_raw_toy_discrete.py @@ -0,0 +1,73 @@ +import numpy as np +import argparse +import os +import itertools + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser("Process Toy Discrete Dataset") + parser.add_argument("--shard-folder", type=str, default="./") + parser.add_argument("--seq-length", type=int, default=8) + parser.add_argument("--options", type=int, default=4) + parser.add_argument("--samples-per-shard", type=int, default=5000) + args = parser.parse_args() + + optimum = np.random.randint(args.options, + size=(args.seq_length,)).astype(np.float32) + 
os.makedirs(os.path.join( + args.shard_folder, f"toy_discrete/"), exist_ok=True) + np.save(os.path.join(args.shard_folder, + "toy_discrete/", "optimum.npy"), optimum) + + xs = [] + ys = [] + files_list = [] + shard_id = 0 + + options = list(range(args.options)) + list_options = [options for i in range(args.seq_length)] + for sample in itertools.product(*list_options): + + x = np.array(sample, dtype=np.int32) + y = np.square(x.astype(np.float32) - + optimum).sum(keepdims=True).astype(np.float32) + + xs.append(x) + ys.append(y) + + if len(xs) == args.samples_per_shard: + + np.save(os.path.join( + args.shard_folder, + f"toy_discrete/" + f"toy_discrete-x-{shard_id}.npy"), xs) + + np.save(os.path.join( + args.shard_folder, + f"toy_discrete/" + f"toy_discrete-y-{shard_id}.npy"), ys) + + xs = [] + ys = [] + files_list.append(f"toy_discrete/" + f"toy_discrete-x-{shard_id}.npy") + shard_id += 1 + + if len(xs) > 0: + + np.save(os.path.join( + args.shard_folder, + f"toy_discrete/" + f"toy_discrete-x-{shard_id}.npy"), xs) + + np.save(os.path.join( + args.shard_folder, + f"toy_discrete/" + f"toy_discrete-y-{shard_id}.npy"), ys) + + xs = [] + ys = [] + files_list.append(f"toy_discrete/" + f"toy_discrete-x-{shard_id}.npy") + shard_id += 1 diff --git a/setup.py b/setup.py index 2884996..422bf65 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ LONG_DESCRIPTION = readme.read() -setup(name='design-bench', version='2.0.7', license='MIT', +setup(name='design-bench', version='2.0.8', license='MIT', packages=find_packages(include=['design_bench', 'design_bench.*']), description='Design-Bench: Benchmarks for ' 'Data-Driven Offline Model-Based Optimization', @@ -17,7 +17,7 @@ author_email='brandon@btrabucco.com', url='https://github.com/brandontrabucco/design-bench', download_url='https://github.com/' - 'brandontrabucco/design-bench/archive/v2_0.tar.gz', + 'brandontrabucco/design-bench/archive/v2_0_8.tar.gz', keywords=['Deep Learning', 'Neural Networks', 'Benchmark', 'Model-Based Optimization'], extras_require={'all': ['gym[mujoco]']},
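For a quick smoke test of the two new toy tasks, the sketch below exercises them through the public task API. This is a minimal example, assuming the standard `design_bench.make` entry point and the `task.x` / `task.predict` attributes, and that the `toy_continuous` / `toy_discrete` shards and `optimum.npy` files are reachable at the S3 bucket referenced above:

```python
import design_bench

# build the two toy tasks registered in design_bench/__init__.py;
# shard files and the optimum are downloaded on first use
for name in ['ToyContinuous-Exact-v0', 'ToyDiscrete-Exact-v0']:
    task = design_bench.make(name)

    # score a small batch of dataset designs with the exact oracle,
    # which returns the squared distance to the hidden optimum
    x = task.x[:16]
    y = task.predict(x)
    print(name, x.shape, y.shape)
```

Because both oracles are registered with `is_batched=False` and `internal_batch_size=1`, `predict` scores designs one at a time; with the default `--seq-length 8` and `--options 4` there are only 4^8 = 65,536 possible designs, so even exhaustive evaluation stays cheap.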