diff --git a/docs/.gitignore b/docs/.gitignore index c795b054e..9476e7f56 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1 +1,2 @@ -build \ No newline at end of file +build +source/user_guide/reference/_autosummary/ \ No newline at end of file diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css new file mode 100644 index 000000000..c1d174308 --- /dev/null +++ b/docs/source/_static/css/custom.css @@ -0,0 +1,3 @@ +.logo__image { + padding: 10px; +} \ No newline at end of file diff --git a/docs/source/_static/favicon.svg b/docs/source/_static/favicon.svg new file mode 100644 index 000000000..39b5fcf16 --- /dev/null +++ b/docs/source/_static/favicon.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/source/_static/logo_black.svg b/docs/source/_static/logo_black.svg new file mode 100644 index 000000000..08ad84319 --- /dev/null +++ b/docs/source/_static/logo_black.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/docs/source/_static/logo_black_notext.svg b/docs/source/_static/logo_black_notext.svg new file mode 100644 index 000000000..65392e20b --- /dev/null +++ b/docs/source/_static/logo_black_notext.svg @@ -0,0 +1,3 @@ + + + diff --git a/docs/source/_static/logo_white.svg b/docs/source/_static/logo_white.svg new file mode 100644 index 000000000..ac0531705 --- /dev/null +++ b/docs/source/_static/logo_white.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/docs/source/_static/logo_white_notext.svg b/docs/source/_static/logo_white_notext.svg new file mode 100644 index 000000000..d345c4383 --- /dev/null +++ b/docs/source/_static/logo_white_notext.svg @@ -0,0 +1,3 @@ + + + diff --git a/docs/source/_static/version_switcher.json b/docs/source/_static/version_switcher.json new file mode 100644 index 000000000..2b782b6a5 --- /dev/null +++ b/docs/source/_static/version_switcher.json @@ -0,0 +1,12 @@ +[ + { + "version": "latest", + "url": "https://maniskill.readthedocs.io/en/latest/" + }, + { + "name": "3.0.0b8 (stable)", + "version": "v3.0.0b8", + "url": "https://maniskill.readthedocs.io/en/v3.0.0b8/", + "preferred": true + } +] \ No newline at end of file diff --git a/docs/source/_templates/class_no_parameters.rst b/docs/source/_templates/class_no_parameters.rst new file mode 100644 index 000000000..4625be391 --- /dev/null +++ b/docs/source/_templates/class_no_parameters.rst @@ -0,0 +1,28 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block methods %} + + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 152c9f6ef..74edd0196 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,9 @@ import os import sys + sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../mani_skill")) -__version__ = "3.0.0b8" +import mani_skill +__version__ = mani_skill.__version__ # Configuration file for the Sphinx documentation builder. 
# # For the full list of built-in configuration values, see the documentation: @@ -21,8 +23,11 @@ extensions = [ "sphinx.ext.autodoc", + "sphinx.ext.autosummary", "sphinx.ext.mathjax", "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", "sphinx_copybutton", "myst_parser", "sphinx_subfigure", @@ -37,13 +42,21 @@ myst_heading_anchors = 4 templates_path = ["_templates"] -exclude_patterns = [] +# exclude_patterns = ["user_guide/reference/_autosummary/*"] # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "pydata_sphinx_theme" +html_logo = "_static/logo_black.svg" +html_favicon = "_static/favicon.svg" + + +json_url = "https://maniskill.readthedocs.io/en/docs-upgrades/_static/version_switcher.json" +version_match = "latest" #os.environ.get("READTHEDOCS_VERSION") +release = __version__ + html_theme_options = { "use_edit_page_button": True, "icon_links": [ @@ -51,12 +64,25 @@ "name": "GitHub", "url": "https://github.com/haosulab/ManiSkill", "icon": "fa-brands fa-github", + }, + { + "name": "Website", + "url": "https://maniskill.ai", + "icon": "fa-solid fa-globe", } ], "external_links": [ {"name": "Changelog", "url": "https://github.com/haosulab/ManiSkill/releases"}, - ] - + ], + "logo": { + "image_dark": "_static/logo_white.svg", + }, + "navbar_center": ["version-switcher", "navbar-nav"], + "show_version_warning_banner": True, + "switcher": { + "json_url": json_url, + "version_match": version_match, + }, } html_context = { "display_github": True, @@ -66,8 +92,18 @@ "conf_py_path": "/source/", "doc_path": "docs/source" } - +html_css_files = [ + 'css/custom.css', +] html_static_path = ['_static'] -autodoc_typehints = "description" +### Autodoc configurations ### +autodoc_typehints = "signature" autodoc_typehints_description_target = "all" +autodoc_default_flags = ['members', 'show-inheritance', 'undoc-members'] + +autosummary_generate = True + +# remove_from_toctrees = ["_autosummary/*"] + +intersphinx_mapping = {'gymnasium': ('https://gymnasium.farama.org/', None)} diff --git a/docs/source/user_guide/datasets/demos.md b/docs/source/user_guide/datasets/demos.md index b272fa99f..01fb0ece1 100644 --- a/docs/source/user_guide/datasets/demos.md +++ b/docs/source/user_guide/datasets/demos.md @@ -1,6 +1,6 @@ # Demonstrations -We provide a command line tool to download demonstrations directly from our [Hugging Face 🤗 dataset page](https://huggingface.co/datasets/haosulab/ManiSkill) which are done by task ID. The tool will download the demonstration files to a folder and also a few demonstration videos visualizing what the demonstrations look like. See [Tasks](../concepts/environments.md) for a list of all supported tasks. +We provide a command line tool to download demonstrations directly from our [Hugging Face 🤗 dataset page](https://huggingface.co/datasets/haosulab/ManiSkill) which are done by task ID. The tool will download the demonstration files to a folder and also a few demonstration videos visualizing what the demonstrations look like. See [Tasks](../../tasks/index.md) for a list of all supported tasks. diff --git a/docs/source/user_guide/getting_started/quickstart.md b/docs/source/user_guide/getting_started/quickstart.md index 91479efa8..1909e637a 100644 --- a/docs/source/user_guide/getting_started/quickstart.md +++ b/docs/source/user_guide/getting_started/quickstart.md @@ -5,7 +5,7 @@ ManiSkill is a robotics simulator built on top of SAPIEN. 
It provides a standard ## Interface -Here is a basic example of how to run a ManiSkill task following the interface of [Gymnasium](https://gymnasium.farama.org/) and execute a random policy. +Here is a basic example of how to run a ManiSkill task following the interface of [Gymnasium](https://gymnasium.farama.org/) and execute a random policy with a few basic options ```python import gymnasium as gym @@ -31,15 +31,15 @@ while not done: env.close() ``` -Changing `num_envs` to a value > 1 will automatically turn on the GPU simulation mode. More quick details [covered below](#gpu-parallelizedvectorized-tasks). You will also notice that all data returned is a batched torch tensor. To have the exact same API defined by [gym/gymnasium](https://gymnasium.farama.org/) see the section on [reinforcement learning setups](../reinforcement_learning/setup.md) +Changing `num_envs` to a value > 1 will automatically turn on the GPU simulation mode. More quick details [covered below](#gpu-parallelizedvectorized-tasks). -You can also run the same code from the command line to demo random actions +You can also run the same code from the command line to demo random actions and play with rendering options ```bash # run headless / without a display python -m mani_skill.examples.demo_random_action -e PickCube-v1 -# run with A GUI -python -m mani_skill.examples.demo_random_action -e PickCube-v1 --render-mode="human" +# run with A GUI and ray tracing +python -m mani_skill.examples.demo_random_action -e PickCube-v1 --render-mode="human" --shader="rt-fast" ``` Running with `render_mode="human"` will open up a GUI shown below that you can use to interactively explore the scene, pause/play the script, teleport objects around, and more. @@ -49,7 +49,7 @@ Running with `render_mode="human"` will open up a GUI shown below that you can u alt: SAPIEN GUI showing the PickCube task --- ``` - + -You may notice that everything returned by the environment is a torch Tensor and has a batch dimension with value 1. To reduce extra code handling numpy vs torch, cpu vs gpu sim, everything in ManiSkill defaults to serving/using batched torch Tensors of all data. To change the environment to serve numpy, unbatched data simply do the following +You will also notice that all data returned is a batched torch tensor. To reduce extra code handling numpy vs torch, cpu vs gpu sim, everything in ManiSkill defaults to serving/using batched torch Tensors of all data. To change the environment to serve numpy, unbatched data simply do the following ```python from mani_skill.utils.wrappers.gymnasium import CPUGymWrapper -env = gym.make(env_id) +env = gym.make(env_id, num_envs=1) env = CPUGymWrapper(env) obs, _ = env.reset() # obs is numpy and unbatched ``` +To have the exact same API defined by [gym/gymnasium](https://gymnasium.farama.org/) for single/vectorized environments see the section on [reinforcement learning setups](../reinforcement_learning/setup.md). + For a compilation of demos you can run without having to write any extra code check out the [demos page](../demos/index) -For the full documentation of options you can provide for gym.make see the [docstring in our repo](https://github.com/haosulab/ManiSkill/blob/main/mani_skill/envs/sapien_env.py) +See {py:class}`mani_skill.envs.sapien_env` for the full list of environment instantiation options. 
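To make the batched data handling concrete, here is a small illustrative sketch (the printed shapes are examples only; `num_envs=16` assumes a CUDA-capable GPU is available since `num_envs > 1` switches to the GPU simulation backend):

```python
import gymnasium as gym
import mani_skill.envs  # registers the ManiSkill tasks with gymnasium

# num_envs > 1 automatically enables GPU simulation
env = gym.make("PickCube-v1", num_envs=16, obs_mode="state")
obs, _ = env.reset(seed=0)
print(obs.shape)  # batched torch tensor, e.g. torch.Size([16, obs_dim])

obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
print(reward.shape, terminated.shape)  # each has a leading batch dimension of 16
env.close()
```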
+ ## GPU Parallelized/Vectorized Tasks @@ -153,6 +155,10 @@ env = gym.make( parallel_in_single_scene=True, viewer_camera_configs=dict(shader_pack="rt-fast"), ) +env.reset() +while True: + env.step(env.action_space.sample()) + env.render_human() ``` This will then open up a GUI that looks like so: @@ -167,6 +173,8 @@ We currently do not properly support exposing multiple visible CUDA devices to a ## Task Instantiation Options +For the full list of environment instantiation options see {py:class}`mani_skill.envs.sapien_env`. Here we list some common options: + Each ManiSkill task supports different **observation modes** and **control modes**, which determine its **observation space** and **action space**. They can be specified by `gym.make(env_id, obs_mode=..., control_mode=...)`. @@ -174,7 +182,7 @@ The common observation modes are `state`, `rgbd`, `pointcloud`. We also support We support a wide range of controllers. Different controllers can have different effects on your algorithms. Thus, it is recommended to understand the action space you are going to use. Please refer to [Controllers](../concepts/controllers.md) for more details. -Some tasks require **downloading assets** that are not stored in the python package itself. You can download task-specific assets by `python -m mani_skill.utils.download_asset ${ENV_ID}`. The assets will be downloaded to `~/maniskill/data` by default, but you can also use the environment variable `MS_ASSET_DIR` to change this destination. Please refer to [Tasks](../../tasks/index.md) for all tasks built in out of the box, and which tasks require downloading assets. +Some tasks require **downloading assets** that are not stored in the python package itself. You can download task-specific assets by `python -m mani_skill.utils.download_asset ${ENV_ID}`. The assets will be downloaded to `~/maniskill/data` by default, but you can also use the environment variable `MS_ASSET_DIR` to change this destination. If you don't download assets ahead of time, you will be prompted to do so if they are missing when running an environment. Some ManiSkill tasks also support swapping robot embodiments such as the `PickCube-v1` task. You can try using the fetch robot instead by running diff --git a/docs/source/user_guide/reference/index.md index 7e4e4be6c..34148c911 100644 --- a/docs/source/user_guide/reference/index.md +++ b/docs/source/user_guide/reference/index.md @@ -7,9 +7,7 @@ Subpackages: ```{toctree} :titlesonly: :maxdepth: 1 - -mani_skill.envs.sapien_env +:glob: +* -mani_skill.utils.common -mani_skill.utils.sapien_utils ``` \ No newline at end of file diff --git a/docs/source/user_guide/reference/mani_skill.agent.base_agent.md new file mode 100644 index 000000000..e6957d3a1 --- /dev/null +++ b/docs/source/user_guide/reference/mani_skill.agent.base_agent.md @@ -0,0 +1,8 @@ +# `mani_skill.agents.base_agent` + +```{eval-rst} +..
automodule:: mani_skill.agents.base_agent + :members: + :show-inheritance: + :undoc-members: +``` diff --git a/docs/source/user_guide/reference/mani_skill.envs.sapien_env.md b/docs/source/user_guide/reference/mani_skill.envs.sapien_env.md index 5e0c0f7f2..e3f49dd91 100644 --- a/docs/source/user_guide/reference/mani_skill.envs.sapien_env.md +++ b/docs/source/user_guide/reference/mani_skill.envs.sapien_env.md @@ -1,6 +1,10 @@ -# mani_skill.envs.sapien_env +# `mani_skill.envs.sapien_env` + +The BaseEnv class is the class you should inherit from if you want to create a new environment/task. The arguments here also describe all the possible run-time arguments you can pass when creating environments via `gym.make`. ```{eval-rst} .. automodule:: mani_skill.envs.sapien_env :members: + :show-inheritance: + :undoc-members: ``` diff --git a/docs/source/user_guide/reference/mani_skill.envs.scene.md b/docs/source/user_guide/reference/mani_skill.envs.scene.md new file mode 100644 index 000000000..ad7ad9345 --- /dev/null +++ b/docs/source/user_guide/reference/mani_skill.envs.scene.md @@ -0,0 +1,10 @@ +# `mani_skill.envs.scene` + +The ManiSkillScene class manages most of the underlying data for a ManiSkill environment. It is created each time an environment is instantiated or reconfigured and can be accessed via `env.scene` where `env` is your created environment object. + +```{eval-rst} +.. automodule:: mani_skill.envs.scene + :members: + :show-inheritance: + :undoc-members: +``` diff --git a/docs/source/user_guide/reference/mani_skill.sensors.rst b/docs/source/user_guide/reference/mani_skill.sensors.rst new file mode 100644 index 000000000..6452a4c82 --- /dev/null +++ b/docs/source/user_guide/reference/mani_skill.sensors.rst @@ -0,0 +1,15 @@ +``mani_skill.sensors`` module +=================================== + + +.. currentmodule:: mani_skill.sensors.base_sensor + +.. automodule:: mani_skill.sensors.base_sensor + :members: + :undoc-members: + +.. currentmodule:: mani_skill.sensors.camera + +.. automodule:: mani_skill.sensors.camera + :members: + :undoc-members: \ No newline at end of file diff --git a/docs/source/user_guide/reference/mani_skill.utils.common.md b/docs/source/user_guide/reference/mani_skill.utils.common.md deleted file mode 100644 index 14ad420be..000000000 --- a/docs/source/user_guide/reference/mani_skill.utils.common.md +++ /dev/null @@ -1,7 +0,0 @@ -# mani_skill.utils.common - -```{eval-rst} -.. automodule:: mani_skill.utils.common - :members: - :undoc-members: -``` diff --git a/docs/source/user_guide/reference/mani_skill.utils.common.rst b/docs/source/user_guide/reference/mani_skill.utils.common.rst new file mode 100644 index 000000000..e242aba74 --- /dev/null +++ b/docs/source/user_guide/reference/mani_skill.utils.common.rst @@ -0,0 +1,10 @@ +``mani_skill.utils.common`` module +=================================== + +.. currentmodule:: mani_skill.utils.common + +.. automodule:: mani_skill.utils.common + :members: + :undoc-members: + + .. rubric:: Functions diff --git a/docs/source/user_guide/reference/mani_skill.utils.sapien_utils.md b/docs/source/user_guide/reference/mani_skill.utils.sapien_utils.md deleted file mode 100644 index 7fe2f5ffb..000000000 --- a/docs/source/user_guide/reference/mani_skill.utils.sapien_utils.md +++ /dev/null @@ -1,7 +0,0 @@ -# mani_skill.utils.sapien_utils - -```{eval-rst} -.. 
automodule:: mani_skill.utils.sapien_utils - :members: - :undoc-members: -``` diff --git a/docs/source/user_guide/reference/mani_skill.utils.sapien_utils.rst b/docs/source/user_guide/reference/mani_skill.utils.sapien_utils.rst new file mode 100644 index 000000000..401fb30b2 --- /dev/null +++ b/docs/source/user_guide/reference/mani_skill.utils.sapien_utils.rst @@ -0,0 +1,12 @@ +``mani_skill.utils.sapien_utils`` module +======================================== + +.. currentmodule:: mani_skill.utils.sapien_utils + +.. automodule:: mani_skill.utils.sapien_utils + :members: + :undoc-members: + + .. rubric:: Functions + + \ No newline at end of file diff --git a/docs/source/user_guide/reference/mani_skill.utils.structs.rst b/docs/source/user_guide/reference/mani_skill.utils.structs.rst new file mode 100644 index 000000000..f3aed4ce0 --- /dev/null +++ b/docs/source/user_guide/reference/mani_skill.utils.structs.rst @@ -0,0 +1,20 @@ +``mani_skill.utils.structs`` module +=================================== + +Structs in ManiSkill are objects that efficiently wrap around raw SAPIEN objects and manage their data on the GPU in a user-friendly way so you don't have to deal with complex memory management code. The APIs exposed here are the most commonly used structs that you will encounter when using ManiSkill. + +.. currentmodule:: mani_skill.utils.structs + +.. automodule:: mani_skill.utils.structs + +.. autosummary:: + + :template: class_no_parameters.rst + + :toctree: _autosummary + Pose + Actor + Articulation + Link + ArticulationJoint + SimConfig \ No newline at end of file diff --git a/docs/source/user_guide/tutorials/custom_tasks/intro.md b/docs/source/user_guide/tutorials/custom_tasks/intro.md index ceecf3e28..5ce99f521 100644 --- a/docs/source/user_guide/tutorials/custom_tasks/intro.md +++ b/docs/source/user_guide/tutorials/custom_tasks/intro.md @@ -51,7 +51,7 @@ At the start of any task, you must load in all objects (robots, assets, articula Building objects in ManiSkill is nearly the exact same as it is in SAPIEN. You create an `ActorBuilder` via `self.scene.create_actor_builder` and via the actor builder add visual and collision shapes. Visual shapes only affect visual rendering processes while collision shapes affect the physical simulation. ManiSkill further will create the actor for you in every sub-scene (unless you use [scene-masks/scene-idxs](./advanced.md#scene-masks), a more advanced feature). -#### Building Robots +### Building Robots This is the simplest part and requires almost no additional work here. Robots are added in for you automatically and have their base initialized at 0. You can specify the default robot(s) added in via the init function. In PushCube this is done as so by adding `SUPPORTED_ROBOTS` to ensure users can only run your task with the selected robots. You can further add typing if you wish to the `agent` class attribute. @@ -74,7 +74,7 @@ Initializing these robots occurs in the initialization / randomization section c To create your own custom robots/agents, see the [custom robots tutorial](../custom_robots.md). -#### Building Actors +### Building Actors the `_load_scene` function must be implemented to build objects besides agents. It is also given an `options` dictionary which is the same options dictionary passed to `env.reset` and defaults to an empty dictionary (which may be useful for controlling how to load a scene with just reset arguments). 
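As a rough sketch of how this fits together (the task class name, half sizes, and color below are made up for illustration, and the builder methods follow SAPIEN's `ActorBuilder` API, so defer to the actual tutorial code for exact usage):

```python
import sapien
from mani_skill.envs.sapien_env import BaseEnv


class MyCustomTask(BaseEnv):
    def _load_scene(self, options: dict):
        # create a builder, attach collision and visual shapes, then build the actor
        builder = self.scene.create_actor_builder()
        builder.add_box_collision(half_size=[0.02, 0.02, 0.02])
        builder.add_box_visual(
            half_size=[0.02, 0.02, 0.02],
            material=sapien.render.RenderMaterial(base_color=[1, 0, 0, 1]),
        )
        # the actor is built in every parallel sub-scene automatically
        self.cube = builder.build(name="my_cube")
```

A complete task implements more than `_load_scene`; the remaining sections of this tutorial cover the other required pieces such as episode initialization.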
diff --git a/docs/source/user_guide/tutorials/sensors/index.md b/docs/source/user_guide/tutorials/sensors/index.md index e38dfa89f..8d7a7d945 100644 --- a/docs/source/user_guide/tutorials/sensors/index.md +++ b/docs/source/user_guide/tutorials/sensors/index.md @@ -4,7 +4,7 @@ This page documents how to use / customize sensors and cameras in ManiSkill in d ## Cameras -Cameras in ManiSkill can capture a ton of different modalities of data. By default ManiSkill limits those to just `rgb`, `depth`, `position` (which is used to derive depth), and `segmentation`. Internally ManiSkill uses [SAPIEN](https://sapien.ucsd.edu/) which has a highly optimized rendering system that leverages shaders to render different modalities of data. +Cameras in ManiSkill can capture a ton of different modalities of data. By default ManiSkill limits those to just `rgb`, `depth`, `position` (which is used to derive depth), and `segmentation`. Internally ManiSkill uses [SAPIEN](https://sapien.ucsd.edu/) which has a highly optimized rendering system that leverages shaders to render different modalities of data. The full set of configurations can be found in {py:class}`mani_skill.sensors.camera.CameraConfig`. Each shader has a preset configuration that generates textures containing data in a image format, often in a somewhat difficult to use format due to heavy optimization. ManiSkill uses a shader configuration system in python that parses these different shaders into more user friendly formats (namely the well known `rgb`, `depth`, `position`, and `segmentation` type data). This shader config system resides in this file on [Github](https://github.com/haosulab/ManiSkill/blob/main/mani_skill/render/shaders.py) and defines a few friendly defaults for minimal/fast rendering and ray-tracing. diff --git a/mani_skill/agents/base_agent.py b/mani_skill/agents/base_agent.py index be6142ece..2dddccefb 100644 --- a/mani_skill/agents/base_agent.py +++ b/mani_skill/agents/base_agent.py @@ -17,13 +17,12 @@ ) from mani_skill.sensors.base_sensor import BaseSensor, BaseSensorConfig from mani_skill.utils import assets, download_asset, sapien_utils -from mani_skill.utils.structs import Actor, Array, Articulation, Pose +from mani_skill.utils.structs import Actor, Array, Articulation from .controllers.base_controller import ( BaseController, CombinedController, ControllerConfig, - DictController, ) if TYPE_CHECKING: @@ -34,35 +33,38 @@ @dataclass class Keyframe: pose: sapien.Pose + """sapien Pose object describe this keyframe's pose""" qpos: Optional[Array] = None + """the qpos of the robot at this keyframe""" qvel: Optional[Array] = None + """the qvel of the robot at this keyframe""" class BaseAgent: - """Base class for agents. - - Agent is an interface of an articulated robot (physx.PhysxArticulation). + """Base class for agents/robots, forming an interface of an articulated robot (SAPIEN's physx.PhysxArticulation). + Users implementing their own agents/robots should inherit from this class. + A tutorial on how to build your own agent can be found in :doc:`its tutorial ` Args: - scene (sapien.Scene): simulation scene instance. + scene (ManiSkillScene): simulation scene instance. control_freq (int): control frequency (Hz). 
- control_mode: uid of controller to use - fix_root_link: whether to fix the robot root link - config: agent configuration - agent_idx: an index for this agent in a multi-agent task setup If None, the task should be single-agent + control_mode (str | None): uid of controller to use + fix_root_link (bool): whether to fix the robot root link + agent_idx (str | None): an index for this agent in a multi-agent task setup If None, the task should be single-agent """ uid: str """unique identifier string of this""" - urdf_path: str = None - """path to the .urdf file describe the agent's geometry and visuals""" - urdf_config: dict = None + urdf_path: Union[str, None] = None + """path to the .urdf file describe the agent's geometry and visuals. One of urdf_path or mjcf_path must be provided.""" + urdf_config: Union[str, Dict] = None """Optional provide a urdf_config to further modify the created articulation""" - mjcf_path: str = None - """path to a MJCF .xml file defining a robot. This will only load the articulation defined in the XML and nothing else""" + mjcf_path: Union[str, None] = None + """path to a MJCF .xml file defining a robot. This will only load the articulation defined in the XML and nothing else. + One of urdf_path or mjcf_path must be provided.""" fix_root_link: bool = True - """Whether to fix the root link of the robot""" + """Whether to fix the root link of the robot in place.""" load_multiple_collisions: bool = False """Whether the referenced collision meshes of a robot definition should be loaded as multiple convex collisions""" disable_self_collisions: bool = False @@ -85,15 +87,18 @@ def __init__( self._agent_idx = agent_idx self.robot: Articulation = None + """The robot object, which is an Articulation. Data like pose, qpos etc. can be accessed from this object.""" self.controllers: Dict[str, BaseController] = dict() + """The controllers of the robot.""" self.sensors: Dict[str, BaseSensor] = dict() + """The sensors that come with the robot.""" - self.controllers = dict() self._load_articulation() self._after_loading_articulation() # Controller self.supported_control_modes = list(self._controller_configs.keys()) + """List of all possible control modes for this robot.""" if control_mode is None: control_mode = self.supported_control_modes[0] # The control mode after reset for consistency @@ -104,13 +109,14 @@ def __init__( @property def _sensor_configs(self) -> List[BaseSensorConfig]: + """Returns a list of sensor configs for this agent. By default this is empty.""" return [] @property def _controller_configs( self, ) -> Dict[str, Union[ControllerConfig, DictControllerConfig]]: - + """Returns a dict of controller configs for this agent. By default this is a PDJointPos (delta and non delta) controller for all active joints.""" return dict( pd_joint_pos=PDJointPosControllerConfig( [x.name for x in self.robot.active_joints], @@ -137,7 +143,7 @@ def device(self): def _load_articulation(self): """ - Load the robot articulation + Loads the robot articulation """ if self.urdf_path is not None: loader = self.scene.create_urdf_loader() @@ -184,12 +190,11 @@ def _load_articulation(self): self.robot_link_ids = [link.name for link in self.robot.get_links()] def _after_loading_articulation(self): - """After loading articulation and before setting up controller. Not recommended, but is useful for when creating - robot classes that inherit controllers from another and only change which joints are controlled - """ + """Called after loading articulation and before setting up any controllers. 
By default this is empty.""" def _after_init(self): - """After initialization. E.g., caching the end-effector link.""" + """Code that is run after initialization. Some example robot implementations use this to cache a reference to special + robot links like an end-effector link. By default this is empty.""" # -------------------------------------------------------------------------- # # Controllers @@ -200,8 +205,9 @@ def control_mode(self): """Get the currently activated controller uid.""" return self._control_mode - def set_control_mode(self, control_mode=None): - """Set the controller and drive properties. This does not reset the controller. If given control mode is None, will set defaults""" + def set_control_mode(self, control_mode: str = None): + """Sets the controller to an pre-existing controller of this agent. + This does not reset the controller. If given control mode is None, will set to the default control mode.""" if control_mode is None: control_mode = self._default_control_mode assert ( @@ -234,7 +240,7 @@ def set_control_mode(self, control_mode=None): link.disable_gravity = True @property - def controller(self): + def controller(self) -> BaseController: """Get currently activated controller.""" if self._control_mode is None: raise RuntimeError("Please specify a control mode first") @@ -242,7 +248,7 @@ def controller(self): return self.controllers[self._control_mode] @property - def action_space(self): + def action_space(self) -> spaces.Space: if self._control_mode is None: return spaces.Dict( { @@ -254,7 +260,7 @@ def action_space(self): return self.controller.action_space @property - def single_action_space(self): + def single_action_space(self) -> spaces.Space: if self._control_mode is None: return spaces.Dict( { @@ -267,7 +273,8 @@ def single_action_space(self): def set_action(self, action): """ - Set the agent's action which is to be executed in the next environment timestep + Set the agent's action which is to be executed in the next environment timestep. + This is essentially a wrapper around the controller's set_action method. """ if not physx.is_gpu_enabled(): if np.isnan(action).any(): @@ -275,6 +282,7 @@ def set_action(self, action): self.controller.set_action(action) def before_simulation_step(self): + """Code that runs before each simulation step. By default it calls the controller's before_simulation_step method.""" self.controller.before_simulation_step() # -------------------------------------------------------------------------- # @@ -282,7 +290,7 @@ def before_simulation_step(self): # -------------------------------------------------------------------------- # def get_proprioception(self): """ - Get the proprioceptive state of the agent. + Get the proprioceptive state of the agent, default is the qpos and qvel of the robot and any controller state. """ obs = dict(qpos=self.robot.get_qpos(), qvel=self.robot.get_qvel()) controller_state = self.controller.get_state() @@ -308,6 +316,8 @@ def get_state(self) -> Dict: return state def set_state(self, state: Dict, ignore_controller=False): + """Set the state of the agent, including the robot state and controller state. 
+ If ignore_controller is True, the controller state will not be updated.""" # robot state self.robot.set_root_pose(state["robot_root_pose"]) self.robot.set_root_linear_velocity(state["robot_root_vel"]) @@ -317,13 +327,20 @@ def set_state(self, state: Dict, ignore_controller=False): if not ignore_controller and "controller" in state: self.controller.set_state(state["controller"]) + if self.device.type == "cuda": + self.scene._gpu_apply_all() + self.scene.px.gpu_update_articulation_kinematics() + self.scene._gpu_fetch_all() # -------------------------------------------------------------------------- # # Other # -------------------------------------------------------------------------- # - def reset(self, init_qpos=None): + def reset(self, init_qpos: torch.Tensor = None): """ - Reset the robot to a rest position or a given q-position + Reset the robot to a clean state with zero velocity and forces. Furthermore, it resets the currently active controller. + + Args: + init_qpos (torch.Tensor): The initial qpos to set the robot to. If None, the robot's qpos is not changed. """ if init_qpos is not None: self.robot.set_qpos(init_qpos) diff --git a/mani_skill/agents/controllers/base_controller.py index 1da0141e8..d1a910790 100644 --- a/mani_skill/agents/controllers/base_controller.py +++ b/mani_skill/agents/controllers/base_controller.py @@ -33,13 +33,11 @@ class BaseController: active_joint_indices: torch.Tensor """indices of active joints controlled. Equivalent to [x.active_index for x in self.joints]""" action_space: spaces.Space - """the action space. If the number of parallel environments is > 1, this action space is also batched""" + """the action space of the controller, which by default has a batch dimension. This is typically already normalized as well.""" single_action_space: spaces.Space """The unbatched version of the action space which is also typically already normalized by this class""" - """The batched version of the action space which is also typically already normalized by this class""" _original_single_action_space: spaces.Space """The unbatched, original action space without any additional processing like normalization""" - """The batched, original action space without any additional processing like normalization""" def __init__( self, @@ -67,7 +65,6 @@ def __init__( self._normalize_action = getattr(self.config, "normalize_action", False) if self._normalize_action: self._clip_and_scale_action_space() - self.action_space = self.single_action_space if self.scene.num_envs > 1: self.action_space = batch_space( diff --git a/mani_skill/envs/sapien_env.py index fadd108cf..ab637987f 100644 --- a/mani_skill/envs/sapien_env.py +++ b/mani_skill/envs/sapien_env.py @@ -71,26 +71,28 @@ class BaseEnv(gym.Env): enable_shadow (bool): whether to enable shadow for lights. Defaults to False. - sensor_configs (dict): configurations of sensors. See notes for more details. + sensor_configs (dict): configurations of sensors to override any environment defaults. + If the key is one of sensor names (e.g. a camera), the config value will be applied to the corresponding sensor. + Otherwise, the value will be applied to all sensors (but overridden by sensor-specific values). For possible configurations + see :doc:`the sensors documentation `. - human_render_camera_configs (dict): configurations of human rendering cameras. Similar usage as @sensor_configs.
+ human_render_camera_configs (dict): configurations of human rendering cameras to override any environment defaults. Similar usage as @sensor_configs. - viewer_camera_configs (dict): configurations of the viewer camera in the GUI. Similar usage as @sensor_configs. + viewer_camera_configs (dict): configurations of the viewer camera in the GUI to override any environment defaults. Similar usage as @sensor_configs. robot_uids (Union[str, BaseAgent, List[Union[str, BaseAgent]]]): List of robots to instantiate and control in the environment. sim_config (Union[SimConfig, dict]): Configurations for simulation if used that override the environment defaults. If given - a dictionary, it can just override specific attributes e.g. `sim_config=dict(scene_config=dict(solver_iterations=25))`. If + a dictionary, it can just override specific attributes e.g. ``sim_config=dict(scene_config=dict(solver_iterations=25))``. If passing in a SimConfig object, while typed, will override every attribute including the task defaults. Some environments - define their own recommended default sim configurations via the `self._default_sim_config` attribute that generally should not be - completely overriden. For a full detail/explanation of what is in the sim config see the type hints / go to the source - https://github.com/haosulab/ManiSkill/blob/main/mani_skill/utils/structs/types.py + define their own recommended default sim configurations via the ``self._default_sim_config`` attribute that generally should not be + completely overriden. reconfiguration_freq (int): How frequently to call reconfigure when environment is reset via `self.reset(...)` Generally for most users who are not building tasks this does not need to be changed. The default is 0, which means the environment reconfigures upon creation, and never again. - sim_backend (str): By default this is "auto". If sim_backend is "auto", then if num_envs == 1, we use the CPU sim backend, otherwise + sim_backend (str): By default this is "auto". If sim_backend is "auto", then if ``num_envs == 1``, we use the CPU sim backend, otherwise we use the GPU sim backend and automatically pick a GPU to use. Can also be "cpu" or "gpu" to force usage of a particular sim backend. To select a particular GPU to run the simulation on, you can pass "cuda:n" where n is the ID of the GPU, @@ -103,11 +105,6 @@ class BaseEnv(gym.Env): parallel_in_single_scene (bool): By default this is False. If True, rendered images and the GUI will show all objects in one view. This is only really useful for generating cool videos showing all environments at once but it is not recommended otherwise as it slows down simulation and rendering. - - Note: - `sensor_configs` is used to update environement-specific sensor configurations. - If the key is one of sensor names (e.g. a camera), the value will be applied to the corresponding sensor. - Otherwise, the value will be applied to all sensors (but overridden by sensor-specific values). """ # fmt: off @@ -122,13 +119,16 @@ class BaseEnv(gym.Env): metadata = {"render_modes": SUPPORTED_RENDER_MODES} - physx_system: Union[physx.PhysxCpuSystem, physx.PhysxGpuSystem] = None - scene: ManiSkillScene = None """the main scene, which manages all sub scenes. 
In CPU simulation there is only one sub-scene""" agent: BaseAgent + action_space: gym.Space + """the batched action space of the environment, which is also the action space of the agent""" + single_action_space: gym.Space + """the unbatched action space of the environment""" + _sensors: Dict[str, BaseSensor] """all sensors configured in this environment""" _sensor_configs: Dict[str, BaseSensorConfig] @@ -309,14 +309,35 @@ def __init__( """the initial raw state returned by env.get_state. Useful for reconstructing state dictionaries from flattened state vectors""" self.action_space = self.agent.action_space + """the batched action space of the environment, which is also the action space of the agent""" self.single_action_space = self.agent.single_action_space + """the unbatched action space of the environment""" self._orig_single_action_space = copy.deepcopy(self.single_action_space) + """the original unbatched action space of the environment""" # initialize the cached properties self.single_observation_space self.observation_space def update_obs_space(self, obs: torch.Tensor): - """call this function if you modify the observations returned by env.step and env.reset via an observation wrapper.""" + """A convenient function to auto generate observation spaces if you modify them. + Call this function if you modify the observations returned by env.step and env.reset via an observation wrapper. + + The recommended way to use this is in a observation wrapper is as so + + .. code-block:: python + + import gymnasium as gym + from mani_skill.envs.sapien_env import BaseEnv + class YourObservationWrapper(gym.ObservationWrapper): + def __init__(self, env): + super().__init__(env) + self.base_env.update_obs_space(self.observation(self.base_env._init_raw_obs)) + @property + def base_env(self) -> BaseEnv: + return self.env.unwrapped + def observation(self, obs): + # your code for transforming the observation + """ self._init_raw_obs = obs del self.single_observation_space del self.observation_space @@ -324,11 +345,13 @@ def update_obs_space(self, obs: torch.Tensor): self.observation_space @cached_property - def single_observation_space(self): + def single_observation_space(self) -> gym.Space: + """the unbatched observation space of the environment""" return gym_utils.convert_observation_to_space(common.to_numpy(self._init_raw_obs), unbatched=True) @cached_property - def observation_space(self): + def observation_space(self) -> gym.Space: + """the batched observation space of the environment""" return batch_space(self.single_observation_space, n=self.num_envs) @property @@ -392,34 +415,41 @@ def _default_viewer_camera_configs( return CameraConfig(uid="viewer", pose=sapien.Pose([0, 0, 1]), width=1920, height=1080, shader_pack="default") @property - def sim_freq(self): + def sim_freq(self) -> int: + """The frequency (Hz) of the simulation loop""" return self._sim_freq @property def control_freq(self): + """The frequency (Hz) of the control loop""" return self._control_freq @property def sim_timestep(self): + """The timestep (dt) of the simulation loop""" return 1.0 / self._sim_freq @property def control_timestep(self): + """The timestep (dt) of the control loop""" return 1.0 / self._control_freq @property - def control_mode(self): + def control_mode(self) -> str: + """The control mode of the agent""" return self.agent.control_mode @property - def elapsed_steps(self): + def elapsed_steps(self) -> torch.Tensor: + """The number of steps that have elapsed in the environment""" return self._elapsed_steps # 
---------------------------------------------------------------------------- # # Observation # ---------------------------------------------------------------------------- # @property - def obs_mode(self): + def obs_mode(self) -> str: + """The current observation mode. This affects the observation returned by env.get_obs()""" return self._obs_mode def get_obs(self, info: Optional[Dict] = None): @@ -832,7 +862,9 @@ def _clear_sim_state(self): def step(self, action: Union[None, np.ndarray, torch.Tensor, Dict]): """ - Take a step through the environment with an action + Take a step through the environment with an action. Actions are automatically clipped to the action space. + + If ``action`` is None, the environment will proceed forward in time without sending any actions/control signals to the agent """ action = self._step_action(action) self._elapsed_steps += 1 @@ -921,7 +953,7 @@ def evaluate(self) -> dict: """ return dict() - def get_info(self): + def get_info(self) -> dict: """ Get info about the current environment state, include elapsed steps and evaluation information """ @@ -959,7 +991,7 @@ def _setup_scene(self): The function should be called in reset(). Called by `self._reconfigure`""" self._set_scene_config() if self._sim_device.is_cuda(): - self.physx_system = physx.PhysxGpuSystem(device=self._sim_device) + physx_system = physx.PhysxGpuSystem(device=self._sim_device) # Create the scenes in a square grid sub_scenes = [] scene_grid_length = int(np.ceil(np.sqrt(self.num_envs))) @@ -969,9 +1001,9 @@ def _setup_scene(self): scene_idx // scene_grid_length - scene_grid_length // 2, ) scene = sapien.Scene( - systems=[self.physx_system, sapien.render.RenderSystem(self._render_device)] + systems=[physx_system, sapien.render.RenderSystem(self._render_device)] ) - self.physx_system.set_scene_offset( + physx_system.set_scene_offset( scene, [ scene_x * self.sim_config.spacing, @@ -981,9 +1013,9 @@ def _setup_scene(self): ) sub_scenes.append(scene) else: - self.physx_system = physx.PhysxCpuSystem() + physx_system = physx.PhysxCpuSystem() sub_scenes = [ - sapien.Scene([self.physx_system, sapien.render.RenderSystem(self._render_device)]) + sapien.Scene([physx_system, sapien.render.RenderSystem(self._render_device)]) ] # create a "global" scene object that users can work with that is linked with all other scenes created self.scene = ManiSkillScene( @@ -992,7 +1024,7 @@ def _setup_scene(self): device=self.device, parallel_in_single_scene=self._parallel_in_single_scene ) - self.physx_system.timestep = 1.0 / self._sim_freq + self.scene.px.timestep = 1.0 / self._sim_freq def _clear(self): """Clear the simulation scene instance and other buffers. @@ -1106,13 +1138,14 @@ def _setup_viewer(self): ) def render_human(self): + """render the environment by opening a GUI viewer. This also returns the viewer object. Any objects registered in the _hidden_objects list will be shown""" for obj in self._hidden_objects: obj.show_visual() if self._viewer is None: self._viewer = create_viewer(self._viewer_camera_config) self._setup_viewer() if physx.is_gpu_enabled() and self.scene._gpu_sim_initialized: - self.physx_system.sync_poses_gpu_to_cpu() + self.scene.px.sync_poses_gpu_to_cpu() self._viewer.render() for obj in self._hidden_objects: obj.hide_visual() @@ -1121,12 +1154,12 @@ def render_human(self): def render_rgb_array(self, camera_name: str = None): """Returns an RGB array / image of size (num_envs, H, W, 3) of the current state of the environment. 
This is captured by any of the registered human render cameras. If a camera_name is given, only data from that camera is returned. - Otherwise all camera data is captured and returned as a single batched image""" + Otherwise all camera data is captured and returned as a single batched image. Any objects registered in the _hidden_objects list will be shown""" for obj in self._hidden_objects: obj.show_visual() self.scene.update_render() images = [] - render_images = self.scene.get_human_render_camera_images() + render_images = self.scene.get_human_render_camera_images(camera_name) for image in render_images.values(): images.append(image) if len(images) == 0: @@ -1139,7 +1172,8 @@ def render_rgb_array(self, camera_name: str = None): def render_sensors(self): """ - Renders all sensors that the agent can use and see and displays them + Renders all sensors that the agent can use and see and displays them in a human readable image format. + Any objects registered in the _hidden_objects list will not be shown """ images = [] sensor_images = self.get_sensor_images() @@ -1155,7 +1189,8 @@ def render_all(self): obj.show_visual() self.scene.update_render() render_images = self.scene.get_human_render_camera_images() - + for obj in self._hidden_objects: + obj.hide_visual() sensor_images = self.get_sensor_images() for image in render_images.values(): for img in image.values(): @@ -1167,11 +1202,13 @@ def render_all(self): def render(self): """ - Either opens a viewer if render_mode is "human", or returns an array that you can use to save videos. + Either opens a viewer if ``self.render_mode`` is "human", or returns an array that you can use to save videos. + + If ``self.render_mode`` is "rgb_array", usually a higher quality image is rendered for the purpose of viewing only. - render_mode is "rgb_array", usually a higher quality image is rendered for the purpose of viewing only. 
+ If ``self.render_mode`` is "sensors", all visual observations the agent can see is provided - if render_mode is "sensors", all visual observations the agent can see is provided + If ``self.render_mode`` is "all", this is then a combination of "rgb_array" and "sensors" """ if self.render_mode is None: raise RuntimeError("render_mode is not set.") @@ -1220,6 +1257,7 @@ def render(self): # Printing metrics/info def print_sim_details(self): + """Debug tool to call to simply print a bunch of details about the running environment, including the task ID, number of environments, sim backend, etc.""" sensor_settings_str = [] for uid, cam in self._sensors.items(): if isinstance(cam, Camera): diff --git a/mani_skill/envs/scene.py b/mani_skill/envs/scene.py index f179ce456..daed8f391 100644 --- a/mani_skill/envs/scene.py +++ b/mani_skill/envs/scene.py @@ -96,6 +96,7 @@ def __init__( # -------------------------------------------------------------------------- # @property def timestep(self): + """The current simulation timestep""" return self.px.timestep @timestep.setter @@ -103,22 +104,27 @@ def timestep(self, timestep): self.px.timestep = timestep def set_timestep(self, timestep): + """Sets the current simulation timestep""" self.timestep = timestep def get_timestep(self): + """Returns the current simulation timestep""" return self.timestep def create_actor_builder(self): + """Creates an ActorBuilder object that can be used to build actors in this scene""" from ..utils.building.actor_builder import ActorBuilder return ActorBuilder().set_scene(self) def create_articulation_builder(self): + """Creates an ArticulationBuilder object that can be used to build articulations in this scene""" from ..utils.building.articulation_builder import ArticulationBuilder return ArticulationBuilder().set_scene(self) def create_urdf_loader(self): + """Creates a URDFLoader object that can be used to load URDF files into this scene""" from ..utils.building.urdf_loader import URDFLoader loader = URDFLoader() @@ -126,26 +132,30 @@ def create_urdf_loader(self): return loader def create_mjcf_loader(self): + """Creates a MJCFLoader object that can be used to load MJCF files into this scene""" from ..utils.building.mjcf_loader import MJCFLoader loader = MJCFLoader() loader.set_scene(self) return loader - def create_physical_material( - self, static_friction: float, dynamic_friction: float, restitution: float - ): - return physx.PhysxMaterial(static_friction, dynamic_friction, restitution) + # def create_physical_material( + # self, static_friction: float, dynamic_friction: float, restitution: float + # ): + # return physx.PhysxMaterial(static_friction, dynamic_friction, restitution) - def remove_actor(self, actor): + def remove_actor(self, actor: Actor): + """Removes an actor from the scene. Only works in CPU simulation.""" if physx.is_gpu_enabled(): raise NotImplementedError( "Cannot remove actors after creating them in GPU sim at the moment" ) else: - self.sub_scenes[0].remove_entity(actor) + self.sub_scenes[0].remove_entity(actor._objs[0].entity) + self.actors.pop(actor.name) def remove_articulation(self, articulation: Articulation): + """Removes an articulation from the scene. 
Only works in CPU simulation.""" if physx.is_gpu_enabled(): raise NotImplementedError( "Cannot remove articulations after creating them in GPU sim at the moment" @@ -154,6 +164,7 @@ def remove_articulation(self, articulation: Articulation): entities = [l.entity for l in articulation._objs[0].links] for e in entities: self.sub_scenes[0].remove_entity(e) + self.articulations.pop(articulation.name) def add_camera( self, diff --git a/mani_skill/sensors/camera.py b/mani_skill/sensors/camera.py index daed95936..1721989ed 100644 --- a/mani_skill/sensors/camera.py +++ b/mani_skill/sensors/camera.py @@ -2,7 +2,7 @@ import copy from dataclasses import dataclass -from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Union import numpy as np import sapien @@ -36,25 +36,23 @@ class CameraConfig(BaseSensorConfig): pose: Pose """Pose of the camera""" width: int - """width (int): width of the camera""" + """width of the camera""" height: int - """height (int): height of the camera""" + """height of the camera""" fov: float = None """The field of view of the camera. Either fov or intrinsic must be given""" near: float = 0.01 - """near (float): near plane of the camera""" + """near plane of the camera""" far: float = 100 - """far (float): far plane of the camera""" + """far plane of the camera""" intrinsic: Array = None """intrinsics matrix of the camera. Either fov or intrinsic must be given""" - entity_uid: str = None - """entity_uid (str, optional): unique id of the entity to mount the camera. Defaults to None.""" + entity_uid: Optional[str] = None + """unique id of the entity to mount the camera. Defaults to None. Only used by agent classes that want to define mounted cameras.""" mount: Union[Actor, Link] = None """the Actor or Link to mount the camera on top of. This means the global pose of the mounted camera is now mount.pose * local_pose""" - texture_names: Optional[Sequence[str]] = None - """texture_names (Sequence[str], optional): texture names to render.""" shader_pack: Optional[str] = "minimal" - """The shader to use for rendering. Defaults to "minimal" which is the fastest rendering system with minimal GPU memory usage. There is also `default` and `rt`.""" + """The shader to use for rendering. Defaults to "minimal" which is the fastest rendering system with minimal GPU memory usage. There is also ``default`` and ``rt``.""" shader_config: Optional[ShaderConfig] = None """The shader config to use for rendering. If None, the shader_pack will be used to search amongst prebuilt shader configs to create a ShaderConfig.""" diff --git a/mani_skill/utils/common.py b/mani_skill/utils/common.py index e384c851e..b485877e4 100644 --- a/mani_skill/utils/common.py +++ b/mani_skill/utils/common.py @@ -124,9 +124,7 @@ def index_dict_array(x1, idx: Union[int, slice], inplace=True): # TODO (stao): this code can be simplified -def to_tensor( - array: Union[torch.Tensor, np.array, Sequence], device: Optional[Device] = None -): +def to_tensor(array: Array, device: Optional[Device] = None): """ Maps any given sequence to a torch tensor on the CPU/GPU. If physx gpu is not enabled then we use CPU, otherwise GPU, unless specified by the device argument @@ -179,7 +177,7 @@ def to_tensor( return ret.to(device) -def to_cpu_tensor(array: Union[torch.Tensor, np.array, Sequence]): +def to_cpu_tensor(array: Array): """ Maps any given sequence to a torch tensor on the CPU. 
""" diff --git a/mani_skill/utils/structs/pose.py b/mani_skill/utils/structs/pose.py index 84e13aec0..53196a476 100644 --- a/mani_skill/utils/structs/pose.py +++ b/mani_skill/utils/structs/pose.py @@ -37,36 +37,49 @@ class Pose: As a result pose.p and pose.q will return shapes (N, 3) and (N, 4) respectively for N poses being stored. pose.raw_pose stores all the pose data as a single 2D array of shape (N, 7). - All sapien.Pose API are re-implemented in batch mode here to support GPU simulation. E.g. pose multiplication and inverse with `pose_1.inv() * pose_2`, - or creating transformation matrices with `pose_1.to_transformation_matrix()` are suppported they same way they are in sapien.Pose. + All sapien.Pose API are re-implemented in batch mode here to support GPU simulation. E.g. pose multiplication and inverse with ``pose_1.inv() * pose_2``, + or creating transformation matrices with ``pose_1.to_transformation_matrix()`` are suppported they same way they are in sapien.Pose. - ## Pose Creation + Pose Creation + ------------- - To create a batched pose with a given position `p` and/or quaternion `q`, you run + To create a batched pose with a given position ``p`` and/or quaternion ``q``, you run: - ``` - pose = Pose.create_from_pq(p=p, q=q) - ``` - p and q can be a torch tensor, numpy array, and/or list, or None. + .. code-block:: python - If p or q have only 1 value/not batched, then we automatically repeat the value to the batch size of the other given value. - For example, if p has a batch dimension of size > 1, and q has a batch dimension of size 1 or is a flat list, then the - code automatically repeats the q value to the batch size of p. Likewise in the reverse direction the same repeating occurs. + pose = Pose.create_from_pq(p=p, q=q) - If p and q have the same batch size, they are stored as so. + ``p`` and ``q`` can be a torch tensor, numpy array, and/or list, or None. - If p and q have no batch dimensions, one is automatically added (e.g. p having shape (3, ) now becomes (1, 3)) + If ``p`` or ``q`` have only 1 value/not batched, then we automatically repeat the value to the batch size of the other given value. + For example, if ``p`` has a batch dimension of size > 1, and ``q`` has a batch dimension of size 1 or is a flat list, then the + code automatically repeats the ``q`` value to the batch size of ``p``. Likewise in the reverse direction the same repeating occurs. - If p is None, it is auto filled with zeros + If ``p`` and ``q`` have the same batch size, they are stored as so. - If q is None, it is auto filled with the [1, 0, 0, 0] quaternion. + If ``p`` and ``q`` have no batch dimensions, one is automatically added (e.g. ``p`` having shape (3,) now becomes (1, 3)) - If you have a sapien.Pose, another Pose object, or a raw pose tensor of shape (N, 7) or (7, ) called `x`, you can create this Pose object with - pose = Pose.create(x) + If ``p`` is None, it is auto filled with zeros. - If you want a sapien.Pose object instead of this batched Pose, you can do pose.sp to get the sapien.Pose version (which is not batched). Note that + If ``q`` is None, it is auto filled with the [1, 0, 0, 0] quaternion. + + If you have a sapien.Pose, another Pose object, or a raw pose tensor of shape (N, 7) or (7,) called ``x``, you can create this Pose object with: + + .. code-block:: python + + pose = Pose.create(x) + + If you want a sapien.Pose object instead of this batched Pose, you can do ``pose.sp`` to get the sapien.Pose version (which is not batched). 
Note that this is only permitted if this Pose has a batch size of 1. + Pose Indexing + ------------- + + You can index into a Pose object like numpy/torch arrays to get a new Pose object with the indexed data. + + For example if ``pose`` has a batch size of 4, then ``pose[0]`` will be a Pose object with batch size of 1, and + ``pose[1:3]`` will be a Pose object with batch size of 2. + """ raw_pose: torch.Tensor @@ -75,6 +88,7 @@ class Pose: def create_from_pq( cls, p: torch.Tensor = None, q: torch.Tensor = None, device: Device = None ): + """Creates a Pose object from a given position ``p`` and/or quaternion ``q``""" if p is None: p = torch.zeros((1, 3), device=device) if q is None: @@ -98,6 +112,7 @@ def create( pose: Union[torch.Tensor, sapien.Pose, List[sapien.Pose], "Pose"], device: Optional[Device] = None, ) -> "Pose": + """Creates a Pose object from a given ``pose``, which can be a torch tensor, sapien.Pose, list of sapien.Pose, or Pose""" if isinstance(pose, sapien.Pose): raw_pose = torch.hstack( [ @@ -134,14 +149,17 @@ def __len__(self): return len(self.raw_pose) @property - def shape(self): + def shape(self) -> torch.Size: + """Shape of the Pose object""" return self.raw_pose.shape @property - def device(self): + def device(self) -> Device: + """Torch Device the Pose object is on""" return self.raw_pose.device def to(self, device: Device): + """Move the Pose object to a different device""" if self.raw_pose.device == device: return self return Pose.create(self.raw_pose.to(device)) @@ -173,13 +191,16 @@ def __mul__(self, arg0: Union["Pose", sapien.Pose]) -> "Pose": # def __repr__(self) -> str: ... # def __setstate__(self, arg0: tuple) -> None: ... def get_p(self): + """Returns self.p, the position""" return self.p def get_q(self): + """Returns self.q, the quaternion""" return self.q # def get_rpy(self) -> numpy.ndarray[numpy.float32, _Shape, _Shape[3]]: ... - def inv(self) -> "Pose": + def inv(self): + """Returns the inverse of this pose""" inverted_raw_pose = self.raw_pose.clone() inverted_raw_pose[..., 4:] = -inverted_raw_pose[..., 4:] new_p = quaternion_apply(inverted_raw_pose[..., 3:], -self.p) @@ -187,13 +208,16 @@ def inv(self) -> "Pose": return Pose.create(inverted_raw_pose) def set_p(self, p: torch.Tensor) -> None: + """Sets the position of this pose""" self.p = p def set_q(self, q: torch.Tensor) -> None: + """Sets the quaternion of this pose""" self.q = q # def set_rpy(self, arg0: numpy.ndarray[numpy.float32, _Shape, _Shape[3]]) -> None: ... def to_transformation_matrix(self): + """Returns the (N, 4, 4) shaped transformation matrix equivalent to this pose""" b = self.raw_pose.shape[0] mat = torch.zeros((b, 4, 4), device=self.raw_pose.device) mat[..., :3, :3] = quaternion_to_matrix(self.q) @@ -204,12 +228,13 @@ def to_transformation_matrix(self): @property def sp(self): """ - Returns the equivalent sapien pose + Returns the equivalent sapien pose. Note that this is only permitted if this Pose has a batch size of 1. 
""" return to_sapien_pose(self) @property def p(self): + """The position of this pose""" return self.raw_pose[..., :3] @p.setter @@ -218,6 +243,7 @@ def p(self, arg1: torch.Tensor): @property def q(self): + """The quaternion of this pose""" return self.raw_pose[..., 3:] @q.setter diff --git a/mani_skill/utils/structs/types.py b/mani_skill/utils/structs/types.py index ab911f089..5e4709dd5 100644 --- a/mani_skill/utils/structs/types.py +++ b/mani_skill/utils/structs/types.py @@ -80,6 +80,8 @@ def dict(self): @dataclass class SimConfig: + """Simulation configurations for ManiSkill environments""" + spacing: float = 5 """Controls the spacing between parallel environments when simulating on GPU in meters. Increase this value if you expect objects in one parallel environment to impact objects within this spacing distance"""