From 22fb6bebbf84b4fbbd3bcbb24a5012a7fd2a651c Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Mon, 2 Sep 2024 15:50:08 +1000 Subject: [PATCH 01/10] Add faq from openff-toolkit --- devtools/conda-envs/rtd_env.yml | 42 +++--- source/_static/css/faq.css | 10 ++ source/conf.py | 11 ++ source/faq.md | 229 ++++++++++++++++++++++++++++++++ source/index.md | 3 +- 5 files changed, 273 insertions(+), 22 deletions(-) create mode 100644 source/_static/css/faq.css create mode 100644 source/faq.md diff --git a/devtools/conda-envs/rtd_env.yml b/devtools/conda-envs/rtd_env.yml index a498e489..01c8fb89 100644 --- a/devtools/conda-envs/rtd_env.yml +++ b/devtools/conda-envs/rtd_env.yml @@ -1,24 +1,24 @@ name: openff-toolkit-docs channels: - - conda-forge + - conda-forge dependencies: - - pip - # readthedocs dependencies - - sphinx>=5,<7 - - myst-parser>=1,<2 - # - myst-nb - - sphinx-notfound-page - - ipython >=8.8 - - sphinx-design - # Examples - - gitpython - - nbconvert - - nbformat - # Theme - - pip: - # Theme - - git+https://github.com/openforcefield/openff-sphinx-theme.git@main - # Lints - - sphinxawesome-codelinter - # Sphinx - - git+https://github.com/Yoshanuikabundi/MyST-NB.git@upgrade-to-1 + - pip + # readthedocs dependencies + - sphinx>=5,<7 + - myst-parser>=1,<2 + # - myst-nb + - sphinx-notfound-page + - ipython >=8.8 + - sphinx-design>=0.6.0 + # Examples + - gitpython + - nbconvert + - nbformat + # Theme + - pip: + # Theme + - git+https://github.com/openforcefield/openff-sphinx-theme.git@main + # Lints + - sphinxawesome-codelinter + # Sphinx + - git+https://github.com/Yoshanuikabundi/MyST-NB.git@upgrade-to-1 diff --git a/source/_static/css/faq.css b/source/_static/css/faq.css new file mode 100644 index 00000000..cd85a9e9 --- /dev/null +++ b/source/_static/css/faq.css @@ -0,0 +1,10 @@ +details.faq.sd-dropdown, +details.faq.sd-dropdown summary, +details.faq.sd-dropdown:not([open]) > .sd-card-header { + border: None; +} + +details.faq.sd-dropdown summary.sd-summary-title 
.sd-summary-state-marker { + min-width: 3em; + transform-origin: 25% 50%; +} diff --git a/source/conf.py b/source/conf.py index 2a6a916f..7faff4d9 100644 --- a/source/conf.py +++ b/source/conf.py @@ -80,6 +80,16 @@ "openff.nagl": ("https://docs.openforcefield.org/nagl/en/stable", None), } +sd_custom_directives = { + "faq-entry": { + "inherit": "dropdown", + "options": { + "animate": "fade-in-slide-down", + "class-container": "faq", + }, + } +} + # sphinx-notfound-page # https://github.com/readthedocs/sphinx-notfound-page # Renders a 404 page with absolute links @@ -132,6 +142,7 @@ html_css_files = [ "css/deflist-flowchart.css", "css/cookbook.css", + "css/faq.css", ] # List of patterns, relative to source directory, that match files and diff --git a/source/faq.md b/source/faq.md new file mode 100644 index 00000000..b54babc7 --- /dev/null +++ b/source/faq.md @@ -0,0 +1,229 @@ +# Frequently asked questions (FAQ) + +## Getting started + +:::::{faq-entry} What kinds of input files can I apply SMIRNOFF parameters to? + +SMIRNOFF force fields use direct chemical perception meaning that, unlike many molecular mechanics (MM) force fields, they apply parameters based on substructure searches acting directly on molecules. +This creates unique opportunities and allows them to encode a great deal of chemistry quite simply, but it also means that the *starting point* for parameter assignment must be well-defined chemically, giving not just the elements and connectivity for all of the atoms of all of the components of your system, but also providing the formal charges and bond orders. + +Specifically, to apply SMIRNOFF to a system, you must either: +1. Provide Open Force Field Toolkit [`Molecule`](openff.toolkit.topology.Molecule) objects corresponding to the components of your system, or +2. 
Provide an OpenMM [`Topology`](openff.toolkit.topology.Topology) which includes bond orders and thus can be converted to molecules corresponding to the components of your system + +Without this information, our direct chemical perception cannot be applied to your molecule, as it requires the chemical identity of the molecules in your system -- that is, bond order and formal charge as well as atoms and connectivity. +Unless you provide the full chemical identity in this sense, we must attempt to guess or infer the chemical identity of your molecules, which is a recipe for trouble. +Different molecules can have the same chemical graph but differ in bond order and formal charge, or different resonance structures may be treated rather differently by some force fields (e.g. `c1cc(ccc1c2cc[nH+]cc2)[O-]` vs `C1=CC(C=CC1=C2C=CNC=C2)=O`, where the central bond is rotatable in one resonance structure but not in the other) even though they have identical formal charge and connectivity (chemical graph). +A force field which uses the chemical identity of molecules to assign parameters needs to know the exact chemical identity of the molecule you are intending to parameterize. + +::::: + +:::::{faq-entry} Can I use an AMBER (or GROMACS) topology/coordinate file as a starting point for applying a SMIRNOFF force field? + +In a word, "no". + +Parameter files used by typical molecular dynamics simulation packages do not currently encode enough information to identify the molecules chemically present, or at least not without drawing inferences. +For example, one could take a structure file and infer bond orders based on bond lengths, or attempt to infer bond orders from force constants in a parameter file. +Such inference work is outside the scope of SMIRNOFF. + +::::: + +:::::{faq-entry} What about starting from a PDB file? 
+ +PDB files do not in general provide the chemical identity of small molecules contained therein, and thus do not provide suitable starting points for applying SMIRNOFF to small molecules. +This is especially problematic for PDB files from X-ray crystallography which typically do not include proteins, making the problem even worse. +For our purposes here, however, we assume you begin with the coordinates of all atoms present and the full topology of your system. + +Given a PDB file of a hypothetical biomolecular system of interest containing a small molecule, there are several routes available to you for treating the small molecule present: +- Use a cheminformatics toolkit (see below) to infer bond orders +- Identify your ligand from a database; e.g. if it is in the Protein Data Bank (PDB), it will be present in the [Ligand Expo](http://ligand-expo.rcsb.org) meaning that it has a database entry and code you can use to look up its putative chemical identity +- Identify your ligand by name or SMILES string (or similar) from the literature or your collaborators + +::::: + +:::::{faq-entry} What about starting from an XYZ file? + +XYZ files generally only contain elements and positions, and are therefore similar in content to PDB files. See the above section "What about starting from a PDB file?" for more information. + +::::: + +:::::{faq-entry} What do you recommend as a starting point? + +For application of SMIRNOFF force fields, we recommend that you begin your work with formats which provide the chemical identity of your small molecule (including formal charge and bond order). +This means we recommend one of the following or equivalent: +- A `.sdf`, `.mol`, or `.mol2` file or files for the molecules comprising your system, with correct bond orders and formal charges. 
(Note: Do NOT generate this from a simulation package or tool which does not have access to bond order information; you may end up with a correct-seeming file, but the bond orders will be incorrect) +- Isomeric SMILES strings for the components of your system +- InChI strings for the components of your system +- Chemical Identity Registry numbers for the components of your system +- IUPAC names for the components of your system + +Essentially, anything which provides the full identity of what you want to simulate (including stereochemistry) should work, though it may require more or less work to get it into an acceptable format. + +::::: + +:::::{faq-entry} How can I transfer my prepared system to HPC resources for simulation? + +OpenFF recommends exporting a prepared `Interchange` to the target MD engine and using the MD engine's recommended method to transfer it to HPC resources. This way, no additional dependencies need to be installed on the HPC resource to use OpenFF tools during preparation. For most MD engines, simply transfer the files produced by the appropriate [`Interchange.to_*()`] methods. For OpenMM, create a `System` Python object with [`Interchange.to_openmm_system()`] or [`ForceField.create_openmm_system()`] and transfer it by [serializing to XML]. 
+ +[`Interchange.to_*()`]: https://docs.openforcefield.org/projects/interchange/en/stable/_autosummary/openff.interchange.Interchange.html +[`Interchange.to_openmm_system()`]: https://docs.openforcefield.org/projects/interchange/en/stable/_autosummary/openff.interchange.Interchange.html#openff.interchange.Interchange.to_openmm_system +[`ForceField.create_openmm_system()`]: https://docs.openforcefield.org/projects/toolkit/en/stable/api/generated/openff.toolkit.typing.engines.smirnoff.ForceField.html#openff.toolkit.typing.engines.smirnoff.ForceField.create_openmm_system +[serializing to XML]: https://openmm.github.io/openmm-cookbook/latest/notebooks/cookbook/Saving%20Systems%20to%20XML%20Files.html +::::: + +## Errors and performance issues + +:::::{faq-entry} I'm getting stereochemistry errors when loading a molecule from a SMILES string. + +By default, the OpenFF Toolkit throws an error if a molecule with undefined stereochemistry is loaded. This is because the stereochemistry of a molecule may affect its partial charges, and assigning parameters using [direct chemical perception](https://pubs.acs.org/doi/pdf/10.1021/acs.jctc.8b00640) may require knowing the stereochemistry of chiral centers. In addition, coordinates generated by the Toolkit for undefined chiral centers may have any combination of stereochemistries; the toolkit makes no guarantees about consistency, uniformity, or randomness. Note that the main-line OpenFF force fields currently use a stereochemistry-dependent charge generation method, but do not include any other stereospecific parameters. + +This behavior is in line with OpenFF's general attitude of requiring users to explicitly acknowledge actions that may cause silent errors later on. If you're confident a `Molecule` with unassigned stereochemistry is acceptable, pass `allow_undefined_stereo=True` to molecule loading methods like [Molecule.from_smiles](openff.toolkit.topology.Molecule.from_smiles) to downgrade the exception to a warning. 
For an example, see the "SMILES without stereochemistry" section in the [Molecule cookbook](smiles_no_stereochemistry). Where possible, our parameter assignment infrastructure will gracefully handle molecules with undefined stereochemistry that are loaded this way, though they will be missing any stereospecific parameters. + +::::: + +:::::{faq-entry} Parameterizing my system, which contains a large molecule, is taking forever. What's wrong? + +The mainline OpenFF force fields use AM1-BCC to assign partial charges (via the `<ToolkitAM1BCC>` tag in the OFFXML file). This method unfortunately scales poorly with the size of a molecule, and ligands roughly 100 atoms (about 40 heavy atoms) or larger may take so long (i.e. 10 minutes or more) that it seems like your code is simply hanging indefinitely. If you have an OpenEye license and OpenEye Toolkits [installed](installation/openeye), the OpenFF Toolkit will instead use `quacpac`, which can offer better performance on large molecules. Otherwise, it uses AmberTools' `sqm`, which is free to use. + +In the future, the use of AM1-BCC in OpenFF force fields may be replaced with method(s) that perform better and scale better with molecule size, but (as of April 2022) these are still in an experimental phase. + +::::: + +## Installation issues + +:::::{faq-entry} I'm having troubles installing the OpenFF Toolkit on my Apple Silicon Mac. + +As of August 2022, some upstreams (at least AmberTools, possibly more) are not built on `osx-arm64`, so installing the OpenFF stack is only possible with [Rosetta]. See the [platform support] section of the installation documentation for more. + +(Keywords `osx-arm64`, M1 Mac, M2 Mac) + +[Rosetta]: https://support.apple.com/en-au/HT211861 +[platform support]: inv:#install_arm + +::::: + +:::::{faq-entry} My mamba/conda installation of the toolkit doesn't appear to work. What should I try next? 
+ +We recommend that you install the toolkit in a fresh environment, explicitly passing the channels to be used, in order: + +```shell +mamba create -n <my_new_env> -c conda-forge openff-toolkit +mamba activate <my_new_env> +``` + +Installing into a new environment avoids forcing mamba to satisfy the dependencies of both the toolkit and all existing packages in that environment. +Taking the approach that conda/mamba environments are generally disposable, even ephemeral, minimizes the chances for hard-to-diagnose dependency issues. + +::::: + +:::::{faq-entry} My mamba/conda installation of the toolkit STILL doesn't appear to work. + +Many of our users encounter issues that are ultimately due to their terminal finding a different `conda` at higher priority in their `PATH` than the `conda` deployment where OpenFF is installed. To fix this, find the conda deployment where OpenFF is installed. Then, if that folder is something like `~/miniconda3`, run in the terminal: + +```shell +source ~/miniconda3/etc/profile.d/conda.sh +``` + +and then try rerunning and/or reinstalling the Toolkit. + +::::: + +## Under the hood + +:::::{faq-entry} How are partial charges assigned in a SMIRNOFF force field? + +There are [many charge methods](https://openforcefield.github.io/standards/standards/smirnoff/#partial-charge-and-electrostatics-models) supported by the SMIRNOFF specification. With the exception of water, mainline OpenFF force fields only use AM1-BCC (through `ToolkitAM1BCC`) to assign partial charges. (A future biopolymer force field will likely use library charges for standard residues.) + +If OpenEye Toolkits are installed and licensed, the ELF10 variant of AM1-BCC is used. OpenEye's Quacpac (`oequacpac.OEAM1BCCELF10Charges`) is used to generate partial charges. + +Otherwise, RDKit is used to generate a conformer which is passed to AmberTools' `sqm` (with `-c bcc`). 
+ +Note that, because of differences with the ELF10 variant and other subtle differences between OpenEye Toolkits and RDKit/AmberTools, **assigned partial charges can be expected to differ** based on the available toolkit(s). These numerical differences are often minor but in some molecules or use cases can be significant. + +A future charge method may use [NAGL](https://github.com/openforcefield/openff-nagl) to assign partial charges from a graph-convolutional neural network instead of an underlying semi-empirical method. This approach is anticipated to be faster, more scalable, and more consistent than current approaches. As of March 2024, this is under development and not released for general use. + +::::: + +:::::{faq-entry} I understand the risks and want to perform bond and formal charge inference anyway + +If you are unable to provide a molecule in the formats recommended above and want to attempt to infer the bond orders and atomic formal charges, there are tools available elsewhere that can provide guesses for this problem. These tools are not perfect, and the inference problem itself is poorly defined, so you should review each output closely (see our [Core Concepts](users/concepts) for an explanation of what information is needed to construct an OpenFF Molecule). Some tools we know of include: + +- the OpenEye Toolkit's [`OEPerceiveBondOrders`](https://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OEPerceiveBondOrders.html) functionality +- [MDAnalysis' RDKit converter](https://docs.mdanalysis.org/stable/documentation_pages/converters/RDKit.html?highlight=rdkit#module-MDAnalysis.converters.RDKit), with an [example here](https://github.com/openforcefield/openff-toolkit/issues/1126#issuecomment-969712195) +- the Jensen group's [xyz2mol program](https://github.com/jensengroup/xyz2mol/) + +::::: + +:::::{faq-entry} The partial charges generated by the toolkit don't seem to depend on the molecule's conformation! Is this a bug? + +No! 
This is the intended behavior. The force field parameters of a molecule should be independent of both their chemical environment and conformation so that they can be used and compared across different contexts. When applying AM1BCC partial charges, the toolkit achieves a deterministic output by ignoring the input conformation and producing several new conformations for the same molecule. Partial charges are then computed based on these conformations. This behavior can be controlled with the `use_conformers` argument to [Molecule.assign_partial_charges()](openff.toolkit.topology.Molecule.assign_partial_charges). + +::::: + +## SMIRNOFF and force fields + +:::::{faq-entry} How can I distribute my own force fields in SMIRNOFF format? + +We support conda data packages for distribution of force fields in `.offxml` format! Just add the relevant entry point to `setup.py` and distribute via a conda (or PyPI) package: + +```python +entry_points={ + 'openforcefield.smirnoff_forcefield_directory' : [ + 'my_new_force_field_paths = my_package:get_my_new_force_field_paths', + ], +} +``` + +Where `get_my_new_force_field_paths` is a function in the `my_package` module providing a list of strings holding the paths to the directories to search. You should also rename `my_new_force_field_paths` to suit your force field. See [`openff-forcefields`](https://github.com/openforcefield/openff-forcefields/blob/ed0d904/setup.py#L57-L61) for an example. + +::::: + +:::::{faq-entry} What does "unconstrained" mean in a force field name? + +Each release of an [OpenFF force field](https://github.com/openforcefield/openff-forcefields/tree/main/openforcefields/offxml) has two associated `.offxml` files: one unadorned (for example, `openff-2.0.0.offxml`) and one labeled "unconstrained" (`openff_unconstrained-2.0.0.offxml`). This reflects the presence or absence of holonomic constraints on hydrogen-involving bonds in the force field specification. 
+ +Typically, OpenFF force fields treat bonds with a harmonic potential according to Hooke's law. With this treatment, bonds involving hydrogen atoms have a much higher vibration frequency than any other part of a typical biochemical system. By constraining these bonds to a fixed length, MD time steps can be increased past 1 fs, improving simulation performance. These bond vibrations are not structurally important to proteins so can usually be ignored. + +While we recommend hydrogen-involving bond constraints and a time step of 2 fs for ordinary use, some other specialist uses require a harmonic treatment. The unconstrained force fields are provided for these uses. + +Use the constrained force field: + - When running MD with a time step greater than 1 fs + +Use the unconstrained force field: + - When computing single point energy calculations or energy minimization + - When running MD with a time step of 1 fs (or less) + - When bond lengths may deviate from equilibrium + - When fitting a force field, both because many fitting techniques require continuity and because deviations from equilibrium bond length may be important + - Any other circumstance when forces or energies must be defined or continuous for any possible position of a hydrogen atom + +Starting with v2.0.0 (Sage), TIP3P water is included in OpenFF force fields. The geometry of TIP3P water is always constrained, even in the unconstrained force fields. + +::::: + +:::::{faq-entry} How do I add or remove constraints from my own force field? + +To make applying or removing bond constraints easy, constrained force fields released by OpenFF always include full bond parameters. Constraints on Hydrogen-involving bonds inherit their lengths from the harmonic parameters also included in the force field. To restore the harmonic treatment, simply remove the appropriate constraint entry from the force field. 
+ +Hydrogen-involving bonds are constrained with a single constraint entry in a `.offxml` file: + +```xml + + + + +``` + +Adding or removing the inner ` Date: Tue, 10 Sep 2024 18:23:32 +1000 Subject: [PATCH 02/10] Add faq from openff-toolkit --- source/conf.py | 1 + source/faq.md | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/source/conf.py b/source/conf.py index 7faff4d9..a5ca3ce4 100644 --- a/source/conf.py +++ b/source/conf.py @@ -79,6 +79,7 @@ "openff.recharge": ("https://docs.openforcefield.org/recharge/en/stable", None), "openff.nagl": ("https://docs.openforcefield.org/nagl/en/stable", None), } +intersphinx_disabled_reftypes = ["*"] sd_custom_directives = { "faq-entry": { diff --git a/source/faq.md b/source/faq.md index b54babc7..e87349fb 100644 --- a/source/faq.md +++ b/source/faq.md @@ -2,6 +2,16 @@ ## Getting started +:::::{faq-entry} What do I need to know to get started? + +OpenFF tools follow a philosophy of failing with a descriptive error message +rather than trying to interpret intention from ambiguous information, so you +might find you have to provide more information than you're used to. For an +overview of how the ecosystem fits together, read [](modelling.md). Once +you're ready to start coding, check out [](install.md) and [](examples.md). + +::::: + :::::{faq-entry} What kinds of input files can I apply SMIRNOFF parameters to? SMIRNOFF force fields use direct chemical perception meaning that, unlike many molecular mechanics (MM) force fields, they apply parameters based on substructure searches acting directly on molecules. 
@@ -98,7 +108,7 @@ As of August 2022, some upstreams (at least AmberTools, possibly more) are not b (Keywords `osx-arm64`, M1 Mac, M2 Mac) [Rosetta]: https://support.apple.com/en-au/HT211861 -[platform support]: inv:#install_arm +[platform support]: install_arm ::::: From b7b7190b1fd6ca5d22fa89ae8808e9a1cb3f3840 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Tue, 10 Sep 2024 20:30:20 +1000 Subject: [PATCH 03/10] Take a stab at Matt's issues in openforcefield/openff-toolkit#1848 --- source/conf.py | 1 + source/faq.md | 102 ++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 81 insertions(+), 22 deletions(-) diff --git a/source/conf.py b/source/conf.py index a5ca3ce4..9f0c2c50 100644 --- a/source/conf.py +++ b/source/conf.py @@ -80,6 +80,7 @@ "openff.nagl": ("https://docs.openforcefield.org/nagl/en/stable", None), } intersphinx_disabled_reftypes = ["*"] +myst_heading_anchors = 2 sd_custom_directives = { "faq-entry": { diff --git a/source/faq.md b/source/faq.md index e87349fb..bb6be77e 100644 --- a/source/faq.md +++ b/source/faq.md @@ -1,14 +1,13 @@ -# Frequently asked questions (FAQ) +# Frequently Asked Questions (FAQ) -## Getting started +## Getting Started :::::{faq-entry} What do I need to know to get started? -OpenFF tools follow a philosophy of failing with a descriptive error message -rather than trying to interpret intention from ambiguous information, so you -might find you have to provide more information than you're used to. For an -overview of how the ecosystem fits together, read [](modelling.md). Once -you're ready to start coding, check out [](install.md) and [](examples.md). +OpenFF tools follow a philosophy of failing with a descriptive error message rather than trying to interpret intention from ambiguous information, so you +might find you have to provide more information than you're used to. +For an overview of how the ecosystem fits together, read [](modelling.md). 
+Once you're ready to start coding, check out [](install.md) and [](examples.md). ::::: @@ -18,37 +17,58 @@ SMIRNOFF force fields use direct chemical perception meaning that, unlike many m This creates unique opportunities and allows them to encode a great deal of chemistry quite simply, but it also means that the *starting point* for parameter assignment must be well-defined chemically, giving not just the elements and connectivity for all of the atoms of all of the components of your system, but also providing the formal charges and bond orders. Specifically, to apply SMIRNOFF to a system, you must either: -1. Provide Open Force Field Toolkit [`Molecule`](openff.toolkit.topology.Molecule) objects corresponding to the components of your system, or -2. Provide an OpenMM [`Topology`](openff.toolkit.topology.Topology) which includes bond orders and thus can be converted to molecules corresponding to the components of your system +1. Provide Open Force Field Toolkit [`Molecule`] objects corresponding to the components of your system, or +2. Provide an OpenMM [`Topology`] which includes bond orders and thus can be converted to molecules corresponding to the components of your system Without this information, our direct chemical perception cannot be applied to your molecule, as it requires the chemical identity of the molecules in your system -- that is, bond order and formal charge as well as atoms and connectivity. Unless you provide the full chemical identity in this sense, we must attempt to guess or infer the chemical identity of your molecules, which is a recipe for trouble. Different molecules can have the same chemical graph but differ in bond order and formal charge, or different resonance structures may be treated rather differently by some force fields (e.g. 
`c1cc(ccc1c2cc[nH+]cc2)[O-]` vs `C1=CC(C=CC1=C2C=CNC=C2)=O`, where the central bond is rotatable in one resonance structure but not in the other) even though they have identical formal charge and connectivity (chemical graph). A force field which uses the chemical identity of molecules to assign parameters needs to know the exact chemical identity of the molecule you are intending to parameterize. +[`Molecule`]: openff.toolkit.topology.Molecule +[`Topology`]: openff.toolkit.topology.Topology + ::::: -:::::{faq-entry} Can I use an AMBER (or GROMACS) topology/coordinate file as a starting point for applying a SMIRNOFF force field? +:::::{faq-entry} Can I use an Amber or GROMACS topology/coordinate file as a starting point for applying a SMIRNOFF force field? + +Amber and GROMACS topologies and coordinate files do not include enough explicit chemical information to apply a SMIRNOFF force field. +For example, bond orders are not present in either format; one could infer bond orders based on bond lengths, or attempt to infer bond orders from force constants, but such inference work would be error-prone and is outside the scope of SMIRNOFF. +PDB files that include all atoms in the model can be used in some cases (see next question). -In a word, "no". +Amber and GROMACS topology and coordinate files can be [experimentally loaded] by Interchange for export to other MD engines, but this does not require the chemical information needed to apply a SMIRNOFF force field. -Parameter files used by typical molecular dynamics simulation packages do not currently encode enough information to identify the molecules chemically present, or at least not without drawing inferences. -For example, one could take a structure file and infer bond orders based on bond lengths, or attempt to infer bond orders from force constants in a parameter file. -Such inference work is outside the scope of SMIRNOFF. 
+[experimentally loaded]: inv:openff.interchange#using/experimental + +::::: + +:::::{faq-entry} Can I use an Amber force field with SMIRNOFF ligands? + +Experimental support for this approach is available through Interchange. Briefly, the ligands are parametrized in the usual SMIRNOFF way to produce an Interchange, the Amber components are parametrized through OpenMM and then loaded into a second Interchange with [`Interchange.from_openmm()`], and then the two Interchanges are combined. + +[`Interchange.from_openmm()`]: openff.interchange.Interchange.from_openmm ::::: :::::{faq-entry} What about starting from a PDB file? -PDB files do not in general provide the chemical identity of small molecules contained therein, and thus do not provide suitable starting points for applying SMIRNOFF to small molecules. -This is especially problematic for PDB files from X-ray crystallography which typically do not include proteins, making the problem even worse. -For our purposes here, however, we assume you begin with the coordinates of all atoms present and the full topology of your system. +PDB files are a ubiquitous coordinate format, but the interpretation of the chemistry of a given PDB file is ambiguous in many ways. +Without a complete and accurate chemical description of the system, SMIRNOFF parameters cannot be applied. +When a few common biopolymers like peptides have the conventional atom and residue names and are not missing any atoms, they can be loaded unambiguously. +However, different software packages in common use make slightly different choices for these conventions. +In addition, many of the affordances provided by the format for disambiguation, like formal charges and CONECT records, are both not reliably produced by all software and have deficiencies that make chemical identification impossible. 
This means while PDBs are great for providing the coordinates of a known system of atoms in a format that can be readily visualized, applying SMIRNOFF parameters to them is an active area of development. -Given a PDB file of a hypothetical biomolecular system of interest containing a small molecule, there are several routes available to you for treating the small molecule present: +To load a PDB file including appropriately named canonical peptides with all atoms present and a known list of non-protein elements, see the OpenFF Toolkit's [PDB Cookbook]. +This workflow is an active area of development and we are expanding the scope of what can be loaded as we settle on what is needed. + +Given a PDB file of a hypothetical biomolecular system of interest containing a small molecule, there are several routes available to you for identifying the small molecules present: - Use a cheminformatics toolkit (see below) to infer bond orders - Identify your ligand from a database; e.g. if it is in the Protein Data Bank (PDB), it will be present in the [Ligand Expo](http://ligand-expo.rcsb.org) meaning that it has a database entry and code you can use to look up its putative chemical identity - Identify your ligand by name or SMILES string (or similar) from the literature or your collaborators +[PDB Cookbook]: inv:openff.toolkit#users/pdb_cookbook/index.ipynb + ::::: :::::{faq-entry} What about starting from an XYZ file? @@ -81,7 +101,45 @@ OpenFF recommends exporting a prepared `Interchange` to the target MD engine and [serializing to XML]: https://openmm.github.io/openmm-cookbook/latest/notebooks/cookbook/Saving%20Systems%20to%20XML%20Files.html ::::: -## Errors and performance issues +## Errors and Performance Issues + +:::::{faq-entry} Why does partial charge assignment fail during conformer generation, even though my molecule has conformers? 
+ +Assigning partial charges with a quantum chemical method requires conformers, as they are an essential input to a quantum chemical calculation. +Because the charges assigned by a SMIRNOFF force field should be transferrable between systems, we default to generating our own set of conformers during charge assignment. +This requirement will become unnecessary for future SMIRNOFF force fields that use NAGL graph charges; see the [](#under-the-hood) section. + +To assign charges based on the provided conformer if conformer generation fails, first assign charges, then use the assigned charges during parametrization: + +```python +from openff.toolkit import ForceField, Topology, Molecule + +topology = ... +forcefield = ForceField(...) +problematic_molecule_indices = [...] + +for i in problematic_molecule_indices: + molecule = topology.molecule(i) + try: + molecule.assign_partial_charges( + partial_charge_method="am1bcc" + ) + except ValueError: + molecule.assign_partial_charges( + partial_charge_method="am1bcc", + use_conformers=molecule.conformers, + ) + +interchange = forcefield.create_interchange( + topology, + charge_from_molecules=[ + topology.molecule(i) + for i in problematic_molecule_indices + ] +) +``` + +::::: :::::{faq-entry} I'm getting stereochemistry errors when loading a molecule from a SMILES string. @@ -99,7 +157,7 @@ In the future, the use of AM1-BCC in OpenFF force fields may be replaced with me ::::: -## Installation issues +## Installation Issues :::::{faq-entry} I'm having troubles installing the OpenFF Toolkit on my Apple Silicon Mac. @@ -138,7 +196,7 @@ and then try rerunning and/or reinstalling the Toolkit. ::::: -## Under the hood +## Under the Hood :::::{faq-entry} How are partial charges assigned in a SMIRNOFF force field? @@ -170,7 +228,7 @@ No! This is the intended behavior. 
The force field parameters of a molecule shou ::::: -## SMIRNOFF and force fields +## SMIRNOFF Force Fields :::::{faq-entry} How can I distribute my own force fields in SMIRNOFF format? From 74956a5b0d7fa409673a8a994d88764f7bf8deeb Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Tue, 10 Sep 2024 20:42:56 +1000 Subject: [PATCH 04/10] Work around ACS forbidding link checking --- source/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/conf.py b/source/conf.py index 9f0c2c50..6f6c07f2 100644 --- a/source/conf.py +++ b/source/conf.py @@ -125,6 +125,9 @@ # Configure the linkcheck builder linkcheck_anchors = False # This generates lots of false positives +linkcheck_ignore = [ + r'https://pubs.acs.org/doi/' # ACS 403s the link checker. Thanks ACS. +] # Cookbook stuff import sys From 2644f03ebc6da5bbb530be5e47d5fe4c11326b47 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Tue, 10 Sep 2024 22:53:31 +1000 Subject: [PATCH 05/10] Fix broken code blocks in FAQ --- source/_ext/check_python_codeblocks.py | 48 ++++++++++++++++++++++++++ source/conf.py | 2 +- source/faq.md | 18 ++++------ 3 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 source/_ext/check_python_codeblocks.py diff --git a/source/_ext/check_python_codeblocks.py b/source/_ext/check_python_codeblocks.py new file mode 100644 index 00000000..bb7efc8f --- /dev/null +++ b/source/_ext/check_python_codeblocks.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Conditionally provide some variables to a wrapped script. + +The purpose of this script is to provide the ability to quickly write short +snippets in the OpenFF documentation that do not require initialization of +common Toolkit objects while not inhibiting the quality of error messages from +more complete code examples. + +Code examples are used throughout the docs and testing that they do not raise +errors has caught many documentation errors already. 
The `codelinter` Sphinx +extension runs this script on all code blocks in this documentation project. +The script sets up some commonly used variables and imports so that short +snippets don't have to initialize them, and then sets up an import hook that +deletes the added names if `openff.toolkit` is imported in the code block. This +means that short snippets can assume some common variables are in scope, but +long code blocks intended to be self-sufficient can opt out of this name +pollution by importing the toolkit. +""" + +# Define the default namespace +from openff.toolkit import ForceField, Molecule, Topology + +molecule = Molecule.from_smiles("C123C(C1)(C2)C3") +topology = Topology.from_molecules([molecule]) +force_field = ForceField("openff-2.2.0.offxml") + +# Set the import hook +import sys +import builtins +_old_import = __import__ +_already_deleted = False +def __import__(name, *args, **kwargs): + """ + Clear above variables on any new import of the toolkit + """ + global _already_deleted + if name.startswith("openff.toolkit") and not _already_deleted: + global molecule, topology, force_field, ForceField, Molecule, Topology + del molecule, topology, force_field, ForceField, Molecule, Topology + _already_deleted = True + + return _old_import(name, *args, **kwargs) + +builtins.__import__ = __import__ + +# Execute the code block +exec(sys.stdin.read()) diff --git a/source/conf.py b/source/conf.py index 6f6c07f2..97e6e4cc 100644 --- a/source/conf.py +++ b/source/conf.py @@ -116,7 +116,7 @@ extensions.append("sphinxawesome.codelinter") codelinter_languages = { # Language: command to pass codeblock as stdin - "python": "python", + "python": "python source/_ext/check_python_codeblocks.py", } # Tell MyST-NB about codelinter builder nb_mime_priority_overrides = [ diff --git a/source/faq.md b/source/faq.md index bb6be77e..eb5d8835 100644 --- a/source/faq.md +++ b/source/faq.md @@ -114,9 +114,11 @@ To assign charges based on the provided conformer if conformer 
generation fails, ```python from openff.toolkit import ForceField, Topology, Molecule -topology = ... -forcefield = ForceField(...) -problematic_molecule_indices = [...] +topology = Topology.from_molecules([ + Molecule.from_smiles("C123C(C1)(C2)C3") +]) +force_field = ForceField("openff-2.2.0.offxml") +problematic_molecule_indices = [0] for i in problematic_molecule_indices: molecule = topology.molecule(i) @@ -130,7 +132,7 @@ for i in problematic_molecule_indices: use_conformers=molecule.conformers, ) -interchange = forcefield.create_interchange( +interchange = force_field.create_interchange( topology, charge_from_molecules=[ topology.molecule(i) @@ -285,13 +287,7 @@ Adding or removing the inner ` Date: Tue, 10 Sep 2024 23:01:36 +1000 Subject: [PATCH 06/10] Use an unconstrained force field in short snippets --- source/_ext/check_python_codeblocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/_ext/check_python_codeblocks.py b/source/_ext/check_python_codeblocks.py index bb7efc8f..dc1f33b1 100644 --- a/source/_ext/check_python_codeblocks.py +++ b/source/_ext/check_python_codeblocks.py @@ -23,7 +23,7 @@ molecule = Molecule.from_smiles("C123C(C1)(C2)C3") topology = Topology.from_molecules([molecule]) -force_field = ForceField("openff-2.2.0.offxml") +force_field = ForceField("openff_unconstrained-2.2.0.offxml") # Set the import hook import sys From c8cb2cf889d03a319df1f596296c40efbb2b4347 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Tue, 10 Sep 2024 23:20:57 +1000 Subject: [PATCH 07/10] Add faq entry about warnings --- source/_ext/check_python_codeblocks.py | 1 + source/faq.md | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/source/_ext/check_python_codeblocks.py b/source/_ext/check_python_codeblocks.py index dc1f33b1..904d8acf 100644 --- a/source/_ext/check_python_codeblocks.py +++ b/source/_ext/check_python_codeblocks.py @@ -19,6 +19,7 @@ """ # Define the default namespace +import openff.toolkit from 
openff.toolkit import ForceField, Molecule, Topology molecule = Molecule.from_smiles("C123C(C1)(C2)C3") diff --git a/source/faq.md b/source/faq.md index eb5d8835..c23eea25 100644 --- a/source/faq.md +++ b/source/faq.md @@ -159,6 +159,21 @@ In the future, the use of AM1-BCC in OpenFF force fields may be replaced with me ::::: +:::::{faq-entry} How can I silence warnings I'm expecting my code to generate? + +OpenFF libraries often issue warnings when they detect that the user might be doing something they don't intend. These warnings are largely born out of bug reports from users, and we'd rather make sure new users understand our software, so they can get noisy for experienced developers. We use the Python [`warnings`](https://docs.python.org/3/library/warnings.html) module from the standard library, so warnings can be filtered from a particular section of code like so: + +```python +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=openff.toolkit.utils.exceptions.AtomMappingWarning, + ) + + Molecule.from_smiles("[H:1][O:4][H:2]") +::::: + ## Installation Issues :::::{faq-entry} I'm having troubles installing the OpenFF Toolkit on my Apple Silicon Mac. From 5757987e0c1e861102dedd95fbdf28e143016fc3 Mon Sep 17 00:00:00 2001 From: "Josh A. Mitchell" Date: Wed, 11 Sep 2024 00:04:07 +1000 Subject: [PATCH 08/10] Update source/faq.md Co-authored-by: Matt Thompson --- source/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/faq.md b/source/faq.md index c23eea25..181ad80a 100644 --- a/source/faq.md +++ b/source/faq.md @@ -36,7 +36,7 @@ Amber and GROMACS topologies and coordinate files do not include enough explicit For example, bond orders are not present in either format; one could infer bond orders based on bond lengths, or attempt to infer bond orders from force constants, but such inference work would be error-prone and is outside the scope of SMIRNOFF. 
PDB files that include all atoms in the model can be used in some cases (see next question). -Amber and GROMACS topology and coordinate files can be [experimentally loaded] by Interchange for export to other MD engines, but this does not require the chemical information needed to apply a SMIRNOFF force field. +Amber and GROMACS topology and coordinate files can be [experimentally loaded] by Interchange for export to other MD engines, but this does not include the chemical information needed to apply a SMIRNOFF force field. [experimentally loaded]: inv:openff.interchange#using/experimental From 8f83358cc51c5f55aa2d87208127c3394dba76c3 Mon Sep 17 00:00:00 2001 From: "Josh A. Mitchell" Date: Wed, 11 Sep 2024 00:08:39 +1000 Subject: [PATCH 09/10] Update source/faq.md --- source/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/faq.md b/source/faq.md index 181ad80a..9a0c39c8 100644 --- a/source/faq.md +++ b/source/faq.md @@ -109,7 +109,7 @@ Assigning partial charges with a quantum chemical method requires conformers, as Because the charges assigned by a SMIRNOFF force field should be transferrable between systems, we default to generating our own set of conformers during charge assignment. This requirement will become unnecessary for future SMIRNOFF force fields that use NAGL graph charges; see the [](#under-the-hood) section. 
-To assign charges based on the provided conformer if conformer generation fails, first assign charges, then use the assigned charges during parametrization: +To assign charges based on the provided conformer if conformer generation fails, first assign charges using the existing conformer with the `use_conformers` argument, then use the assigned charges during parametrization with `charge_from_molecules`: ```python from openff.toolkit import ForceField, Topology, Molecule From 4daf451ad0ff7f1a5ae7eb47f918acf055699b79 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Wed, 11 Sep 2024 00:13:30 +1000 Subject: [PATCH 10/10] Add working instructions for deleting constraints --- source/_ext/check_python_codeblocks.py | 31 ++++++++++++++++++++++---- source/faq.md | 8 ++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/source/_ext/check_python_codeblocks.py b/source/_ext/check_python_codeblocks.py index 904d8acf..159e536a 100644 --- a/source/_ext/check_python_codeblocks.py +++ b/source/_ext/check_python_codeblocks.py @@ -24,25 +24,48 @@ molecule = Molecule.from_smiles("C123C(C1)(C2)C3") topology = Topology.from_molecules([molecule]) -force_field = ForceField("openff_unconstrained-2.2.0.offxml") +force_field = ForceField("openff_unconstrained-2.2.0.offxml") +ff_unconstrained = force_field +ff_constrained = ForceField("openff-2.2.0.offxml") # Set the import hook -import sys import builtins +import sys + _old_import = __import__ _already_deleted = False + + def __import__(name, *args, **kwargs): """ Clear above variables on any new import of the toolkit """ global _already_deleted if name.startswith("openff.toolkit") and not _already_deleted: - global molecule, topology, force_field, ForceField, Molecule, Topology - del molecule, topology, force_field, ForceField, Molecule, Topology + global \ + molecule, \ + topology, \ + force_field, \ + ForceField, \ + Molecule, \ + Topology, \ + ff_constrained, \ + ff_unconstrained + del ( + molecule, + topology, + 
force_field, + ForceField, + Molecule, + Topology, + ff_constrained, + ff_unconstrained, + ) _already_deleted = True return _old_import(name, *args, **kwargs) + builtins.__import__ = __import__ # Execute the code block diff --git a/source/faq.md b/source/faq.md index 9a0c39c8..fa123610 100644 --- a/source/faq.md +++ b/source/faq.md @@ -301,8 +301,14 @@ Hydrogen-involving bonds are constrained with a single constraint entry in a `.o Adding or removing the inner `