From e9cccc2e8efb6c9671ab2b1c796c97220b9241f1 Mon Sep 17 00:00:00 2001 From: Shuming Liu Date: Fri, 14 Jun 2024 16:11:23 -0400 Subject: [PATCH] Make some small changes --- openabc/forcefields/cg_model.py | 15 ++++----- openabc/forcefields/moff_mrg_model.py | 2 ++ .../parameters/mixin_3spn2_config_parser.py | 6 ++-- .../forcefields/parsers/dna_3spn2_parser.py | 8 ++--- openabc/forcefields/parsers/hps_parser.py | 6 ++-- openabc/forcefields/parsers/moff_parser.py | 13 ++++---- openabc/forcefields/parsers/mpipi_parsers.py | 12 +++---- openabc/forcefields/parsers/mrg_parser.py | 6 ++-- openabc/forcefields/parsers/smog_parser.py | 7 ++-- openabc/lib/__init__.py | 1 + openabc/lib/ion_lib.py | 33 +++++++++++++++++++ openabc/utils/helper_functions.py | 2 +- openabc/utils/replica_exchange.py | 2 +- 13 files changed, 73 insertions(+), 40 deletions(-) create mode 100644 openabc/lib/ion_lib.py diff --git a/openabc/forcefields/cg_model.py b/openabc/forcefields/cg_model.py index 7a9acb8..5193ca7 100644 --- a/openabc/forcefields/cg_model.py +++ b/openabc/forcefields/cg_model.py @@ -11,9 +11,10 @@ try: from openmmplumed import PlumedForce except ImportError: - print('Please install openmmplumed to facilitate plumed functions.') + PlumedForce = None +import warnings from openabc.forcefields.rigid import createRigidBodies -import openabc.utils.helper_functions as helper_functions +from openabc.utils import write_pdb import sys import os @@ -91,7 +92,7 @@ def atoms_to_pdb(self, cg_pdb, reset_serial=True): atoms.loc[:, 'charge'] = '' if reset_serial: atoms['serial'] = list(range(1, len(atoms.index) + 1)) - helper_functions.write_pdb(atoms, cg_pdb) + write_pdb(atoms, cg_pdb) def create_system(self, top, use_pbc=True, box_a=500, box_b=500, box_c=500, remove_cmmotion=True): """ @@ -194,11 +195,9 @@ def add_plumed(self, plumed_script_path): """ with open(plumed_script_path, 'r') as input_reader: plumed_script = input_reader.read() - try: - force = PlumedForce(plumed_script) - self.system.addForce(force) - except NameError: - print('PlumedForce is not loaded.') + assert PlumedForce is not None, 'openmmplumed is not installed.' + force = PlumedForce(plumed_script) + self.system.addForce(force) def save_system(self, system_xml='system.xml'): """ diff --git a/openabc/forcefields/moff_mrg_model.py b/openabc/forcefields/moff_mrg_model.py index d0d931d..0058b0e 100644 --- a/openabc/forcefields/moff_mrg_model.py +++ b/openabc/forcefields/moff_mrg_model.py @@ -78,6 +78,8 @@ def add_protein_angles(self, threshold=130*np.pi/180, clip=False, force_group=2, any_theta0_beyond_threshold = True if clip and any_theta0_beyond_threshold: print(f'Decrease all the theta0 values larger than {threshold} to {threshold}.') + # set threshold as np.float32(threshold) to avoid warnings + threshold = np.float32(threshold) self.protein_angles.loc[self.protein_angles['theta0'] > threshold, 'theta0'] = threshold force = functional_terms.harmonic_angle_term(self.protein_angles, self.use_pbc, force_group) self.system.addForce(force) diff --git a/openabc/forcefields/parameters/mixin_3spn2_config_parser.py b/openabc/forcefields/parameters/mixin_3spn2_config_parser.py index 5fb506b..80ecb6c 100644 --- a/openabc/forcefields/parameters/mixin_3spn2_config_parser.py +++ b/openabc/forcefields/parameters/mixin_3spn2_config_parser.py @@ -77,14 +77,14 @@ def parse_row(row): 'Kb3': 'k_bond_3', 'Kb4': 'k_bond_4'}) flag = (self.bond_definition['DNA'] == 'B_curved') - self.bond_definition.loc[flag, 'r0'] = '' + self.bond_definition.loc[flag, 'r0'] = np.nan # will be set based on the reference structure # angle definition self.angle_definition['k_angle'] = self.angle_definition['epsilon']*2 self.angle_definition = self.angle_definition.rename(columns={'t0': 'theta0'}) flag1 = (self.angle_definition['DNA'].isin(['A', 'B'])) flag2 = (self.angle_definition['DNA'] == 'B_curved') self.angle_definition.loc[flag1, 'theta0'] *= _deg_to_rad - self.angle_definition.loc[flag2, 'theta0'] = '' + self.angle_definition.loc[flag2, 'theta0'] = np.nan # will be set based on the reference structure # stacking definition self.stacking_definition = self.stacking_definition.rename(columns={'t0': 'theta0'}) self.stacking_definition['theta0'] *= _deg_to_rad @@ -94,7 +94,7 @@ def parse_row(row): flag2 = (self.dihedral_definition['DNA'] == 'B_curved') x = self.dihedral_definition.loc[flag1, 'theta0'] self.dihedral_definition.loc[flag1, 'theta0'] = _deg_to_rad*(x + 180) - self.dihedral_definition.loc[flag2, 'theta0'] = '' + self.dihedral_definition.loc[flag2, 'theta0'] = np.nan # will be set based on the reference structure # base pair definition self.pair_definition['torsion'] *= _deg_to_rad self.pair_definition['sigma'] *= _angstrom_to_nm diff --git a/openabc/forcefields/parsers/dna_3spn2_parser.py b/openabc/forcefields/parsers/dna_3spn2_parser.py index 169c2c2..7b29f48 100644 --- a/openabc/forcefields/parsers/dna_3spn2_parser.py +++ b/openabc/forcefields/parsers/dna_3spn2_parser.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from openabc.utils import helper_functions +from openabc.utils import get_WC_paired_sequence, fix_pdb, write_pdb from openabc.forcefields.parameters import Mixin3SPN2ConfigParser from openabc.lib import _dna_nucleotides, _dna_WC_pair_dict, _angstrom_to_nm import subprocess @@ -69,7 +69,7 @@ def build_x3dna_template(self, temp_name='dna'): if n_chains == 2: sequence1, sequence2 = sequence_list[0], sequence_list[1] if len(sequence1) == len(sequence2): - if helper_functions.get_WC_paired_sequence(sequence1) == sequence2: + if get_WC_paired_sequence(sequence1) == sequence2: # one dsDNA molecule with W-C paired sequence flag = True if flag: @@ -581,14 +581,14 @@ def from_atomistic_pdb(cls, atomistic_pdb, cg_pdb, PSB_order=True, new_sequence= """ try: - atomistic_atoms = helper_functions.fix_pdb(atomistic_pdb) # fix pdb + atomistic_atoms = fix_pdb(atomistic_pdb) # fix pdb except Exception as e: print('Do not fix pdb file.') cg_atoms = cls.aa_to_cg(atomistic_atoms, PSB_order=PSB_order) # do coarse-graining if new_sequence is not None: print(f'Change to new sequence: {new_sequence}') cg_atoms = cls.change_sequence(cg_atoms, new_sequence) - helper_functions.write_pdb(cg_atoms, cg_pdb) + write_pdb(cg_atoms, cg_pdb) self = cls(dna_type) # directly set self.atoms from cg_atoms instead of loading cg_pdb # numerical accuracy is decreased when saving coordinates to pdb diff --git a/openabc/forcefields/parsers/hps_parser.py b/openabc/forcefields/parsers/hps_parser.py index 9b3cc68..9803903 100644 --- a/openabc/forcefields/parsers/hps_parser.py +++ b/openabc/forcefields/parsers/hps_parser.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from openabc.utils import helper_functions +from openabc.utils import parse_pdb, atomistic_pdb_to_ca_pdb from openabc.lib import _amino_acids, _kcal_to_kj import sys import os @@ -33,7 +33,7 @@ def __init__(self, ca_pdb, default_parse=True): """ self.pdb = ca_pdb - self.atoms = helper_functions.parse_pdb(ca_pdb) + self.atoms = parse_pdb(ca_pdb) # check if all the atoms are protein CA atoms assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all()) if default_parse: @@ -65,7 +65,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars A class instance. """ - helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) + atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) result = cls(ca_pdb, default_parse) return result diff --git a/openabc/forcefields/parsers/moff_parser.py b/openabc/forcefields/parsers/moff_parser.py index 47caf26..4604491 100644 --- a/openabc/forcefields/parsers/moff_parser.py +++ b/openabc/forcefields/parsers/moff_parser.py @@ -1,8 +1,7 @@ import numpy as np import pandas as pd import mdtraj -from openabc.utils import helper_functions -from openabc.utils.shadow_map import find_ca_pairs_from_atomistic_pdb +from openabc.utils import parse_pdb, atomistic_pdb_to_ca_pdb, find_ca_pairs_from_atomistic_pdb from openabc.lib import _amino_acids import sys import os @@ -17,8 +16,6 @@ LEU=0.0, LYS=1.0, MET=0.0, PHE=0.0, PRO=0.0, SER=0.0, THR=0.0, TRP=0.0, TYR=0.0, VAL=0.0) -_kcal_to_kj = 4.184 - class MOFFParser(object): """ MOFF protein parser. @@ -41,7 +38,7 @@ def __init__(self, atomistic_pdb, ca_pdb, default_parse=True): """ self.atomistic_pdb = atomistic_pdb self.pdb = ca_pdb - self.atoms = helper_functions.parse_pdb(ca_pdb) + self.atoms = parse_pdb(ca_pdb) # check if all the atoms are protein CA atoms assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all()) if default_parse: @@ -73,7 +70,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars A class instance. """ - helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) + atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) result = cls(atomistic_pdb, ca_pdb, default_parse) return result @@ -165,7 +162,7 @@ def parse_mol(self, get_native_pairs=True, frame=0, radius=0.1, bonded_radius=0. ss : None or sequence-like Secondary structure type of each residue. If None, no secondary structure information is provided and all the native pairs are kept. - If not None, then only native pairs within continuous ordered + If not None, then only native pairs within continuous ordered domains are kept. ordered_ss_names : sequence-like The names of ordered secondary structures. @@ -224,6 +221,8 @@ def parse_mol(self, get_native_pairs=True, frame=0, radius=0.1, bonded_radius=0. a2 = int(row['a2']) if a1 > a2: a1, a2 = a2, a1 + if self.atoms.loc[a1, 'chainID'] != self.atoms.loc[a2, 'chainID']: + continue if ss[a1] in ordered_ss_names: if all([x == ss[a1] for x in ss[a1:a2 + 1]]): new_native_pairs.loc[len(new_native_pairs.index)] = row diff --git a/openabc/forcefields/parsers/mpipi_parsers.py b/openabc/forcefields/parsers/mpipi_parsers.py index 7c6ab17..44ba320 100644 --- a/openabc/forcefields/parsers/mpipi_parsers.py +++ b/openabc/forcefields/parsers/mpipi_parsers.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from openabc.utils import helper_functions +from openabc.utils import parse_pdb, write_pdb, atomistic_pdb_to_ca_pdb, build_straight_CA_chain from openabc.lib import _amino_acids, _rna_nucleotides, _kcal_to_kj, _angstrom_to_nm import sys import os @@ -37,7 +37,7 @@ def __init__(self, ca_pdb, default_parse=True): """ self.pdb = ca_pdb - self.atoms = helper_functions.parse_pdb(ca_pdb) + self.atoms = parse_pdb(ca_pdb) # check if all the atoms are protein CA atoms assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all()) if default_parse: @@ -64,7 +64,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars Whether to parse with default settings. """ - helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) + atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) return cls(ca_pdb, default_parse) def parse_mol(self, exclude12=True, mass_dict=_mpipi_mass_dict, charge_dict=_mpipi_charge_dict): @@ -124,7 +124,7 @@ def __init__(self, cg_pdb, default_parse=True): """ self.pdb = cg_pdb - self.atoms = helper_functions.parse_pdb(self.pdb) + self.atoms = parse_pdb(self.pdb) # check if all the atoms are CG nucleotide atoms atom_names = self.atoms['name'] assert (self.atoms['resname'].isin(_rna_nucleotides).all() and atom_names.eq('RN').all()) @@ -159,12 +159,12 @@ def from_sequence(cls, seq, cg_pdb, write_TER=False, default_parse=True): """ n_atoms = len(seq) - atoms = helper_functions.build_straight_CA_chain(n_atoms, chainID='A', r0=0.5) + atoms = build_straight_CA_chain(n_atoms, chainID='A', r0=0.5) atoms.loc[:, 'name'] = 'RN' # RNA resnames should be from A, C, G, and U for i in range(n_atoms): atoms.loc[i, 'resname'] = seq[i] - helper_functions.write_pdb(atoms, cg_pdb, write_TER) + write_pdb(atoms, cg_pdb, write_TER) result = cls(cg_pdb, default_parse) return result diff --git a/openabc/forcefields/parsers/mrg_parser.py b/openabc/forcefields/parsers/mrg_parser.py index 31aba08..4e657d8 100644 --- a/openabc/forcefields/parsers/mrg_parser.py +++ b/openabc/forcefields/parsers/mrg_parser.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from openabc.utils import helper_functions +from openabc.utils import parse_pdb, atomistic_pdb_to_nucleotide_pdb from openabc.lib import _dna_nucleotides, _kcal_to_kj, _deg_to_rad import sys import os @@ -42,7 +42,7 @@ def __init__(self, cg_pdb, default_parse=True): """ self.pdb = cg_pdb - self.atoms = helper_functions.parse_pdb(self.pdb) + self.atoms = parse_pdb(self.pdb) # check if all the atoms are CG nucleotide atoms atom_names = self.atoms['name'] assert (self.atoms['resname'].isin(_dna_nucleotides).all() and atom_names.eq('DN').all()) @@ -86,7 +86,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, cg_pdb, write_TER=False, default_pars A class instance. """ - helper_functions.atomistic_pdb_to_nucleotide_pdb(atomistic_pdb, cg_pdb, write_TER) + atomistic_pdb_to_nucleotide_pdb(atomistic_pdb, cg_pdb, write_TER) result = cls(cg_pdb, default_parse) return result diff --git a/openabc/forcefields/parsers/smog_parser.py b/openabc/forcefields/parsers/smog_parser.py index 0383c99..5be7cb3 100644 --- a/openabc/forcefields/parsers/smog_parser.py +++ b/openabc/forcefields/parsers/smog_parser.py @@ -1,8 +1,7 @@ import numpy as np import pandas as pd import mdtraj -from openabc.utils import helper_functions -from openabc.utils.shadow_map import find_ca_pairs_from_atomistic_pdb +from openabc.utils import parse_pdb, atomistic_pdb_to_ca_pdb, find_ca_pairs_from_atomistic_pdb from openabc.lib import _amino_acids import sys import os @@ -39,7 +38,7 @@ def __init__(self, atomistic_pdb, ca_pdb, default_parse=True): """ self.atomistic_pdb = atomistic_pdb self.pdb = ca_pdb - self.atoms = helper_functions.parse_pdb(ca_pdb) + self.atoms = parse_pdb(ca_pdb) # check if all the atoms are protein CA atoms assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all()) if default_parse: @@ -66,7 +65,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars Whether to parse with default settings. """ - helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) + atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER) return cls(atomistic_pdb, ca_pdb, default_parse) def parse_exclusions(self, exclude12=True, exclude13=True, exclude14=True, exclude_native_pairs=True): diff --git a/openabc/lib/__init__.py b/openabc/lib/__init__.py index c8972ce..dfc4284 100644 --- a/openabc/lib/__init__.py +++ b/openabc/lib/__init__.py @@ -5,5 +5,6 @@ _amino_acid_3_letters_to_1_letter_dict) from .dna_lib import _dna_nucleotides, _dna_WC_pair_dict from .rna_lib import _rna_nucleotides +from .ion_lib import df_sodium_ion, df_chloride_ion, df_magnesium_ion diff --git a/openabc/lib/ion_lib.py b/openabc/lib/ion_lib.py new file mode 100644 index 0000000..9044fd7 --- /dev/null +++ b/openabc/lib/ion_lib.py @@ -0,0 +1,33 @@ +import pandas as pd + +df_ion_template = pd.DataFrame(dict(recname=['ATOM'], + serial=[0], + name=[None], + altLoc=[''], + resname=[None], + chainID=['X'], + resSeq=[0], + iCode=[''], + x=[0.0], + y=[0.0], + z=[0.0], + occupancy=[0.0], + tempFactor=[0.0], + element=[None], + charge=[None])) + +# sodium ion +df_sodium_ion = df_ion_template.copy() +df_sodium_ion.loc[:, ['name', 'resname', 'element']] = 'NA' +df_sodium_ion.loc[:, 'charge'] = 1.0 + +# chloride ion +df_chloride_ion = df_ion_template.copy() +df_chloride_ion.loc[:, ['name', 'resname', 'element']] = 'CL' +df_chloride_ion.loc[:, 'charge'] = -1.0 + +# magnesium ion +df_magnesium_ion = df_ion_template.copy() +df_magnesium_ion.loc[:, ['name', 'resname', 'element']] = 'MG' +df_magnesium_ion.loc[:, 'charge'] = 2.0 + diff --git a/openabc/utils/helper_functions.py b/openabc/utils/helper_functions.py index 92176bb..87ffb0f 100644 --- a/openabc/utils/helper_functions.py +++ b/openabc/utils/helper_functions.py @@ -401,7 +401,7 @@ def fix_pdb(pdb_file): Output fixed structure. """ - assert pdbfixer is not None + assert pdbfixer is not None, 'pdbfixer is not installed.' fixer = pdbfixer.PDBFixer(filename=pdb_file) fixer.findMissingResidues() chains = list(fixer.topology.chains()) diff --git a/openabc/utils/replica_exchange.py b/openabc/utils/replica_exchange.py index f486e3b..226b2e4 100644 --- a/openabc/utils/replica_exchange.py +++ b/openabc/utils/replica_exchange.py @@ -86,7 +86,7 @@ def __init__(self, backend, n_replicas, rank, positions, top, system, temperatur https://pytorch.org/docs/stable/distributed.html """ - assert torch is not None # ensure torch is installed and loaded correctly + assert torch is not None, 'torch is not installed.' self.backend = backend self.n_replicas = n_replicas assert self.n_replicas == int(os.environ['WORLD_SIZE'])