From e9cccc2e8efb6c9671ab2b1c796c97220b9241f1 Mon Sep 17 00:00:00 2001
From: Shuming Liu <smliu@mit.edu>
Date: Fri, 14 Jun 2024 16:11:23 -0400
Subject: [PATCH] Make some small changes

---
 openabc/forcefields/cg_model.py               | 15 ++++-----
 openabc/forcefields/moff_mrg_model.py         |  2 ++
 .../parameters/mixin_3spn2_config_parser.py   |  6 ++--
 .../forcefields/parsers/dna_3spn2_parser.py   |  8 ++---
 openabc/forcefields/parsers/hps_parser.py     |  6 ++--
 openabc/forcefields/parsers/moff_parser.py    | 13 ++++----
 openabc/forcefields/parsers/mpipi_parsers.py  | 12 +++----
 openabc/forcefields/parsers/mrg_parser.py     |  6 ++--
 openabc/forcefields/parsers/smog_parser.py    |  7 ++--
 openabc/lib/__init__.py                       |  1 +
 openabc/lib/ion_lib.py                        | 33 +++++++++++++++++++
 openabc/utils/helper_functions.py             |  2 +-
 openabc/utils/replica_exchange.py             |  2 +-
 13 files changed, 73 insertions(+), 40 deletions(-)
 create mode 100644 openabc/lib/ion_lib.py

diff --git a/openabc/forcefields/cg_model.py b/openabc/forcefields/cg_model.py
index 7a9acb8..5193ca7 100644
--- a/openabc/forcefields/cg_model.py
+++ b/openabc/forcefields/cg_model.py
@@ -11,9 +11,10 @@
 try:
     from openmmplumed import PlumedForce
 except ImportError:
-    print('Please install openmmplumed to facilitate plumed functions.')
+    PlumedForce = None
+import warnings
 from openabc.forcefields.rigid import createRigidBodies
-import openabc.utils.helper_functions as helper_functions
+from openabc.utils import write_pdb
 import sys
 import os
 
@@ -91,7 +92,7 @@ def atoms_to_pdb(self, cg_pdb, reset_serial=True):
         atoms.loc[:, 'charge'] = ''
         if reset_serial:
             atoms['serial'] = list(range(1, len(atoms.index) + 1))
-        helper_functions.write_pdb(atoms, cg_pdb)
+        write_pdb(atoms, cg_pdb)
     
     def create_system(self, top, use_pbc=True, box_a=500, box_b=500, box_c=500, remove_cmmotion=True):
         """
@@ -194,11 +195,9 @@ def add_plumed(self, plumed_script_path):
         """
         with open(plumed_script_path, 'r') as input_reader:
             plumed_script = input_reader.read()
-        try:
-            force = PlumedForce(plumed_script)
-            self.system.addForce(force)
-        except NameError:
-            print('PlumedForce is not loaded.')
+        assert PlumedForce is not None, 'openmmplumed is not installed.'
+        force = PlumedForce(plumed_script)
+        self.system.addForce(force)
     
     def save_system(self, system_xml='system.xml'):
         """
diff --git a/openabc/forcefields/moff_mrg_model.py b/openabc/forcefields/moff_mrg_model.py
index d0d931d..0058b0e 100644
--- a/openabc/forcefields/moff_mrg_model.py
+++ b/openabc/forcefields/moff_mrg_model.py
@@ -78,6 +78,8 @@ def add_protein_angles(self, threshold=130*np.pi/180, clip=False, force_group=2,
                     any_theta0_beyond_threshold = True
             if clip and any_theta0_beyond_threshold:
                 print(f'Decrease all the theta0 values larger than {threshold} to {threshold}.')
+                # set threshold as np.float32(threshold) to avoid warnings
+                threshold = np.float32(threshold)
                 self.protein_angles.loc[self.protein_angles['theta0'] > threshold, 'theta0'] = threshold
             force = functional_terms.harmonic_angle_term(self.protein_angles, self.use_pbc, force_group)
             self.system.addForce(force)
diff --git a/openabc/forcefields/parameters/mixin_3spn2_config_parser.py b/openabc/forcefields/parameters/mixin_3spn2_config_parser.py
index 5fb506b..80ecb6c 100644
--- a/openabc/forcefields/parameters/mixin_3spn2_config_parser.py
+++ b/openabc/forcefields/parameters/mixin_3spn2_config_parser.py
@@ -77,14 +77,14 @@ def parse_row(row):
                                                                     'Kb3': 'k_bond_3', 
                                                                     'Kb4': 'k_bond_4'})
         flag = (self.bond_definition['DNA'] == 'B_curved')
-        self.bond_definition.loc[flag, 'r0'] = ''
+        self.bond_definition.loc[flag, 'r0'] = np.nan # will be set based on the reference structure
         # angle definition
         self.angle_definition['k_angle'] = self.angle_definition['epsilon']*2
         self.angle_definition = self.angle_definition.rename(columns={'t0': 'theta0'})
         flag1 = (self.angle_definition['DNA'].isin(['A', 'B']))
         flag2 = (self.angle_definition['DNA'] == 'B_curved')
         self.angle_definition.loc[flag1, 'theta0'] *= _deg_to_rad
-        self.angle_definition.loc[flag2, 'theta0'] = ''
+        self.angle_definition.loc[flag2, 'theta0'] = np.nan # will be set based on the reference structure
         # stacking definition
         self.stacking_definition = self.stacking_definition.rename(columns={'t0': 'theta0'})
         self.stacking_definition['theta0'] *= _deg_to_rad
@@ -94,7 +94,7 @@ def parse_row(row):
         flag2 = (self.dihedral_definition['DNA'] == 'B_curved')
         x = self.dihedral_definition.loc[flag1, 'theta0']
         self.dihedral_definition.loc[flag1, 'theta0'] = _deg_to_rad*(x + 180)
-        self.dihedral_definition.loc[flag2, 'theta0'] = ''
+        self.dihedral_definition.loc[flag2, 'theta0'] = np.nan # will be set based on the reference structure
         # base pair definition
         self.pair_definition['torsion'] *= _deg_to_rad
         self.pair_definition['sigma'] *= _angstrom_to_nm
diff --git a/openabc/forcefields/parsers/dna_3spn2_parser.py b/openabc/forcefields/parsers/dna_3spn2_parser.py
index 169c2c2..7b29f48 100644
--- a/openabc/forcefields/parsers/dna_3spn2_parser.py
+++ b/openabc/forcefields/parsers/dna_3spn2_parser.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from openabc.utils import helper_functions
+from openabc.utils import get_WC_paired_sequence, fix_pdb, write_pdb
 from openabc.forcefields.parameters import Mixin3SPN2ConfigParser
 from openabc.lib import _dna_nucleotides, _dna_WC_pair_dict, _angstrom_to_nm
 import subprocess
@@ -69,7 +69,7 @@ def build_x3dna_template(self, temp_name='dna'):
         if n_chains == 2:
             sequence1, sequence2 = sequence_list[0], sequence_list[1]
             if len(sequence1) == len(sequence2):
-                if helper_functions.get_WC_paired_sequence(sequence1) == sequence2:
+                if get_WC_paired_sequence(sequence1) == sequence2:
                     # one dsDNA molecule with W-C paired sequence
                     flag = True
         if flag:
@@ -581,14 +581,14 @@ def from_atomistic_pdb(cls, atomistic_pdb, cg_pdb, PSB_order=True, new_sequence=
         
         """
         try:
-            atomistic_atoms = helper_functions.fix_pdb(atomistic_pdb) # fix pdb
+            atomistic_atoms = fix_pdb(atomistic_pdb) # fix pdb
         except Exception as e:
             print('Do not fix pdb file.')
         cg_atoms = cls.aa_to_cg(atomistic_atoms, PSB_order=PSB_order) # do coarse-graining
         if new_sequence is not None:
             print(f'Change to new sequence: {new_sequence}')
             cg_atoms = cls.change_sequence(cg_atoms, new_sequence)
-        helper_functions.write_pdb(cg_atoms, cg_pdb)
+        write_pdb(cg_atoms, cg_pdb)
         self = cls(dna_type)
         # directly set self.atoms from cg_atoms instead of loading cg_pdb
         # numerical accuracy is decreased when saving coordinates to pdb
diff --git a/openabc/forcefields/parsers/hps_parser.py b/openabc/forcefields/parsers/hps_parser.py
index 9b3cc68..9803903 100644
--- a/openabc/forcefields/parsers/hps_parser.py
+++ b/openabc/forcefields/parsers/hps_parser.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from openabc.utils import helper_functions
+from openabc.utils import parse_pdb, atomistic_pdb_to_ca_pdb
 from openabc.lib import _amino_acids, _kcal_to_kj
 import sys
 import os
@@ -33,7 +33,7 @@ def __init__(self, ca_pdb, default_parse=True):
         
         """
         self.pdb = ca_pdb
-        self.atoms = helper_functions.parse_pdb(ca_pdb)
+        self.atoms = parse_pdb(ca_pdb)
         # check if all the atoms are protein CA atoms
         assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all())
         if default_parse:
@@ -65,7 +65,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars
             A class instance. 
         
         """
-        helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
+        atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
         result = cls(ca_pdb, default_parse)
         return result
     
diff --git a/openabc/forcefields/parsers/moff_parser.py b/openabc/forcefields/parsers/moff_parser.py
index 47caf26..4604491 100644
--- a/openabc/forcefields/parsers/moff_parser.py
+++ b/openabc/forcefields/parsers/moff_parser.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pandas as pd
 import mdtraj
-from openabc.utils import helper_functions
-from openabc.utils.shadow_map import find_ca_pairs_from_atomistic_pdb
+from openabc.utils import parse_pdb, atomistic_pdb_to_ca_pdb, find_ca_pairs_from_atomistic_pdb
 from openabc.lib import _amino_acids
 import sys
 import os
@@ -17,8 +16,6 @@
                                     LEU=0.0, LYS=1.0, MET=0.0, PHE=0.0, PRO=0.0,
                                     SER=0.0, THR=0.0, TRP=0.0, TYR=0.0, VAL=0.0)
 
-_kcal_to_kj = 4.184
-
 class MOFFParser(object):
     """
     MOFF protein parser.
@@ -41,7 +38,7 @@ def __init__(self, atomistic_pdb, ca_pdb, default_parse=True):
         """
         self.atomistic_pdb = atomistic_pdb
         self.pdb = ca_pdb
-        self.atoms = helper_functions.parse_pdb(ca_pdb)
+        self.atoms = parse_pdb(ca_pdb)
         # check if all the atoms are protein CA atoms
         assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all())
         if default_parse:
@@ -73,7 +70,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars
             A class instance. 
         
         """
-        helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
+        atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
         result = cls(atomistic_pdb, ca_pdb, default_parse)
         return result
     
@@ -165,7 +162,7 @@ def parse_mol(self, get_native_pairs=True, frame=0, radius=0.1, bonded_radius=0.
         ss : None or sequence-like
             Secondary structure type of each residue. 
             If None, no secondary structure information is provided and all the native pairs are kept.
-            If not None, then only native pairs within continuous ordered 
+            If not None, then only native pairs within continuous ordered domains are kept.
         
         ordered_ss_names : sequence-like
             The names of ordered secondary structures. 
@@ -224,6 +221,8 @@ def parse_mol(self, get_native_pairs=True, frame=0, radius=0.1, bonded_radius=0.
                     a2 = int(row['a2'])
                     if a1 > a2:
                         a1, a2 = a2, a1
+                    if self.atoms.loc[a1, 'chainID'] != self.atoms.loc[a2, 'chainID']:
+                        continue
                     if ss[a1] in ordered_ss_names:
                         if all([x == ss[a1] for x in ss[a1:a2 + 1]]):
                             new_native_pairs.loc[len(new_native_pairs.index)] = row
diff --git a/openabc/forcefields/parsers/mpipi_parsers.py b/openabc/forcefields/parsers/mpipi_parsers.py
index 7c6ab17..44ba320 100644
--- a/openabc/forcefields/parsers/mpipi_parsers.py
+++ b/openabc/forcefields/parsers/mpipi_parsers.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from openabc.utils import helper_functions
+from openabc.utils import parse_pdb, write_pdb, atomistic_pdb_to_ca_pdb, build_straight_CA_chain
 from openabc.lib import _amino_acids, _rna_nucleotides, _kcal_to_kj, _angstrom_to_nm
 import sys
 import os
@@ -37,7 +37,7 @@ def __init__(self, ca_pdb, default_parse=True):
         
         """
         self.pdb = ca_pdb
-        self.atoms = helper_functions.parse_pdb(ca_pdb)
+        self.atoms = parse_pdb(ca_pdb)
         # check if all the atoms are protein CA atoms
         assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all())
         if default_parse:
@@ -64,7 +64,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars
             Whether to parse with default settings. 
         
         """
-        helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
+        atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
         return cls(ca_pdb, default_parse)
     
     def parse_mol(self, exclude12=True, mass_dict=_mpipi_mass_dict, charge_dict=_mpipi_charge_dict):
@@ -124,7 +124,7 @@ def __init__(self, cg_pdb, default_parse=True):
         
         """
         self.pdb = cg_pdb
-        self.atoms = helper_functions.parse_pdb(self.pdb)
+        self.atoms = parse_pdb(self.pdb)
         # check if all the atoms are CG nucleotide atoms
         atom_names = self.atoms['name']
         assert (self.atoms['resname'].isin(_rna_nucleotides).all() and atom_names.eq('RN').all())
@@ -159,12 +159,12 @@ def from_sequence(cls, seq, cg_pdb, write_TER=False, default_parse=True):
 
         """
         n_atoms = len(seq)
-        atoms = helper_functions.build_straight_CA_chain(n_atoms, chainID='A', r0=0.5)
+        atoms = build_straight_CA_chain(n_atoms, chainID='A', r0=0.5)
         atoms.loc[:, 'name'] = 'RN'
         # RNA resnames should be from A, C, G, and U
         for i in range(n_atoms):
             atoms.loc[i, 'resname'] = seq[i]
-        helper_functions.write_pdb(atoms, cg_pdb, write_TER)
+        write_pdb(atoms, cg_pdb, write_TER)
         result = cls(cg_pdb, default_parse)
         return result
     
diff --git a/openabc/forcefields/parsers/mrg_parser.py b/openabc/forcefields/parsers/mrg_parser.py
index 31aba08..4e657d8 100644
--- a/openabc/forcefields/parsers/mrg_parser.py
+++ b/openabc/forcefields/parsers/mrg_parser.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from openabc.utils import helper_functions
+from openabc.utils import parse_pdb, atomistic_pdb_to_nucleotide_pdb
 from openabc.lib import _dna_nucleotides, _kcal_to_kj, _deg_to_rad
 import sys
 import os
@@ -42,7 +42,7 @@ def __init__(self, cg_pdb, default_parse=True):
         
         """
         self.pdb = cg_pdb
-        self.atoms = helper_functions.parse_pdb(self.pdb)
+        self.atoms = parse_pdb(self.pdb)
         # check if all the atoms are CG nucleotide atoms
         atom_names = self.atoms['name']
         assert (self.atoms['resname'].isin(_dna_nucleotides).all() and atom_names.eq('DN').all())
@@ -86,7 +86,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, cg_pdb, write_TER=False, default_pars
             A class instance. 
         
         """
-        helper_functions.atomistic_pdb_to_nucleotide_pdb(atomistic_pdb, cg_pdb, write_TER)
+        atomistic_pdb_to_nucleotide_pdb(atomistic_pdb, cg_pdb, write_TER)
         result = cls(cg_pdb, default_parse)
         return result
     
diff --git a/openabc/forcefields/parsers/smog_parser.py b/openabc/forcefields/parsers/smog_parser.py
index 0383c99..5be7cb3 100644
--- a/openabc/forcefields/parsers/smog_parser.py
+++ b/openabc/forcefields/parsers/smog_parser.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pandas as pd
 import mdtraj
-from openabc.utils import helper_functions
-from openabc.utils.shadow_map import find_ca_pairs_from_atomistic_pdb
+from openabc.utils import parse_pdb, atomistic_pdb_to_ca_pdb, find_ca_pairs_from_atomistic_pdb
 from openabc.lib import _amino_acids
 import sys
 import os
@@ -39,7 +38,7 @@ def __init__(self, atomistic_pdb, ca_pdb, default_parse=True):
         """
         self.atomistic_pdb = atomistic_pdb
         self.pdb = ca_pdb
-        self.atoms = helper_functions.parse_pdb(ca_pdb)
+        self.atoms = parse_pdb(ca_pdb)
         # check if all the atoms are protein CA atoms
         assert ((self.atoms['resname'].isin(_amino_acids)).all() and self.atoms['name'].eq('CA').all())
         if default_parse:
@@ -66,7 +65,7 @@ def from_atomistic_pdb(cls, atomistic_pdb, ca_pdb, write_TER=False, default_pars
             Whether to parse with default settings. 
         
         """
-        helper_functions.atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
+        atomistic_pdb_to_ca_pdb(atomistic_pdb, ca_pdb, write_TER)
         return cls(atomistic_pdb, ca_pdb, default_parse)
     
     def parse_exclusions(self, exclude12=True, exclude13=True, exclude14=True, exclude_native_pairs=True):
diff --git a/openabc/lib/__init__.py b/openabc/lib/__init__.py
index c8972ce..dfc4284 100644
--- a/openabc/lib/__init__.py
+++ b/openabc/lib/__init__.py
@@ -5,5 +5,6 @@
                           _amino_acid_3_letters_to_1_letter_dict)
 from .dna_lib import _dna_nucleotides, _dna_WC_pair_dict
 from .rna_lib import _rna_nucleotides
+from .ion_lib import df_sodium_ion, df_chloride_ion, df_magnesium_ion
 
 
diff --git a/openabc/lib/ion_lib.py b/openabc/lib/ion_lib.py
new file mode 100644
index 0000000..9044fd7
--- /dev/null
+++ b/openabc/lib/ion_lib.py
@@ -0,0 +1,33 @@
+import pandas as pd
+
+df_ion_template = pd.DataFrame(dict(recname=['ATOM'], 
+                                    serial=[0], 
+                                    name=[None], 
+                                    altLoc=[''], 
+                                    resname=[None], 
+                                    chainID=['X'], 
+                                    resSeq=[0], 
+                                    iCode=[''], 
+                                    x=[0.0], 
+                                    y=[0.0], 
+                                    z=[0.0], 
+                                    occupancy=[0.0], 
+                                    tempFactor=[0.0], 
+                                    element=[None], 
+                                    charge=[None]))
+
+# sodium ion
+df_sodium_ion = df_ion_template.copy()
+df_sodium_ion.loc[:, ['name', 'resname', 'element']] = 'NA'
+df_sodium_ion.loc[:, 'charge'] = 1.0
+
+# chloride ion
+df_chloride_ion = df_ion_template.copy()
+df_chloride_ion.loc[:, ['name', 'resname', 'element']] = 'CL'
+df_chloride_ion.loc[:, 'charge'] = -1.0
+
+# magnesium ion
+df_magnesium_ion = df_ion_template.copy()
+df_magnesium_ion.loc[:, ['name', 'resname', 'element']] = 'MG'
+df_magnesium_ion.loc[:, 'charge'] = 2.0
+
diff --git a/openabc/utils/helper_functions.py b/openabc/utils/helper_functions.py
index 92176bb..87ffb0f 100644
--- a/openabc/utils/helper_functions.py
+++ b/openabc/utils/helper_functions.py
@@ -401,7 +401,7 @@ def fix_pdb(pdb_file):
         Output fixed structure. 
 
     """
-    assert pdbfixer is not None
+    assert pdbfixer is not None, 'pdbfixer is not installed.'
     fixer = pdbfixer.PDBFixer(filename=pdb_file)
     fixer.findMissingResidues()
     chains = list(fixer.topology.chains())
diff --git a/openabc/utils/replica_exchange.py b/openabc/utils/replica_exchange.py
index f486e3b..226b2e4 100644
--- a/openabc/utils/replica_exchange.py
+++ b/openabc/utils/replica_exchange.py
@@ -86,7 +86,7 @@ def __init__(self, backend, n_replicas, rank, positions, top, system, temperatur
         https://pytorch.org/docs/stable/distributed.html
 
         """
-        assert torch is not None # ensure torch is installed and loaded correctly
+        assert torch is not None, 'torch is not installed.'
         self.backend = backend
         self.n_replicas = n_replicas
         assert self.n_replicas == int(os.environ['WORLD_SIZE'])