diff --git a/pom.xml b/pom.xml index 80fc8a3..2affca9 100644 --- a/pom.xml +++ b/pom.xml @@ -76,6 +76,11 @@ sqlite-jdbc 3.21.0.1 + + hk.ust.bioinformatics + ProteomicsLibrary + 1.0 + diff --git a/src/main/java/proteomics/ECL2.java b/src/main/java/proteomics/ECL2.java index 3454db4..9c7d9d6 100644 --- a/src/main/java/proteomics/ECL2.java +++ b/src/main/java/proteomics/ECL2.java @@ -7,7 +7,7 @@ import proteomics.Search.SearchWrap; import proteomics.Search.Search; import proteomics.Spectrum.PreSpectra; -import proteomics.TheoSeq.MassTool; +import ProteomicsLibrary.MassTool; import proteomics.Validation.CalFDR; import uk.ac.ebi.pride.tools.jmzreader.JMzReader; import uk.ac.ebi.pride.tools.mgf_parser.MgfFile; diff --git a/src/main/java/proteomics/Index/BuildIndex.java b/src/main/java/proteomics/Index/BuildIndex.java index 53372fa..97d374a 100644 --- a/src/main/java/proteomics/Index/BuildIndex.java +++ b/src/main/java/proteomics/Index/BuildIndex.java @@ -3,8 +3,8 @@ import org.apache.commons.math3.util.CombinatoricsUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import proteomics.TheoSeq.DbTool; -import proteomics.TheoSeq.MassTool; +import ProteomicsLibrary.*; +import ProteomicsLibrary.Types.*; import proteomics.Types.*; import java.util.*; @@ -90,17 +90,17 @@ public BuildIndex(Map parameter_map) throws Exception { DbTool db_tool_obj = new DbTool(db_path, parameter_map.get("database_type")); if (parameter_map.get("append_contaminants").contentEquals("1")) { DbTool contaminantsDb = new DbTool(null, "contaminants"); - pro_seq_map = contaminantsDb.getProSeqMap(); - pro_seq_map.putAll(db_tool_obj.getProSeqMap()); // using the target sequence to replace contaminant sequence if there is conflict. - pro_annotate_map = contaminantsDb.getProAnnotateMap(); - pro_annotate_map.putAll(db_tool_obj.getProAnnotateMap()); // using the target sequence to replace contaminant sequence if there is conflict. + pro_seq_map = contaminantsDb.getProteinSequenceMap(); + pro_seq_map.putAll(db_tool_obj.getProteinSequenceMap()); // using the target sequence to replace contaminant sequence if there is conflict. + pro_annotate_map = contaminantsDb.getProteinAnnotateMap(); + pro_annotate_map.putAll(db_tool_obj.getProteinAnnotateMap()); // using the target sequence to replace contaminant sequence if there is conflict. } else { - pro_seq_map = db_tool_obj.getProSeqMap(); - pro_annotate_map = db_tool_obj.getProAnnotateMap(); + pro_seq_map = db_tool_obj.getProteinSequenceMap(); + pro_annotate_map = db_tool_obj.getProteinAnnotateMap(); } // define a new MassTool object - mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", mz_bin_size, one_minus_bin_offset); + mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", true, mz_bin_size * 0.5, one_minus_bin_offset, "N14", "[]"); // generate seq_pro_map Map seq_term_map = new HashMap<>(); @@ -142,7 +142,7 @@ public BuildIndex(Map parameter_map) throws Exception { // mod free Set linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm, linker_type); if (!linkSiteSet.isEmpty()) { - double totalMass = (mass_tool_obj.calResidueMass(seq) + MassTool.H2O); + double totalMass = (mass_tool_obj.calResidueMass(seq) + mass_tool_obj.H2O); int bin = massToBin(totalMass); if (bin_seq_map.containsKey(bin)) { bin_seq_map.get(bin).add(seq); @@ -161,7 +161,7 @@ public BuildIndex(Map parameter_map) throws Exception { linkSiteSet = new HashSet<>(5, 1); linkSiteSet.add(varSeq.linkSite); if (!linkSiteSet.isEmpty()) { - double totalMass = (mass_tool_obj.calResidueMass(varSeq.seq) + MassTool.H2O); + double totalMass = (mass_tool_obj.calResidueMass(varSeq.seq) + mass_tool_obj.H2O); int bin = massToBin(totalMass); if (bin_seq_map.containsKey(bin)) { bin_seq_map.get(bin).add(varSeq.seq); @@ -517,10 +517,10 @@ private List> generateLocalIdxModMassMap(int[] idxArray, Ma private Set checkKCTermMod(Set varSeqSet) { // eliminate those sequence that the middle amino acids having the same mod mass and the n-term and the first amino acid or the c-term and the last amino acid have the same mod mass. todo: check String[] varSeqArray = varSeqSet.toArray(new String[varSeqSet.size()]); Arrays.sort(varSeqArray); // Make sure that nK[].... is before n[]K..., so that n[]K... will be kept. - int seqLength = MassTool.seqToAAList(varSeqArray[0]).length; + int seqLength = MassTool.seqToAAList(varSeqArray[0], "[]").length; AA[][] aaArrays = new AA[varSeqArray.length][seqLength]; for (int i = 0; i < varSeqArray.length; ++i) { - aaArrays[i] = MassTool.seqToAAList(varSeqArray[i]); + aaArrays[i] = MassTool.seqToAAList(varSeqArray[i], "[]"); } if (aaArrays.length > 1) { @@ -528,10 +528,10 @@ private Set checkKCTermMod(Set varSeqSet) { // eliminate those s for (int i = 0; i < aaArrays.length - 1; ++i) { boolean keep = true; for (int j = i + 1; j < aaArrays.length; ++j) { - if ((Math.abs(aaArrays[i][0].delta_mass - aaArrays[j][1].delta_mass) < varModMassResolution) && (Math.abs(aaArrays[i][1].delta_mass - aaArrays[j][0].delta_mass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 2].delta_mass - aaArrays[j][seqLength - 1].delta_mass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 1].delta_mass - aaArrays[j][seqLength - 2].delta_mass) < varModMassResolution)) { + if ((Math.abs(aaArrays[i][0].ptmDeltaMass - aaArrays[j][1].ptmDeltaMass) < varModMassResolution) && (Math.abs(aaArrays[i][1].ptmDeltaMass - aaArrays[j][0].ptmDeltaMass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 2].ptmDeltaMass - aaArrays[j][seqLength - 1].ptmDeltaMass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 1].ptmDeltaMass - aaArrays[j][seqLength - 2].ptmDeltaMass) < varModMassResolution)) { keep = false; for (int k = 2; k < seqLength - 2; ++k) { - if (Math.abs(aaArrays[i][k].delta_mass - aaArrays[j][k].delta_mass) > varModMassResolution) { + if (Math.abs(aaArrays[i][k].ptmDeltaMass - aaArrays[j][k].ptmDeltaMass) > varModMassResolution) { keep = true; break; } @@ -553,19 +553,19 @@ private Set checkKCTermMod(Set varSeqSet) { // eliminate those s } private Set getLinkSiteSet(String seq, boolean n_term, boolean c_term, short linker_type) { - AA[] aa_list = MassTool.seqToAAList(seq); + AA[] aa_list = MassTool.seqToAAList(seq, "[]"); Set output = new HashSet<>(5, 1); for (int i = 1; i < aa_list.length - 2; ++i) { - if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) { + if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) { output.add((short) i); - } else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) { + } else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) { output.add((short) i); } } - if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].delta_mass) < varModMassResolution)) { + if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].ptmDeltaMass) < varModMassResolution)) { output.add((short) 0); } - if (linker_type == 1 && c_term && aa_list[aa_list.length - 2].aa == 'K' && (Math.abs(aa_list[aa_list.length - 2].delta_mass) < varModMassResolution)) { + if (linker_type == 1 && c_term && aa_list[aa_list.length - 2].aa == 'K' && (Math.abs(aa_list[aa_list.length - 2].ptmDeltaMass) < varModMassResolution)) { output.add((short) (aa_list.length - 2)); } return output; diff --git a/src/main/java/proteomics/Search/CalEValue.java b/src/main/java/proteomics/Search/CalEValue.java index b0d6e29..5064612 100644 --- a/src/main/java/proteomics/Search/CalEValue.java +++ b/src/main/java/proteomics/Search/CalEValue.java @@ -4,7 +4,8 @@ import org.slf4j.LoggerFactory; import proteomics.ECL2; import proteomics.Index.BuildIndex; -import proteomics.TheoSeq.MassTool; +import ProteomicsLibrary.MassTool; +import ProteomicsLibrary.Types.*; import proteomics.Types.*; import java.io.BufferedWriter; diff --git a/src/main/java/proteomics/Search/Search.java b/src/main/java/proteomics/Search/Search.java index ff46a50..1fc4dbe 100644 --- a/src/main/java/proteomics/Search/Search.java +++ b/src/main/java/proteomics/Search/Search.java @@ -1,7 +1,8 @@ package proteomics.Search; import proteomics.ECL2; -import proteomics.TheoSeq.MassTool; +import ProteomicsLibrary.MassTool; +import ProteomicsLibrary.Types.*; import proteomics.Types.*; import proteomics.Index.BuildIndex; diff --git a/src/main/java/proteomics/Search/SearchWrap.java b/src/main/java/proteomics/Search/SearchWrap.java index 6cc3ded..d1ddd10 100644 --- a/src/main/java/proteomics/Search/SearchWrap.java +++ b/src/main/java/proteomics/Search/SearchWrap.java @@ -3,7 +3,8 @@ import proteomics.ECL2; import proteomics.Index.BuildIndex; import proteomics.Spectrum.PreSpectrum; -import proteomics.TheoSeq.MassTool; +import ProteomicsLibrary.MassTool; +import ProteomicsLibrary.Types.*; import proteomics.Types.*; import uk.ac.ebi.pride.tools.jmzreader.JMzReader; import uk.ac.ebi.pride.tools.jmzreader.JMzReaderException; @@ -68,7 +69,7 @@ public Entry call() throws IOException, JMzReaderException, SQLException { if (ECL2.debug) { BufferedWriter writer = new BufferedWriter(new FileWriter(scanId + ".xcorr.spectrum.csv")); writer.write("bin_idx,intensity\n"); - for (int idx : xcorrPL.getIdxSet()) { + for (int idx : xcorrPL.getNonzeroIdx()) { writer.write(idx + "," + xcorrPL.get(idx) + "\n"); } writer.close(); @@ -214,7 +215,7 @@ private int getC13Num(double exp_mass, double theo_mass) { private String addFixMod(String seq, int linkSite) { Map fix_mod_map = build_index_obj.getFixModMap(); - AA[] aaList = MassTool.seqToAAList(seq); + AA[] aaList = MassTool.seqToAAList(seq, "[]"); StringBuilder sb = new StringBuilder(seq.length() * 3); for (int i = 0; i < aaList.length; ++i) { AA aa = aaList[i]; @@ -223,8 +224,8 @@ private String addFixMod(String seq, int linkSite) { } else if (Math.abs(fix_mod_map.get(aa.aa)) > 1e-6) { sb.append(String.format(Locale.US, "%c[%.3f]", aa.aa, fix_mod_map.get(aa.aa))); } else { - if (Math.abs(aa.delta_mass) > 1e-6) { - sb.append(String.format(Locale.US, "%c[%.3f]", aa.aa, aa.delta_mass)); + if (Math.abs(aa.ptmDeltaMass) > 1e-6) { + sb.append(String.format(Locale.US, "%c[%.3f]", aa.aa, aa.ptmDeltaMass)); } else { sb.append(aa.aa); } diff --git a/src/main/java/proteomics/Spectrum/PreSpectra.java b/src/main/java/proteomics/Spectrum/PreSpectra.java index 0d12f06..fc5538a 100644 --- a/src/main/java/proteomics/Spectrum/PreSpectra.java +++ b/src/main/java/proteomics/Spectrum/PreSpectra.java @@ -4,7 +4,7 @@ import org.slf4j.LoggerFactory; import proteomics.ECL2; import proteomics.Index.BuildIndex; -import proteomics.TheoSeq.MassTool; +import ProteomicsLibrary.MassTool; import uk.ac.ebi.pride.tools.jmzreader.JMzReader; import uk.ac.ebi.pride.tools.jmzreader.JMzReaderException; import uk.ac.ebi.pride.tools.jmzreader.model.*; @@ -42,7 +42,7 @@ public PreSpectra(JMzReader spectra_parser, double ms1Tolerance, double leftInve } } - IsotopeDistribution isotopeDistribution = new IsotopeDistribution(build_index_obj.returnMassTool().elementTable, 0, "N14"); + IsotopeDistribution isotopeDistribution = new IsotopeDistribution(build_index_obj.returnMassTool().getElementTable(), 0, "N14"); // prepare SQL database Connection sqlConnection = DriverManager.getConnection(sqlPath); diff --git a/src/main/java/proteomics/Spectrum/PreSpectrum.java b/src/main/java/proteomics/Spectrum/PreSpectrum.java index ef09298..2f5717a 100644 --- a/src/main/java/proteomics/Spectrum/PreSpectrum.java +++ b/src/main/java/proteomics/Spectrum/PreSpectrum.java @@ -1,8 +1,8 @@ package proteomics.Spectrum; import proteomics.ECL2; -import proteomics.TheoSeq.MassTool; -import proteomics.Types.SparseVector; +import ProteomicsLibrary.MassTool; +import ProteomicsLibrary.Types.*; import java.io.BufferedWriter; import java.io.FileWriter; diff --git a/src/main/java/proteomics/TheoSeq/DbTool.java b/src/main/java/proteomics/TheoSeq/DbTool.java deleted file mode 100644 index 4423e23..0000000 --- a/src/main/java/proteomics/TheoSeq/DbTool.java +++ /dev/null @@ -1,85 +0,0 @@ -package proteomics.TheoSeq; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.*; -import java.util.*; -import java.util.regex.*; - -public class DbTool { - - private static final Logger logger = LoggerFactory.getLogger(DbTool.class); - - private Map pro_seq_map = new HashMap<>(); - private Map pro_annotate_map = new HashMap<>(); - - public DbTool(String db_name, String databaseType) throws IOException { - String id = ""; - String annotate; - StringBuilder seq = new StringBuilder(99999); - - boolean new_pro = true; - - Pattern header_pattern; - if (databaseType.contentEquals("TAIR")) { - header_pattern = Pattern.compile("^>([^\\s]+)[\\s|]+(.+)$"); - } else if (databaseType.contentEquals("UniProt") || databaseType.contentEquals("SwissProt")) { - header_pattern = Pattern.compile("^>[^|]+\\|(.+)\\|(.+)$"); - } else if (databaseType.contentEquals("contaminants") || databaseType.contentEquals("ITAG")) { - header_pattern = Pattern.compile("^>([^ ]+) (.+)$"); - } else if (databaseType.contentEquals("Others")) { - header_pattern = Pattern.compile("^>(.+)$"); - } else { - throw new NullPointerException(String.format(Locale.US, "Incorrect database type (%s) in the parameter file.", databaseType)); - } - - BufferedReader db_reader; - if (databaseType.contentEquals("contaminants")) { - InputStream inputStream = getClass().getClassLoader().getResourceAsStream("contaminants.fasta"); - db_reader = new BufferedReader(new InputStreamReader(inputStream)); - } else { - db_reader = new BufferedReader(new FileReader(db_name)); - } - String line; - while ((line = db_reader.readLine()) != null) { - line = line.trim(); - Matcher head_matcher = header_pattern.matcher(line); - if (head_matcher.matches()) { - // This line is a header - if (!new_pro) { - // This isn't the first protein - pro_seq_map.put(id, seq.toString()); - } - id = head_matcher.group(1).trim(); - if (databaseType.contentEquals("Others")) { - annotate = id; - } else { - annotate = head_matcher.group(2).trim(); - } - pro_annotate_map.put(id, annotate); - new_pro = true; - } else if (!line.isEmpty()) { - // This line is a body - if (new_pro) { - seq = new StringBuilder(99999); - seq.append(line); - new_pro = false; - } else { - seq.append(line); - } - } - } - db_reader.close(); - // Last protein - pro_seq_map.put(id, seq.toString()); - } - - public Map getProSeqMap() { - return pro_seq_map; - } - - public Map getProAnnotateMap() { - return pro_annotate_map; - } -} diff --git a/src/main/java/proteomics/TheoSeq/MassTool.java b/src/main/java/proteomics/TheoSeq/MassTool.java deleted file mode 100644 index dfff453..0000000 --- a/src/main/java/proteomics/TheoSeq/MassTool.java +++ /dev/null @@ -1,360 +0,0 @@ -package proteomics.TheoSeq; - -import proteomics.Types.AA; -import proteomics.Types.SparseVector; - -import java.util.*; -import java.util.regex.*; - -public class MassTool { - - private static final int max_charge = 6; - private static final Pattern mod_aa_pattern = Pattern.compile("([A-Znc])(\\[([0-9\\.\\-]+)\\])?"); - public static final double PROTON = 1.00727646688; - public static final double C13_DIFF = 1.00335483; - - public static double H2O; - public Map elementTable = new HashMap<>(); - - private final Map mass_table = new HashMap<>(25, 1); - private final int missed_cleavage; - private final String cut_site; - private final String protect_site; - private final double inverseMzBinSize; - private final double one_minus_bin_offset; - - public MassTool(int missed_cleavage, Map fix_mod_map, String cut_site, String protect_site, double mz_bin_size, double one_minus_bin_offset) { - inverseMzBinSize = 1 / mz_bin_size; - this.missed_cleavage = missed_cleavage; - this.cut_site = cut_site; - this.protect_site = protect_site; - this.one_minus_bin_offset = one_minus_bin_offset; - - elementTable.put("-", 0d); - elementTable.put("H", 1.0078246); - elementTable.put("He", 3.01603); - elementTable.put("Li", 6.015121); - elementTable.put("Be", 9.012182); - elementTable.put("B", 10.012937); - elementTable.put("C", 12.0000000); - elementTable.put("N", 14.0030732); - elementTable.put("O", 15.9949141); - elementTable.put("F", 18.9984032); - elementTable.put("Ne", 19.992435); - elementTable.put("Na", 22.989767); - elementTable.put("Mg", 23.985042); - elementTable.put("Al", 26.981539); - elementTable.put("Si", 27.976927); - elementTable.put("P", 30.973762); - elementTable.put("S", 31.972070); - elementTable.put("Cl", 34.9688531); - elementTable.put("Ar", 35.967545); - elementTable.put("K", 38.963707); - elementTable.put("Ca", 39.962591); - elementTable.put("Sc", 44.955910); - elementTable.put("Ti", 45.952629); - elementTable.put("V", 49.947161); - elementTable.put("Cr", 49.946046); - elementTable.put("Mn", 54.938047); - elementTable.put("Fe", 53.939612); - elementTable.put("Co", 58.933198); - elementTable.put("Ni", 57.935346); - elementTable.put("Cu", 62.939598); - elementTable.put("Zn", 63.929145); - elementTable.put("Ga", 68.925580); - elementTable.put("Ge", 69.924250); - elementTable.put("As", 74.921594); - elementTable.put("Se", 73.922475); - elementTable.put("Br", 78.918336); - elementTable.put("Kr", 77.914); - elementTable.put("Rb", 84.911794); - elementTable.put("Sr", 83.913430); - elementTable.put("Y", 88.905849); - elementTable.put("Zr", 89.904703); - elementTable.put("Nb", 92.906377); - elementTable.put("Mo", 91.906808); - elementTable.put("Tc", 98.0); - elementTable.put("Ru", 95.907599); - elementTable.put("Rh", 102.905500); - elementTable.put("Pd", 101.905634); - elementTable.put("Ag", 106.905092); - elementTable.put("Cd", 105.906461); - elementTable.put("In", 112.904061); - elementTable.put("Sn", 111.904826); - elementTable.put("Sb", 120.903821); - elementTable.put("Te", 119.904048); - elementTable.put("I", 126.904473); - elementTable.put("Xe", 123.905894); - elementTable.put("Cs", 132.905429); - elementTable.put("Ba", 129.906282); - elementTable.put("La", 137.90711); - elementTable.put("Ce", 135.907140); - elementTable.put("Pr", 140.907647); - elementTable.put("Nd", 141.907719); - elementTable.put("Pm", 145.0); - elementTable.put("Sm", 143.911998); - elementTable.put("Eu", 150.919847); - elementTable.put("Gd", 151.919786); - elementTable.put("Tb", 158.925342); - elementTable.put("Dy", 155.925277); - elementTable.put("Ho", 164.930319); - elementTable.put("Er", 161.928775); - elementTable.put("Tm", 168.934212); - elementTable.put("Yb", 167.933894); - elementTable.put("Lu", 174.940770); - elementTable.put("Hf", 173.940044); - elementTable.put("Ta", 179.947462); - elementTable.put("W", 179.946701); - elementTable.put("Re", 184.952951); - elementTable.put("Os", 183.952488); - elementTable.put("Ir", 190.960584); - elementTable.put("Pt", 189.959917); - elementTable.put("Au", 196.966543); - elementTable.put("Hg", 195.965807); - elementTable.put("Tl", 202.972320); - elementTable.put("Pb", 203.973020); - elementTable.put("Bi", 208.980374); - elementTable.put("Po", 209.0); - elementTable.put("At", 210.0); - elementTable.put("Rn", 222.0); - elementTable.put("Fr", 223.0); - elementTable.put("Ra", 226.025); - // elementTable.put("Ac", 227.028); // conflict with Unimod bricks - elementTable.put("Th", 232.038054); - elementTable.put("Pa", 231.0359); - elementTable.put("U", 234.040946); - elementTable.put("Np", 237.048); - elementTable.put("Pu", 244.0); - elementTable.put("Am", 243.0); - elementTable.put("Cm", 247.0); - elementTable.put("Bk", 247.0); - elementTable.put("Cf", 251.0); - elementTable.put("Es", 252.0); - elementTable.put("Fm", 257.0); - elementTable.put("Md", 258.0); - elementTable.put("No", 259.0); - elementTable.put("Lr", 260.0); - elementTable.put("13C", 13.0033554); - elementTable.put("15N", 15.0001088); - elementTable.put("18O", 17.9991616); - elementTable.put("2H", 2.0141021); - elementTable.put("dHex", elementTable.get("C") * 6 + elementTable.get("O") * 4 + elementTable.get("H") * 10); - elementTable.put("Hep", elementTable.get("C") * 7 + elementTable.get("O") * 6 + elementTable.get("H") * 12); - elementTable.put("Hex", elementTable.get("C") * 6 + elementTable.get("O") * 5 + elementTable.get("H") * 10); - elementTable.put("HexA", elementTable.get("C") * 6 + elementTable.get("O") * 6 + elementTable.get("H") * 8); - elementTable.put("HexN", elementTable.get("C") * 6 + elementTable.get("O") * 4 + elementTable.get("H") * 11 + elementTable.get("N")); - elementTable.put("HexNAc", elementTable.get("C") * 8 + elementTable.get("O") * 5 + + elementTable.get("N") + elementTable.get("H") * 13); - elementTable.put("Kdn", elementTable.get("C") * 9 + elementTable.get("H") * 14 + elementTable.get("O") * 8); - elementTable.put("Kdo", elementTable.get("C") * 8 + elementTable.get("H") * 12 + elementTable.get("O") * 7); - elementTable.put("NeuAc", elementTable.get("C") * 11 + elementTable.get("H") * 17 + elementTable.get("O") * 8 + elementTable.get("N")); - elementTable.put("NeuGc", elementTable.get("C") * 11 + elementTable.get("H") * 17 + elementTable.get("O") * 9 + elementTable.get("N")); - elementTable.put("Pent", elementTable.get("C") * 5 + elementTable.get("O") * 4 + elementTable.get("H") * 8); - elementTable.put("Phos", elementTable.get("O") * 3 + elementTable.get("H") + elementTable.get("P")); - elementTable.put("Sulf", elementTable.get("S") + elementTable.get("O") * 3); - elementTable.put("Water", elementTable.get("H") * 2 + elementTable.get("O")); - elementTable.put("Me", elementTable.get("C") + elementTable.get("H") * 2); - elementTable.put("Ac", elementTable.get("C") * 2 + elementTable.get("H") * 2 + elementTable.get("O")); // Caution! This is not Actinium - - mass_table.put('G', (elementTable.get("C") * 2 + elementTable.get("H") * 3 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('G'))); - mass_table.put('A', (elementTable.get("C") * 3 + elementTable.get("H") * 5 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('A'))); - mass_table.put('S', (elementTable.get("C") * 3 + elementTable.get("H") * 5 + elementTable.get("N") + elementTable.get("O") * 2 + fix_mod_map.get('S'))); - mass_table.put('P', (elementTable.get("C") * 5 + elementTable.get("H") * 7 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('P'))); - mass_table.put('V', (elementTable.get("C") * 5 + elementTable.get("H") * 9 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('V'))); - mass_table.put('T', (elementTable.get("C") * 4 + elementTable.get("H") * 7 + elementTable.get("N") + elementTable.get("O") * 2 + fix_mod_map.get('I'))); - mass_table.put('C', (elementTable.get("C") * 3 + elementTable.get("H") * 5 + elementTable.get("N") + elementTable.get("O") + elementTable.get("S") + fix_mod_map.get('C'))); - mass_table.put('I', (elementTable.get("C") * 6 + elementTable.get("H") * 11 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('I'))); - mass_table.put('L', (elementTable.get("C") * 6 + elementTable.get("H") * 11 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('L'))); - mass_table.put('N', (elementTable.get("C") * 4 + elementTable.get("H") * 6 + elementTable.get("N") * 2 + elementTable.get("O") * 2 + fix_mod_map.get('N'))); - mass_table.put('D', (elementTable.get("C") * 4 + elementTable.get("H") * 5 + elementTable.get("N") + elementTable.get("O") * 3 + fix_mod_map.get('D'))); - mass_table.put('Q', (elementTable.get("C") * 5 + elementTable.get("H") * 8 + elementTable.get("N") * 2 + elementTable.get("O") * 2 + fix_mod_map.get('Q'))); - mass_table.put('K', (elementTable.get("C") * 6 + elementTable.get("H") * 12 + elementTable.get("N") * 2 + elementTable.get("O") + fix_mod_map.get('K'))); - mass_table.put('E', (elementTable.get("C") * 5 + elementTable.get("H") * 7 + elementTable.get("N") + elementTable.get("O") * 3 + fix_mod_map.get('E'))); - mass_table.put('M', (elementTable.get("C") * 5 + elementTable.get("H") * 9 + elementTable.get("N") + elementTable.get("O") + elementTable.get("S") + fix_mod_map.get('M'))); - mass_table.put('H', (elementTable.get("C") * 6 + elementTable.get("H") * 7 + elementTable.get("N") * 3 + elementTable.get("O") + fix_mod_map.get('H'))); - mass_table.put('F', (elementTable.get("C") * 9 + elementTable.get("H") * 9 + elementTable.get("N") + elementTable.get("O") + fix_mod_map.get('F'))); - mass_table.put('R', (elementTable.get("C") * 6 + elementTable.get("H") * 12 + elementTable.get("N") * 4 + elementTable.get("O") + fix_mod_map.get('R'))); - mass_table.put('Y', (elementTable.get("C") * 9 + elementTable.get("H") * 9 + elementTable.get("N") + elementTable.get("O") * 2 + fix_mod_map.get('Y'))); - mass_table.put('W', (elementTable.get("C") * 11 + elementTable.get("H") * 10 + elementTable.get("N") * 2 + elementTable.get("O") + fix_mod_map.get('W'))); - mass_table.put('U', (elementTable.get("C") * 3 + elementTable.get("H") * 7 + elementTable.get("N") + elementTable.get("O") * 2 + elementTable.get("Se") + fix_mod_map.get('U'))); - mass_table.put('O', (elementTable.get("C") * 12 + elementTable.get("H") * 21 + elementTable.get("N") * 3 + elementTable.get("O") * 3 + fix_mod_map.get('O'))); - mass_table.put('n', fix_mod_map.get('n')); - mass_table.put('c', fix_mod_map.get('c')); - H2O = elementTable.get("H") * 2 + elementTable.get("O"); - } - - public int mzToBin(double mz) { - return (int) (mz * inverseMzBinSize + one_minus_bin_offset); - } - - public double calResidueMass(String seq) { // n and c are also AA. - double total_mass = 0; - Matcher matcher = mod_aa_pattern.matcher(seq); - while (matcher.find()) { - char aa = matcher.group(1).charAt(0); - double delta_mass = 0; - if (matcher.group(3) != null) { - delta_mass = Double.valueOf(matcher.group(3)); - } - total_mass += mass_table.get(aa) + delta_mass; - } - - return total_mass; - } - - public Set buildChainSet(String pro_seq, short linker_type) { - Map> digest_range_map = digestTrypsin(pro_seq); - Set chain_seq_set = new HashSet<>(); - - for (int i = 0; i <= missed_cleavage; ++i) { - for (int[] digest_range_1 : digest_range_map.get(i)) { - String sub_string = pro_seq.substring(digest_range_1[0], digest_range_1[1]); - if (linker_type == 1 && sub_string.substring(0, sub_string.length() - 1).contains("K")) { - chain_seq_set.add("n" + sub_string + "c"); - } else if (linker_type == 2 && sub_string.substring(0, sub_string.length() - 1).contains("C")) { - chain_seq_set.add("n" + sub_string + "c"); - } - - if (digest_range_1[1] == pro_seq.length()) { - // This is the end of the protein. No digestion site, so the link-sites in any position including C-term can be linked. - if (linker_type == 1 && sub_string.contains("K")) { - chain_seq_set.add("n" + sub_string + "c"); - } else if (linker_type == 2 && sub_string.contains("C")) { - chain_seq_set.add("n" + sub_string + "c"); - } - } - } - if (linker_type == 1) { - // Add N-term peptide - if (digest_range_map.get(i).size() > 0) { - int[] digest_range = digest_range_map.get(i).get(0); - String sub_string = pro_seq.substring(digest_range[0], digest_range[1]); - chain_seq_set.add("n" + sub_string + "c"); - } - } - } - return chain_seq_set; - } - - public Map getmass_table() { - return mass_table; - } - - public double generateTheoFragmentAndCalXCorr(String seq, short linkSite, double additional_mass, int precursor_charge, SparseVector xcorrPL) { - linkSite = (short) Math.max(1, linkSite); - - int localMaxCharge = Math.min(max_charge, Math.max(precursor_charge - 1, 1)); - double[] inverseChargeArray = new double[localMaxCharge]; - for (int charge = 1; charge <= localMaxCharge; ++charge) { - inverseChargeArray[charge - 1] = (double) 1 / (double) charge; - } - - AA[] aaArray = seqToAAList(seq); - - double xcorr = 0; - - // traverse the sequence to get b-ion - double bIonMass = mass_table.get(aaArray[0].aa) + aaArray[0].delta_mass; // add N-term modification - for (int i = 1; i < aaArray.length - 2; ++i) { - bIonMass += mass_table.get(aaArray[i].aa) + aaArray[i].delta_mass; - if (i < linkSite) { - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin(bIonMass * inverseCharge + PROTON)); - } - } else { - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin((bIonMass + additional_mass) * inverseCharge + PROTON)); - } - } - } - // calculate the last b-ion with C-term modification - bIonMass += mass_table.get(aaArray[aaArray.length - 2].aa) + aaArray[aaArray.length - 2].delta_mass + mass_table.get(aaArray[aaArray.length - 1].aa) + aaArray[aaArray.length - 1].delta_mass; - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin((bIonMass + additional_mass) * inverseCharge + PROTON)); // for the fragment containing all amino acids, the additional mass is always included. - } - - // traverse the sequence with reversed order to get y-ion - // the whole sequence - double yIonMass = bIonMass + H2O; - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin((yIonMass + additional_mass) * inverseCharge + PROTON)); // for the fragment containing all amino acids, the additional mass is always included. - } - // delete the first amino acid and N-term modification - yIonMass -= mass_table.get(aaArray[0].aa) + aaArray[0].delta_mass + mass_table.get(aaArray[1].aa) + aaArray[1].delta_mass; - if (1 >= linkSite) { - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin(yIonMass * inverseCharge + PROTON)); - } - } else { - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin((yIonMass + additional_mass) * inverseCharge + PROTON)); - } - } - // rest of the sequence - for (int i = 2; i < aaArray.length - 2; ++i) { - yIonMass -= mass_table.get(aaArray[i].aa) + aaArray[i].delta_mass; - if (i >= linkSite) { // caution: here, it is different from b-ion - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin(yIonMass * inverseCharge + PROTON)); - } - } else { - for (double inverseCharge : inverseChargeArray) { - xcorr += xcorrPL.get(mzToBin((yIonMass + additional_mass) * inverseCharge + PROTON)); - } - } - } - - return xcorr * 0.005; - } - - public static AA[] seqToAAList(String seq) { - Matcher matcher = mod_aa_pattern.matcher(seq); - List temp = new LinkedList<>(); - while (matcher.find()) { - char aa = matcher.group(1).charAt(0); - double delta_mass = 0; - if (matcher.group(3) != null) { - delta_mass = Double.valueOf(matcher.group(3)); - } - temp.add(new AA(aa, delta_mass)); - } - return temp.toArray(new AA[temp.size()]); - } - - Map> digestTrypsin(String pro_seq) { - // Cut a protein - List cut_point_list = new LinkedList<>(); - int length = pro_seq.length(); - Pattern cut_pattern = Pattern.compile("[" + cut_site + "](?![" + protect_site + "])"); - int idx_start = 0; - Matcher match_obj = cut_pattern.matcher(pro_seq); - cut_point_list.add(0); - while (idx_start < length) { - if (match_obj.find()) { - int cut_point = match_obj.end(); - cut_point_list.add(cut_point); - idx_start = cut_point; - } else { - cut_point_list.add(length); - break; - } - } - - Collections.sort(cut_point_list); - - // Deal with missed cleavage - Map> digest_range_map = new HashMap<>(5, 1); - for (int time = 0; time <= missed_cleavage; ++time) { - List temp = new LinkedList<>(); - int left_point; - int right_point; - for (int i = 0; i + 1 + time < cut_point_list.size(); ++i) { - left_point = cut_point_list.get(i); - right_point = cut_point_list.get(i + 1 + time); - temp.add(new int[]{left_point, right_point}); - } - digest_range_map.put(time, temp); - } - - return digest_range_map; - } -} diff --git a/src/main/java/proteomics/Types/AA.java b/src/main/java/proteomics/Types/AA.java deleted file mode 100644 index 33016aa..0000000 --- a/src/main/java/proteomics/Types/AA.java +++ /dev/null @@ -1,35 +0,0 @@ -package proteomics.Types; - -import java.util.Locale; - -public class AA { - - public final char aa; - public final double delta_mass; - private final int hashCode; - - public AA(char aa, double delta_mass) { - this.aa = aa; - this.delta_mass = delta_mass; - String toString; - if (Math.abs(delta_mass) > 1e-6) { - toString = String.format(Locale.US, "%c[%.3f]", aa, delta_mass); - } else { - toString = String.valueOf(aa); - } - hashCode = toString.hashCode(); - } - - public int hashCode() { - return hashCode; - } - - public boolean equals(Object other) { - if (other instanceof AA) { - AA temp = (AA) other; - return temp.toString().contentEquals(this.toString()); - } else { - return false; - } - } -} diff --git a/src/main/java/proteomics/Types/SparseBooleanVector.java b/src/main/java/proteomics/Types/SparseBooleanVector.java index 0485551..a399e31 100644 --- a/src/main/java/proteomics/Types/SparseBooleanVector.java +++ b/src/main/java/proteomics/Types/SparseBooleanVector.java @@ -1,5 +1,6 @@ package proteomics.Types; +import ProteomicsLibrary.Types.SparseVector; import com.google.common.collect.HashMultiset; import com.google.common.collect.Multiset; diff --git a/src/main/java/proteomics/Types/SparseVector.java b/src/main/java/proteomics/Types/SparseVector.java deleted file mode 100644 index 59dd9ae..0000000 --- a/src/main/java/proteomics/Types/SparseVector.java +++ /dev/null @@ -1,97 +0,0 @@ -package proteomics.Types; - -import java.util.*; - -public class SparseVector { - - private Map sparse_vector = new HashMap<>(); - - SparseVector(Map sparse_vector) { - for (int i : sparse_vector.keySet()) { - this.sparse_vector.put(i, sparse_vector.get(i)); - } - } - - public SparseVector() {} - - public void add(int i, double v) { - if (Math.abs(v) > 1e-6) { - if (sparse_vector.containsKey(i)) { - sparse_vector.put(i, sparse_vector.get(i) + v); - } else { - sparse_vector.put(i, v); - } - } - } - - public void put(int i, double v) { - if (Math.abs(v) > 1e-6) { - sparse_vector.put(i, v); - } - } - - public double get(int i) { - if (sparse_vector.containsKey(i)) { - return sparse_vector.get(i); - } else { - return 0; - } - } - - public Set getIdxSet() { - return sparse_vector.keySet(); - } - - double getMaxValue() { - List intensity_list = new ArrayList<>(sparse_vector.values()); - intensity_list.sort(Collections.reverseOrder()); - return intensity_list.get(0); - } - - double getMinValue() { - List intensity_list = new ArrayList<>(sparse_vector.values()); - Collections.sort(intensity_list); - return intensity_list.get(0); - } - - double norm2square() { - double output = 0; - for (double v : sparse_vector.values()) { - output += v * v; - } - return output; - } - - double dot(SparseVector other) { - double output = 0; - Map other_vector = other.sparse_vector; - Set intersectedKeys = new HashSet<>(sparse_vector.keySet()); - intersectedKeys.retainAll(other_vector.keySet()); - for (int i : intersectedKeys) { - output += sparse_vector.get(i) * other_vector.get(i); - } - return output; - } - - Map getVectorMap() { - return sparse_vector; - } - - Set getNonzeroIdx() { - return sparse_vector.keySet(); - } - - public int getNonzeroNum() { - return sparse_vector.size(); - } - - public int getMaxIdx() { - int maxIdx = 0; - for (int idx : sparse_vector.keySet()) { - if (idx > maxIdx) { - maxIdx = idx; - } - } - return(maxIdx); - } -} diff --git a/src/test/java/proteomics/TheoSeq/DbToolTest.java b/src/test/java/proteomics/TheoSeq/DbToolTest.java deleted file mode 100644 index 194ca3a..0000000 --- a/src/test/java/proteomics/TheoSeq/DbToolTest.java +++ /dev/null @@ -1,41 +0,0 @@ -package proteomics.TheoSeq; - -import org.junit.BeforeClass; -import org.junit.Test; - -import java.util.HashMap; -import java.util.Map; - -import static org.junit.Assert.assertEquals; - -public class DbToolTest { - private static DbTool db_tool_obj; - - @BeforeClass - public static void setUp() throws Exception { - db_tool_obj = new DbTool(Thread.currentThread().getContextClassLoader().getResource("test.fasta").getPath(), "UniProt"); - } - - @Test - public void returnSeqMap() throws Exception { - Map pro_seq_map = db_tool_obj.getProSeqMap(); - Map ground_truth = new HashMap<>(); - ground_truth.put("Pro1", "ASRIATAAAASKPSLNKF"); - ground_truth.put("Pro2", "STSVNPKLSKT"); - for (String k : pro_seq_map.keySet()) { - assertEquals(pro_seq_map.get(k), ground_truth.get(k)); - } - } - - @Test - public void returnAnnotateMap() throws Exception { - Map pro_annotate_map = db_tool_obj.getProAnnotateMap(); - Map ground_truth = new HashMap<>(); - ground_truth.put("Pro1", "test protein one"); - ground_truth.put("Pro2", "test protein two"); - ground_truth.put("Pro3", "test protein three"); - for (String k : pro_annotate_map.keySet()) { - assertEquals(pro_annotate_map.get(k), ground_truth.get(k)); - } - } -} \ No newline at end of file diff --git a/src/test/java/proteomics/TheoSeq/MassToolTest.java b/src/test/java/proteomics/TheoSeq/MassToolTest.java deleted file mode 100644 index ff1ec90..0000000 --- a/src/test/java/proteomics/TheoSeq/MassToolTest.java +++ /dev/null @@ -1,180 +0,0 @@ -package proteomics.TheoSeq; - -import org.junit.BeforeClass; -import org.junit.Test; -import proteomics.Types.AA; -import proteomics.Types.SparseBooleanVector; - -import java.util.*; - -import static org.junit.Assert.*; - - -public class MassToolTest { - - private static Map fix_mod_map = new HashMap<>(); - - @BeforeClass - public static void setUp() throws Exception { - fix_mod_map.put('G', 0d); - fix_mod_map.put('A', 0d); - fix_mod_map.put('S', 0d); - fix_mod_map.put('P', 0d); - fix_mod_map.put('V', 0d); - fix_mod_map.put('T', 0d); - fix_mod_map.put('C', 57.02146); - fix_mod_map.put('I', 0d); - fix_mod_map.put('L', 0d); - fix_mod_map.put('N', 0d); - fix_mod_map.put('D', 0d); - fix_mod_map.put('Q', 0d); - fix_mod_map.put('K', 0d); - fix_mod_map.put('E', 0d); - fix_mod_map.put('M', 0d); - fix_mod_map.put('H', 0d); - fix_mod_map.put('F', 0d); - fix_mod_map.put('R', 0d); - fix_mod_map.put('Y', 0d); - fix_mod_map.put('W', 0d); - fix_mod_map.put('U', 0d); - fix_mod_map.put('O', 0d); - fix_mod_map.put('n', 60d); - fix_mod_map.put('c', 10d); - } - - @Test - public void calResidueMass() throws Exception { - MassTool mass_tool_obj = new MassTool(1, fix_mod_map, "KR", "P", 1.0005, 0.6); - assertEquals(2503.1357421875, mass_tool_obj.calResidueMass("nGASPVTCILNDQKEMHFRYWc"), 0.001); - } - - @Test - public void mzToBin() throws Exception { - MassTool mass_tool_obj = new MassTool(1, fix_mod_map, "KR", "P", 1.0005, 0.6); - assertEquals(11, mass_tool_obj.mzToBin(11), 1e-6); - assertEquals(0, mass_tool_obj.mzToBin(0), 1e-6); - assertEquals(-5, mass_tool_obj.mzToBin(-5), 1e-6); - } - - @Test - public void buildChainSet() throws Exception { - // 1 missed-cleavage, N-term linkable - MassTool mass_tool_obj = new MassTool(1, fix_mod_map, "KR", "P", 1.0005, 0.6); - Set result = mass_tool_obj.buildChainSet("MRGFASSASRIATAAAASKPSLNASTSVNPKLSKTMDYMRIFSVFVVTLWIIRVDARVFKTY", (short) 1); - Set ground_truth = new HashSet<>(); - ground_truth.add("nMRc"); - ground_truth.add("nMRGFASSASRc"); - ground_truth.add("nGFASSASRIATAAAASKPSLNASTSVNPKc"); - ground_truth.add("nIATAAAASKPSLNASTSVNPKc"); - ground_truth.add("nIATAAAASKPSLNASTSVNPKLSKc"); - ground_truth.add("nLSKTMDYMRc"); - ground_truth.add("nVFKTYc"); - assertEquals(ground_truth, result); - - // 2 missed-cleavage, N-term linkable - mass_tool_obj = new MassTool(2, fix_mod_map, "KR", "P", 1.0005, 0.6); - result = mass_tool_obj.buildChainSet("MRGFASSASRIATAAAASKPSLNASTSVNPKLSKTMDYMRIFSVFVVTLWIIRVDARVFKTY", (short) 1); - ground_truth = new HashSet<>(); - ground_truth.add("nMRc"); - ground_truth.add("nIATAAAASKPSLNASTSVNPKc"); - ground_truth.add("nMRGFASSASRc"); - ground_truth.add("nGFASSASRIATAAAASKPSLNASTSVNPKc"); - ground_truth.add("nIATAAAASKPSLNASTSVNPKLSKc"); - ground_truth.add("nLSKTMDYMRc"); - ground_truth.add("nVFKTYc"); - ground_truth.add("nMRGFASSASRIATAAAASKPSLNASTSVNPKc"); - ground_truth.add("nGFASSASRIATAAAASKPSLNASTSVNPKLSKc"); - ground_truth.add("nIATAAAASKPSLNASTSVNPKLSKTMDYMRc"); - ground_truth.add("nLSKTMDYMRIFSVFVVTLWIIRc"); - ground_truth.add("nVDARVFKTYc"); - assertEquals(ground_truth, result); - } - - @Test - public void buildTheoVector() throws Exception { // todo: complete - - } - - @Test - public void digestTrypsin() { - // 0 missed cleavage - MassTool mass_tool_obj = new MassTool(0, fix_mod_map, "KR", "P", 1.0005, 0.6); - Map> result = mass_tool_obj.digestTrypsin("FGTRHUYGKPHHYRPHGKHUUG"); - Map> ground_truth = new HashMap<>(); - List temp = new LinkedList<>(); - temp.add(new int[]{0, 4}); - temp.add(new int[]{4, 18}); - temp.add(new int[]{18, 22}); - ground_truth.put(0, temp); - assertEquals(ground_truth.size(), result.size()); - for (int k : result.keySet()) { - for (int i = 0; i < ground_truth.get(k).size(); ++i) { - assertArrayEquals(ground_truth.get(k).get(i), result.get(k).get(i)); - } - } - - result = mass_tool_obj.digestTrypsin("FGTRHUYGKPHHYRPHGKHUUR"); - ground_truth = new HashMap<>(); - temp = new LinkedList<>(); - temp.add(new int[]{0, 4}); - temp.add(new int[]{4, 18}); - temp.add(new int[]{18, 22}); - ground_truth.put(0, temp); - assertEquals(ground_truth.size(), result.size()); - for (int k : result.keySet()) { - for (int i = 0; i < ground_truth.get(k).size(); ++i) { - assertArrayEquals(ground_truth.get(k).get(i), result.get(k).get(i)); - } - } - - // 1 missed cleavage - mass_tool_obj = new MassTool(1, fix_mod_map, "KR", "P", 1.0005, 0.6); - result = mass_tool_obj.digestTrypsin("FGTRHUYGKPHHYRPHGKHUUG"); - ground_truth = new HashMap<>(); - temp = new LinkedList<>(); - temp.add(new int[]{0, 4}); - temp.add(new int[]{4, 18}); - temp.add(new int[]{18, 22}); - ground_truth.put(0, temp); - temp = new LinkedList<>(); - temp.add(new int[]{0, 18}); - ground_truth.put(1, temp); - assertEquals(ground_truth.size(), result.size()); - for (int k : result.keySet()) { - for (int i = 0; i < ground_truth.get(k).size(); ++i) { - assertArrayEquals(ground_truth.get(k).get(i), result.get(k).get(i)); - } - } - - result = mass_tool_obj.digestTrypsin("FGTRHUYGKPHHYRPHGKHUUR"); - ground_truth = new HashMap<>(); - temp = new LinkedList<>(); - temp.add(new int[]{0, 4}); - temp.add(new int[]{4, 18}); - temp.add(new int[]{18, 22}); - ground_truth.put(0, temp); - temp = new LinkedList<>(); - temp.add(new int[]{0, 18}); - ground_truth.put(1, temp); - assertEquals(ground_truth.size(), result.size()); - for (int k : result.keySet()) { - for (int i = 0; i < ground_truth.get(k).size(); ++i) { - assertArrayEquals(ground_truth.get(k).get(i), result.get(k).get(i)); - } - } - } - - @Test - public void seqToAAList() { - MassTool mass_tool_obj = new MassTool(1, fix_mod_map, "KR", "P", 1.0005, 0.6); - String seq = "nGHUKc"; - AA[] result = MassTool.seqToAAList(seq); - AA[] ground_truth = new AA[]{new AA('n', 0), new AA('G', 0), new AA('H', 0), new AA('U', 0), new AA('K', 0), new AA('c', 0)}; - assertArrayEquals(ground_truth, result); - - seq = "nGH[3.02]UKc"; - result = MassTool.seqToAAList(seq); - ground_truth = new AA[]{new AA('n', 0), new AA('G', 0), new AA('H', 3.02), new AA('U', 0), new AA('K', 0), new AA('c', 0)}; - assertArrayEquals(ground_truth, result); - } -} \ No newline at end of file diff --git a/src/test/java/proteomics/Types/SparseBooleanVectorTest.java b/src/test/java/proteomics/Types/SparseBooleanVectorTest.java index 91fbac5..a412bc9 100644 --- a/src/test/java/proteomics/Types/SparseBooleanVectorTest.java +++ b/src/test/java/proteomics/Types/SparseBooleanVectorTest.java @@ -4,9 +4,7 @@ import com.google.common.collect.Multiset; import org.junit.Before; import org.junit.Test; - -import java.util.HashSet; -import java.util.Set; +import ProteomicsLibrary.Types.*; import static org.junit.Assert.*; diff --git a/src/test/java/proteomics/Types/SparseVectorTest.java b/src/test/java/proteomics/Types/SparseVectorTest.java index fd7803d..d1ee341 100644 --- a/src/test/java/proteomics/Types/SparseVectorTest.java +++ b/src/test/java/proteomics/Types/SparseVectorTest.java @@ -3,6 +3,7 @@ import org.junit.Before; import org.junit.Test; +import ProteomicsLibrary.Types.SparseVector; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -48,7 +49,7 @@ public void get() throws Exception { @Test public void idxSet() throws Exception { - Set result = vector.getIdxSet(); + Set result = vector.getNonzeroIdx(); Set ground_truth = new HashSet<>(); ground_truth.add(1); ground_truth.add(3); @@ -98,9 +99,4 @@ public void getNonzeroIdx() throws Exception { ground_truth.add(11); assertEquals(ground_truth, vector.getNonzeroIdx()); } - - @Test - public void getNonzeroNum() throws Exception { - assertEquals(3, vector.getNonzeroNum()); - } } \ No newline at end of file