From b7ef3855df18090477774da0d4ea658980d38f7b Mon Sep 17 00:00:00 2001 From: Fengchao Date: Tue, 30 Jan 2018 21:52:33 +0800 Subject: [PATCH] Delete min_precursor_mass and max_precursor_mass parameters. Delete a unnecessary filter. --- .../java/proteomics/Index/BuildIndex.java | 51 +++++++++---------- .../java/proteomics/Spectrum/PreSpectra.java | 17 ------- src/main/resources/parameter.def | 2 - 3 files changed, 23 insertions(+), 47 deletions(-) diff --git a/src/main/java/proteomics/Index/BuildIndex.java b/src/main/java/proteomics/Index/BuildIndex.java index 18da94e..9ceec21 100644 --- a/src/main/java/proteomics/Index/BuildIndex.java +++ b/src/main/java/proteomics/Index/BuildIndex.java @@ -39,7 +39,6 @@ public BuildIndex(Map parameter_map) throws IOException { int missed_cleavage = Integer.valueOf(parameter_map.get("missed_cleavage")); float mz_bin_size = Float.valueOf(parameter_map.get("mz_bin_size")); float one_minus_bin_offset = 1 - Float.valueOf(parameter_map.get("mz_bin_offset")); - float max_precursor_mass = Float.valueOf(parameter_map.get("max_precursor_mass")); ms1_bin_size = Float.valueOf(parameter_map.get("ms1_bin_size")); inverseMs1BinSize = 1 / ms1_bin_size; @@ -146,18 +145,16 @@ public BuildIndex(Map parameter_map) throws IOException { Set linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm, linker_type); if (!linkSiteSet.isEmpty()) { float totalMass = (float) (mass_tool_obj.calResidueMass(seq) + MassTool.H2O); - if (totalMass < max_precursor_mass - linker_mass) { - int bin = massToBin(totalMass); - if (bin_seq_map.containsKey(bin)) { - bin_seq_map.get(bin).add(seq); - } else { - Set temp = new HashSet<>(); - temp.add(seq); - bin_seq_map.put(bin, temp); - } - ChainEntry chainEntry = new ChainEntry(seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm, "0".hashCode()); - seq_entry_map.put(seq, chainEntry); + int bin = massToBin(totalMass); + if (bin_seq_map.containsKey(bin)) { + bin_seq_map.get(bin).add(seq); + } else { + Set temp = new HashSet<>(); + temp.add(seq); + bin_seq_map.put(bin, temp); } + ChainEntry chainEntry = new ChainEntry(seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm, "0".hashCode()); + seq_entry_map.put(seq, chainEntry); } // mod containing @@ -167,24 +164,22 @@ public BuildIndex(Map parameter_map) throws IOException { linkSiteSet.add(varSeq.linkSite); if (!linkSiteSet.isEmpty()) { float totalMass = (float) (mass_tool_obj.calResidueMass(varSeq.seq) + MassTool.H2O); - if (totalMass < max_precursor_mass - linker_mass) { - int bin = massToBin(totalMass); - if (bin_seq_map.containsKey(bin)) { - bin_seq_map.get(bin).add(varSeq.seq); - } else { - Set temp = new HashSet<>(); - temp.add(varSeq.seq); - bin_seq_map.put(bin, temp); - } - ChainEntry chainEntry = new ChainEntry(varSeq.seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm, varSeq.binaryModType); - if (seq_entry_map.containsKey(varSeq.seq)) { - // Binary mod has the higher priority - if (seq_entry_map.get(varSeq.seq).binaryModType == "0".hashCode()) { - seq_entry_map.put(varSeq.seq, chainEntry); - } - } else { + int bin = massToBin(totalMass); + if (bin_seq_map.containsKey(bin)) { + bin_seq_map.get(bin).add(varSeq.seq); + } else { + Set temp = new HashSet<>(); + temp.add(varSeq.seq); + bin_seq_map.put(bin, temp); + } + ChainEntry chainEntry = new ChainEntry(varSeq.seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm, varSeq.binaryModType); + if (seq_entry_map.containsKey(varSeq.seq)) { + // Binary mod has the higher priority + if (seq_entry_map.get(varSeq.seq).binaryModType == "0".hashCode()) { seq_entry_map.put(varSeq.seq, chainEntry); } + } else { + seq_entry_map.put(varSeq.seq, chainEntry); } } } diff --git a/src/main/java/proteomics/Spectrum/PreSpectra.java b/src/main/java/proteomics/Spectrum/PreSpectra.java index fc1c121..5fded7c 100644 --- a/src/main/java/proteomics/Spectrum/PreSpectra.java +++ b/src/main/java/proteomics/Spectrum/PreSpectra.java @@ -29,9 +29,6 @@ public class PreSpectra { private Set debug_scan_num_set = new HashSet<>(); public PreSpectra(JMzReader spectra_parser, BuildIndex build_index_obj, Map parameter_map, String ext) throws MzXMLParsingException, IOException { - float min_precursor_mass = Float.valueOf(parameter_map.get("min_precursor_mass")); - float max_precursor_mass = Float.valueOf(parameter_map.get("max_precursor_mass")); - // In DEBUG mode, filter out unlisted scan num if (ECL2.debug) { for (String k : parameter_map.keySet()) { @@ -62,22 +59,8 @@ public PreSpectra(JMzReader spectra_parser, BuildIndex build_index_obj, Map max_precursor_mass) || (precursor_mass < min_precursor_mass)) { - continue; - } - Map raw_mz_intensity_map = spectrum.getPeakList(); - int peakCount = 0; - for (double intensity : raw_mz_intensity_map.values()) { - if (intensity > 1e-6) { - ++peakCount; - } - } - if (peakCount < 10) { - continue; - } - if (ECL2.debug) { BufferedWriter writer = new BufferedWriter(new FileWriter(Integer.valueOf(spectrum.getId()) + ".raw.spectrum.csv")); writer.write("mz,intensity\n"); diff --git a/src/main/resources/parameter.def b/src/main/resources/parameter.def index 6022cd2..a93f7c7 100644 --- a/src/main/resources/parameter.def +++ b/src/main/resources/parameter.def @@ -8,8 +8,6 @@ dev = 0 # Output development information. db = small+random50.fasta # The protein database. database_type = UniProt # Different types have different fasta header patterns. Available values: UniProt, SwissProt, TAIR, Others missed_cleavage = 2 # Maximum number of allowed missed cleavage. -min_precursor_mass = 1000 -max_precursor_mass = 12000 min_chain_length = 5 # Minimum length of a peptide chain. max_chain_length = 50