diff --git a/src/main/java/proteomics/Index/BuildIndex.java b/src/main/java/proteomics/Index/BuildIndex.java index d0d8c25..0d0c143 100644 --- a/src/main/java/proteomics/Index/BuildIndex.java +++ b/src/main/java/proteomics/Index/BuildIndex.java @@ -99,7 +99,7 @@ public BuildIndex(Map parameter_map) throws Exception { } // define a new MassTool object - mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, parameter_map.get("cleavage_site").trim(), parameter_map.get("protection_site").trim(), parameter_map.get("is_from_C_term").trim().contentEquals("1"), mz_bin_size * 0.5, one_minus_bin_offset, "N14"); + mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, parameter_map.get("cleavage_site_1").trim(), parameter_map.get("protection_site_1").trim(), parameter_map.get("is_from_C_term_1").trim().contentEquals("1"), parameter_map.getOrDefault("cleavage_site_2", null), parameter_map.getOrDefault("protection_site_2", null), parameter_map.containsKey("is_from_C_term_2") ? parameter_map.get("is_from_C_term_2").trim().contentEquals("1") : null, mz_bin_size * 0.5, one_minus_bin_offset, "N14"); // generate seq_pro_map Map seq_term_map = new HashMap<>(); @@ -139,7 +139,7 @@ public BuildIndex(Map parameter_map) throws Exception { boolean proteinCTerm = seq_term_map.get(seq)[1]; // mod free - Set linkSiteSet = getLinkSiteSet(seq, parameter_map.get("is_from_C_term").trim().contentEquals("1"), parameter_map.get("cleavage_site"), proteinNTerm, proteinCTerm, linker_type); + Set linkSiteSet = getLinkSiteSet(seq, parameter_map.get("is_from_C_term_1").trim().contentEquals("1"), parameter_map.get("cleavage_site_1"), parameter_map.containsKey("is_from_C_term_2") ? 
parameter_map.get("is_from_C_term_2").trim().contentEquals("1") : null, parameter_map.getOrDefault("cleavage_site_2", null), proteinNTerm, proteinCTerm, linker_type); if (!linkSiteSet.isEmpty()) { double totalMass = (mass_tool_obj.calResidueMass(seq) + mass_tool_obj.H2O); int bin = massToBin(totalMass); @@ -548,28 +548,52 @@ private Set checkKCTermMod(Set varSeqSet) { // eliminate those s } } - private Set getLinkSiteSet(String seq, boolean is_from_C_term, String cleavage_site, boolean n_term, boolean c_term, short linker_type) { + private Set getLinkSiteSet(String seq, boolean is_from_C_term_1, String cleavage_site_1, Boolean is_from_C_term_2, String cleavage_site_2, boolean n_term, boolean c_term, short linker_type) { AA[] aa_list = MassTool.seqToAAList(seq); Set output = new HashSet<>(5, 1); for (int i = 1; i < aa_list.length - 1; ++i) { if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) { - if (is_from_C_term) { - if (i < aa_list.length - 2 || !cleavage_site.contains("K")) { + if (is_from_C_term_2 == null) { + if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("K"))) { + output.add((short) i); + } else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("K"))) { output.add((short) i); } } else { - if (i > 1 || !cleavage_site.contains("K")) { - output.add((short) i); + if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("K"))) { + if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("K"))) { + output.add((short) i); + } else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("K"))) { + output.add((short) i); + } + } else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("K"))) { + if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("K"))) { + output.add((short) i); + } else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("K"))) { + output.add((short) i); + } } } } 
else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) { - if (is_from_C_term) { - if (i < aa_list.length - 2 || !cleavage_site.contains("C")) { + if (is_from_C_term_2 == null) { + if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("C"))) { + output.add((short) i); + } else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("C"))) { output.add((short) i); } } else { - if (i > 1 || !cleavage_site.contains("C")) { - output.add((short) i); + if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("C"))) { + if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("C"))) { + output.add((short) i); + } else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("C"))) { + output.add((short) i); + } + } else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("C"))) { + if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("C"))) { + output.add((short) i); + } else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("C"))) { + output.add((short) i); + } } } } diff --git a/src/main/java/proteomics/Parameter/Parameter.java b/src/main/java/proteomics/Parameter/Parameter.java index d476cab..bd8129f 100644 --- a/src/main/java/proteomics/Parameter/Parameter.java +++ b/src/main/java/proteomics/Parameter/Parameter.java @@ -41,10 +41,17 @@ public Parameter(String parameterFile) throws Exception { } else { Matcher enzymeMatcher = enzymePattern.matcher(line); if (enzymeMatcher.matches()) { - parameterMap.put("enzyme_name", enzymeMatcher.group(1).trim()); - parameterMap.put("is_from_C_term", enzymeMatcher.group(2).trim()); - parameterMap.put("cleavage_site", enzymeMatcher.group(3).trim()); - parameterMap.put("protection_site", enzymeMatcher.group(4).trim()); + if (parameterMap.containsKey("enzyme_name_1")) { + parameterMap.put("enzyme_name_2", enzymeMatcher.group(1).trim()); + 
parameterMap.put("is_from_C_term_2", enzymeMatcher.group(2).trim()); + parameterMap.put("cleavage_site_2", enzymeMatcher.group(3).trim()); + parameterMap.put("protection_site_2", enzymeMatcher.group(4).trim()); + } else { + parameterMap.put("enzyme_name_1", enzymeMatcher.group(1).trim()); + parameterMap.put("is_from_C_term_1", enzymeMatcher.group(2).trim()); + parameterMap.put("cleavage_site_1", enzymeMatcher.group(3).trim()); + parameterMap.put("protection_site_1", enzymeMatcher.group(4).trim()); + } } } } diff --git a/src/main/resources/parameter.def b/src/main/resources/parameter.def index 0368347..ae3aeb1 100644 --- a/src/main/resources/parameter.def +++ b/src/main/resources/parameter.def @@ -66,7 +66,7 @@ O = 0 n = 0 c = 0 -# Enzyme digestion specifications. Only support one enzyme at a time. +# Enzyme digestion specifications. Up to two enzymes are supported at the same time; the first enzyme line read is stored as enzyme 1 and the next one as enzyme 2. # enzyme name is cut from C-term cleavage site protection site Trypsin 1 KR P # Trypsin/P 1 KR -