From 58b5a7614ce27a57f87da31c1708ef2b5901e0f4 Mon Sep 17 00:00:00 2001 From: Fengchao Date: Thu, 7 Jun 2018 20:23:26 +0800 Subject: [PATCH] Support more enzymes. --- .../java/proteomics/Index/BuildIndex.java | 28 +++++++++++++++---- src/main/resources/parameter.def | 12 ++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/main/java/proteomics/Index/BuildIndex.java b/src/main/java/proteomics/Index/BuildIndex.java index e3e38f7..d0d8c25 100644 --- a/src/main/java/proteomics/Index/BuildIndex.java +++ b/src/main/java/proteomics/Index/BuildIndex.java @@ -99,7 +99,7 @@ public BuildIndex(Map parameter_map) throws Exception { } // define a new MassTool object - mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", true, mz_bin_size * 0.5, one_minus_bin_offset, "N14"); + mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, parameter_map.get("cleavage_site").trim(), parameter_map.get("protection_site").trim(), parameter_map.get("is_from_C_term").trim().contentEquals("1"), mz_bin_size * 0.5, one_minus_bin_offset, "N14"); // generate seq_pro_map Map seq_term_map = new HashMap<>(); @@ -139,7 +139,7 @@ public BuildIndex(Map parameter_map) throws Exception { boolean proteinCTerm = seq_term_map.get(seq)[1]; // mod free - Set linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm, linker_type); + Set linkSiteSet = getLinkSiteSet(seq, parameter_map.get("is_from_C_term").trim().contentEquals("1"), parameter_map.get("cleavage_site"), proteinNTerm, proteinCTerm, linker_type); if (!linkSiteSet.isEmpty()) { double totalMass = (mass_tool_obj.calResidueMass(seq) + mass_tool_obj.H2O); int bin = massToBin(totalMass); @@ -548,14 +548,30 @@ private Set checkKCTermMod(Set varSeqSet) { // eliminate those s } } - private Set getLinkSiteSet(String seq, boolean n_term, boolean c_term, short linker_type) { + private Set getLinkSiteSet(String seq, boolean is_from_C_term, String cleavage_site, boolean n_term, boolean c_term, short linker_type) { AA[] aa_list = MassTool.seqToAAList(seq); Set output = new HashSet<>(5, 1); - for (int i = 1; i < aa_list.length - 2; ++i) { + for (int i = 1; i < aa_list.length - 1; ++i) { if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) { - output.add((short) i); + if (is_from_C_term) { + if (i < aa_list.length - 2 || !cleavage_site.contains("K")) { + output.add((short) i); + } + } else { + if (i > 1 || !cleavage_site.contains("K")) { + output.add((short) i); + } + } } else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) { - output.add((short) i); + if (is_from_C_term) { + if (i < aa_list.length - 2 || !cleavage_site.contains("C")) { + output.add((short) i); + } + } else { + if (i > 1 || !cleavage_site.contains("C")) { + output.add((short) i); + } + } } } if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].ptmDeltaMass) < varModMassResolution)) { diff --git a/src/main/resources/parameter.def b/src/main/resources/parameter.def index c4de686..5051d0d 100644 --- a/src/main/resources/parameter.def +++ b/src/main/resources/parameter.def @@ -66,6 +66,18 @@ O = 0 n = 0 c = 0 +# Enzyme digestion specificities. Only support one enzyme at a time. +# enzyme name is cut from C-term cleavage site protection site +Trypsin 1 KR P +# Trypsin/P 1 KR - +# TrypsinR 1 R P +# LysC 1 K P +# ArgC 1 R P +# Chymotrypsin 1 FYW P +# GluC 1 DE P +# LysN 0 K - +# AspN 0 D - + # Advanced parameters. # Don't change them unless necessary. single_chain_t = 0 # Single chain score threshold.