Skip to content

Commit

Permalink
Support up to two enzymes simultaneously.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Jul 4, 2018
1 parent 17bb260 commit 62947bb
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 16 deletions.
46 changes: 35 additions & 11 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
}

// define a new MassTool object
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, parameter_map.get("cleavage_site").trim(), parameter_map.get("protection_site").trim(), parameter_map.get("is_from_C_term").trim().contentEquals("1"), mz_bin_size * 0.5, one_minus_bin_offset, "N14");
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, parameter_map.get("cleavage_site_1").trim(), parameter_map.get("protection_site_1").trim(), parameter_map.get("is_from_C_term_1").trim().contentEquals("1"), parameter_map.getOrDefault("cleavage_site_2", null), parameter_map.getOrDefault("protection_site_2", null), parameter_map.containsKey("is_from_C_term_2") ? parameter_map.get("is_from_C_term_2").trim().contentEquals("1") : null, mz_bin_size * 0.5, one_minus_bin_offset, "N14");

// generate seq_pro_map
Map<String, boolean[]> seq_term_map = new HashMap<>();
Expand Down Expand Up @@ -139,7 +139,7 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
boolean proteinCTerm = seq_term_map.get(seq)[1];

// mod free
Set<Short> linkSiteSet = getLinkSiteSet(seq, parameter_map.get("is_from_C_term").trim().contentEquals("1"), parameter_map.get("cleavage_site"), proteinNTerm, proteinCTerm, linker_type);
Set<Short> linkSiteSet = getLinkSiteSet(seq, parameter_map.get("is_from_C_term_1").trim().contentEquals("1"), parameter_map.get("cleavage_site_1"), parameter_map.containsKey("is_from_C_term_2") ? parameter_map.get("is_from_C_term_2").trim().contentEquals("1") : null, parameter_map.getOrDefault("cleavage_site_2", null), proteinNTerm, proteinCTerm, linker_type);
if (!linkSiteSet.isEmpty()) {
double totalMass = (mass_tool_obj.calResidueMass(seq) + mass_tool_obj.H2O);
int bin = massToBin(totalMass);
Expand Down Expand Up @@ -548,28 +548,52 @@ private Set<String> checkKCTermMod(Set<String> varSeqSet) { // eliminate those s
}
}

private Set<Short> getLinkSiteSet(String seq, boolean is_from_C_term, String cleavage_site, boolean n_term, boolean c_term, short linker_type) {
private Set<Short> getLinkSiteSet(String seq, boolean is_from_C_term_1, String cleavage_site_1, Boolean is_from_C_term_2, String cleavage_site_2, boolean n_term, boolean c_term, short linker_type) {
AA[] aa_list = MassTool.seqToAAList(seq);
Set<Short> output = new HashSet<>(5, 1);
for (int i = 1; i < aa_list.length - 1; ++i) {
if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) {
if (is_from_C_term) {
if (i < aa_list.length - 2 || !cleavage_site.contains("K")) {
if (is_from_C_term_2 == null) {
if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("K"))) {
output.add((short) i);
} else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("K"))) {
output.add((short) i);
}
} else {
if (i > 1 || !cleavage_site.contains("K")) {
output.add((short) i);
if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("K"))) {
if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("K"))) {
output.add((short) i);
} else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("K"))) {
output.add((short) i);
}
} else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("K"))) {
if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("K"))) {
output.add((short) i);
} else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("K"))) {
output.add((short) i);
}
}
}
} else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) {
if (is_from_C_term) {
if (i < aa_list.length - 2 || !cleavage_site.contains("C")) {
if (is_from_C_term_2 == null) {
if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("C"))) {
output.add((short) i);
} else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("C"))) {
output.add((short) i);
}
} else {
if (i > 1 || !cleavage_site.contains("C")) {
output.add((short) i);
if (is_from_C_term_1 && (i < aa_list.length - 2 || !cleavage_site_1.contains("C"))) {
if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("C"))) {
output.add((short) i);
} else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("C"))) {
output.add((short) i);
}
} else if (!is_from_C_term_1 && (i > 1 || !cleavage_site_1.contains("C"))) {
if (is_from_C_term_2 && (i < aa_list.length - 2 || !cleavage_site_2.contains("C"))) {
output.add((short) i);
} else if (!is_from_C_term_2 && (i > 1 || !cleavage_site_2.contains("C"))) {
output.add((short) i);
}
}
}
}
Expand Down
15 changes: 11 additions & 4 deletions src/main/java/proteomics/Parameter/Parameter.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,17 @@ public Parameter(String parameterFile) throws Exception {
} else {
Matcher enzymeMatcher = enzymePattern.matcher(line);
if (enzymeMatcher.matches()) {
parameterMap.put("enzyme_name", enzymeMatcher.group(1).trim());
parameterMap.put("is_from_C_term", enzymeMatcher.group(2).trim());
parameterMap.put("cleavage_site", enzymeMatcher.group(3).trim());
parameterMap.put("protection_site", enzymeMatcher.group(4).trim());
if (parameterMap.containsKey("enzyme_name_1")) {
parameterMap.put("enzyme_name_2", enzymeMatcher.group(1).trim());
parameterMap.put("is_from_C_term_2", enzymeMatcher.group(2).trim());
parameterMap.put("cleavage_site_2", enzymeMatcher.group(3).trim());
parameterMap.put("protection_site_2", enzymeMatcher.group(4).trim());
} else {
parameterMap.put("enzyme_name_1", enzymeMatcher.group(1).trim());
parameterMap.put("is_from_C_term_1", enzymeMatcher.group(2).trim());
parameterMap.put("cleavage_site_1", enzymeMatcher.group(3).trim());
parameterMap.put("protection_site_1", enzymeMatcher.group(4).trim());
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/parameter.def
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ O = 0
n = 0
c = 0

# Enzyme digestion specifications. Only support one enzyme at a time.
# Enzyme digestion specifications. Up to two enzymes are supported simultaneously, one enzyme per line.
# Columns: enzyme name | is cut from C-term (1 = yes) | cleavage site | protection site
Trypsin 1 KR P
# Trypsin/P 1 KR -
Expand Down

0 comments on commit 62947bb

Please sign in to comment.