Skip to content

Commit

Permalink
Support more enzymes.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Jun 7, 2018
1 parent b0b3876 commit 58b5a76
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 6 deletions.
28 changes: 22 additions & 6 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
}

// define a new MassTool object
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", true, mz_bin_size * 0.5, one_minus_bin_offset, "N14");
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, parameter_map.get("cleavage_site").trim(), parameter_map.get("protection_site").trim(), parameter_map.get("is_from_C_term").trim().contentEquals("1"), mz_bin_size * 0.5, one_minus_bin_offset, "N14");

// generate seq_pro_map
Map<String, boolean[]> seq_term_map = new HashMap<>();
Expand Down Expand Up @@ -139,7 +139,7 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
boolean proteinCTerm = seq_term_map.get(seq)[1];

// mod free
Set<Short> linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm, linker_type);
Set<Short> linkSiteSet = getLinkSiteSet(seq, parameter_map.get("is_from_C_term").trim().contentEquals("1"), parameter_map.get("cleavage_site"), proteinNTerm, proteinCTerm, linker_type);
if (!linkSiteSet.isEmpty()) {
double totalMass = (mass_tool_obj.calResidueMass(seq) + mass_tool_obj.H2O);
int bin = massToBin(totalMass);
Expand Down Expand Up @@ -548,14 +548,30 @@ private Set<String> checkKCTermMod(Set<String> varSeqSet) { // eliminate those s
}
}

private Set<Short> getLinkSiteSet(String seq, boolean n_term, boolean c_term, short linker_type) {
private Set<Short> getLinkSiteSet(String seq, boolean is_from_C_term, String cleavage_site, boolean n_term, boolean c_term, short linker_type) {
AA[] aa_list = MassTool.seqToAAList(seq);
Set<Short> output = new HashSet<>(5, 1);
for (int i = 1; i < aa_list.length - 2; ++i) {
for (int i = 1; i < aa_list.length - 1; ++i) {
if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) {
output.add((short) i);
if (is_from_C_term) {
if (i < aa_list.length - 2 || !cleavage_site.contains("K")) {
output.add((short) i);
}
} else {
if (i > 1 || !cleavage_site.contains("K")) {
output.add((short) i);
}
}
} else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) {
output.add((short) i);
if (is_from_C_term) {
if (i < aa_list.length - 2 || !cleavage_site.contains("C")) {
output.add((short) i);
}
} else {
if (i > 1 || !cleavage_site.contains("C")) {
output.add((short) i);
}
}
}
}
if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].ptmDeltaMass) < varModMassResolution)) {
Expand Down
12 changes: 12 additions & 0 deletions src/main/resources/parameter.def
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ O = 0
n = 0
c = 0

# Enzyme digestion specificities. Only one enzyme is supported at a time.
# Columns: enzyme name, is cut from C-term (1 = yes, 0 = no), cleavage site, protection site
Trypsin 1 KR P
# Trypsin/P 1 KR -
# TrypsinR 1 R P
# LysC 1 K P
# ArgC 1 R P
# Chymotrypsin 1 FYW P
# GluC 1 DE P
# LysN 0 K -
# AspN 0 D -

# Advanced parameters.
# Don't change them unless necessary.
single_chain_t = 0 # Single chain score threshold.
Expand Down

0 comments on commit 58b5a76

Please sign in to comment.