Skip to content

Commit

Permalink
Support more cross-linkers.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Jul 24, 2017
1 parent 9026575 commit bbf133d
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 24 deletions.
37 changes: 26 additions & 11 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class BuildIndex {
private static final float varModMassResolution = 0.01f;

public final float linker_mass;
public final short linker_type;

private final MassTool mass_tool_obj;
private final Map<String, String> pro_annotate_map;
Expand Down Expand Up @@ -71,13 +72,25 @@ public BuildIndex(Map<String, String> parameter_map) {
fix_mod_map.put('n', Float.valueOf(parameter_map.get("n")));
fix_mod_map.put('c', Float.valueOf(parameter_map.get("c")));

if (Math.abs(fix_mod_map.get('K') - fix_mod_map.get('n')) > 1e-6) { // todo improve
logger.error("K and N-term have different fixed modification. Exit.");
linker_type = Short.valueOf(parameter_map.get("cl_type"));

// check the cross-linker and the fix modification
if (linker_type == 1) {
if (Math.abs(fix_mod_map.get('K') - fix_mod_map.get('n')) > 1e-6) {
linker_mass = 0;
logger.error("The link sites have different fix modifications.");
System.exit(1);
} else {
linker_mass = Float.valueOf(parameter_map.get("cl_mass")) - fix_mod_map.get('K');
}
} else if (linker_type == 2) {
linker_mass = Float.valueOf(parameter_map.get("cl_mass")) - fix_mod_map.get('C');
} else {
linker_mass = 0;
logger.error("The cross-linker type cannot be recognized.");
System.exit(1);
}

linker_mass = Float.valueOf(parameter_map.get("cl_mass")) - 2 * fix_mod_map.get('K'); // todo improve

// read protein database
DbTool db_tool_obj = new DbTool(db_path);
Map<String, String> pro_seq_map = db_tool_obj.getProSeqMap();
Expand Down Expand Up @@ -124,7 +137,7 @@ public BuildIndex(Map<String, String> parameter_map) {
boolean proteinCTerm = seq_term_map.get(seq)[1];

// mod free
Set<Short> linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm);
Set<Short> linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm, linker_type);
if (!linkSiteSet.isEmpty()) {
float totalMass = (float) (mass_tool_obj.calResidueMass(seq) + MassTool.H2O);
if (totalMass < max_precursor_mass - linker_mass) {
Expand Down Expand Up @@ -226,7 +239,7 @@ private Map<String, Set<String>> buildSeqProMap(Map<String, String> pro_seq_map,
Set<String> for_check_duplicate = new HashSet<>();
for (String pro_id : pro_seq_map.keySet()) {
String pro_seq = pro_seq_map.get(pro_id);
Set<String> seq_set = mass_tool_obj.buildChainSet(pro_seq);
Set<String> seq_set = mass_tool_obj.buildChainSet(pro_seq, linker_type);
for (String target_seq : seq_set) {
if ((target_seq.length() >= min_chain_length) && (target_seq.length() <= max_chain_length) && !target_seq.contains("B") && !target_seq.contains("J") && !target_seq.contains("X") && !target_seq.contains("Z")) {
if (!for_check_duplicate.contains(target_seq.replace("L", "I"))) {
Expand Down Expand Up @@ -259,7 +272,7 @@ private Map<String, Set<String>> buildSeqProMap(Map<String, String> pro_seq_map,
for (String pro_id : pro_seq_map.keySet()) {
String pro_seq = pro_seq_map.get(pro_id);
String decoy_pro_seq = (new StringBuilder(pro_seq)).reverse().toString();
Set<String> decoy_seq_set = mass_tool_obj.buildChainSet(decoy_pro_seq);
Set<String> decoy_seq_set = mass_tool_obj.buildChainSet(decoy_pro_seq, linker_type);
for (String decoy_seq : decoy_seq_set) {
if ((decoy_seq.length() >= min_chain_length) && (decoy_seq.length() <= max_chain_length) && !decoy_seq.contains("B") && !decoy_seq.contains("J") && !decoy_seq.contains("X") && !decoy_seq.contains("Z")) {
if (!for_check_duplicate.contains(decoy_seq.replace("L", "I"))) {
Expand Down Expand Up @@ -546,18 +559,20 @@ private Set<String> checkKCTermMod(Set<String> varSeqSet) { // eliminate those s
}
}

private Set<Short> getLinkSiteSet(String seq, boolean n_term, boolean c_term) {
private Set<Short> getLinkSiteSet(String seq, boolean n_term, boolean c_term, short linker_type) {
AA[] aa_list = MassTool.seqToAAList(seq);
Set<Short> output = new HashSet<>(5, 1);
for (int i = 1; i < aa_list.length - 2; ++i) {
if (aa_list[i].aa == 'K' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) {
if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) {
output.add((short) i);
} else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) {
output.add((short) i);
}
}
if (n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].delta_mass) < varModMassResolution)) {
if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].delta_mass) < varModMassResolution)) {
output.add((short) 0);
}
if (c_term && aa_list[aa_list.length - 2].aa == 'K' && (Math.abs(aa_list[aa_list.length - 2].delta_mass) < varModMassResolution)) {
if (linker_type == 1 && c_term && aa_list[aa_list.length - 2].aa == 'K' && (Math.abs(aa_list[aa_list.length - 2].delta_mass) < varModMassResolution)) {
output.add((short) (aa_list.length - 2));
}
return output;
Expand Down
26 changes: 16 additions & 10 deletions src/main/java/proteomics/TheoSeq/MassTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,29 +71,35 @@ public float calResidueMass(String seq) { // n and c are also AA.
return (float) total_mass;
}

public Set<String> buildChainSet(String pro_seq) {
public Set<String> buildChainSet(String pro_seq, short linker_type) {
Map<Integer, List<int[]>> digest_range_map = digestTrypsin(pro_seq);
Set<String> chain_seq_set = new HashSet<>();

for (int i = 0; i <= missed_cleavage; ++i) {
for (int[] digest_range_1 : digest_range_map.get(i)) {
String sub_string = pro_seq.substring(digest_range_1[0], digest_range_1[1]);
if (sub_string.substring(0, sub_string.length() - 1).contains("K")) {
// If there is a K in middle, this peptide is a chain.
if (linker_type == 1 && sub_string.substring(0, sub_string.length() - 1).contains("K")) {
chain_seq_set.add("n" + sub_string + "c");
} else if (linker_type == 2 && sub_string.substring(0, sub_string.length() - 1).contains("C")) {
chain_seq_set.add("n" + sub_string + "c");
}

if (digest_range_1[1] == pro_seq.length()) {
// This is the end of the protein. No digestion site, so "K" in any position including C-term can be linked.
if (sub_string.contains("K")) {
// This peptide ends at the end of the protein, so there is no digestion site after it: a link site at any position, including the C-terminal residue, can be linked.
if (linker_type == 1 && sub_string.contains("K")) {
chain_seq_set.add("n" + sub_string + "c");
} else if (linker_type == 2 && sub_string.contains("C")) {
chain_seq_set.add("n" + sub_string + "c");
}
}
}
// Add N-term peptide
if (digest_range_map.get(i).size() > 0) {
int[] digest_range = digest_range_map.get(i).get(0);
String sub_string = pro_seq.substring(digest_range[0], digest_range[1]);
chain_seq_set.add("n" + sub_string + "c");
if (linker_type == 1) {
// Add N-term peptide
if (digest_range_map.get(i).size() > 0) {
int[] digest_range = digest_range_map.get(i).get(0);
String sub_string = pro_seq.substring(digest_range[0], digest_range[1]);
chain_seq_set.add("n" + sub_string + "c");
}
}
}
return chain_seq_set;
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/parameter.def
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ mz_bin_size = 0.02
mz_bin_offset = 0

# Cross-linking parameter.
# The cross-linking site cannot be modified.
cl_mass = 138.0680796
cl_type = 1 # 1 = Kn-Kn; 2 = C-C

# Var modification
# format: <mass> <residues> <binary>
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/proteomics/TheoSeq/MassToolTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public void mzToBin() throws Exception {
public void buildChainSet() throws Exception {
// 1 missed-cleavage, N-term linkable
MassTool mass_tool_obj = new MassTool(1, fix_mod_map, "KR", "P", 1.0005f, 0.6f);
Set<String> result = mass_tool_obj.buildChainSet("MRGFASSASRIATAAAASKPSLNASTSVNPKLSKTMDYMRIFSVFVVTLWIIRVDARVFKTY");
Set<String> result = mass_tool_obj.buildChainSet("MRGFASSASRIATAAAASKPSLNASTSVNPKLSKTMDYMRIFSVFVVTLWIIRVDARVFKTY", (short) 1);
Set<String> ground_truth = new HashSet<>();
ground_truth.add("nMRc");
ground_truth.add("nMRGFASSASRc");
Expand All @@ -73,7 +73,7 @@ public void buildChainSet() throws Exception {

// 2 missed-cleavage, N-term linkable
mass_tool_obj = new MassTool(2, fix_mod_map, "KR", "P", 1.0005f, 0.6f);
result = mass_tool_obj.buildChainSet("MRGFASSASRIATAAAASKPSLNASTSVNPKLSKTMDYMRIFSVFVVTLWIIRVDARVFKTY");
result = mass_tool_obj.buildChainSet("MRGFASSASRIATAAAASKPSLNASTSVNPKLSKTMDYMRIFSVFVVTLWIIRVDARVFKTY", (short) 1);
ground_truth = new HashSet<>();
ground_truth.add("nMRc");
ground_truth.add("nIATAAAASKPSLNASTSVNPKc");
Expand Down

0 comments on commit bbf133d

Please sign in to comment.