Skip to content

Commit

Permalink
Only two sequences with the same binary mod type can be linked.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed May 27, 2017
1 parent 5152090 commit 26962e2
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 363 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>hk.ust.bioinformatics</groupId>
<artifactId>ECL2</artifactId>
<version>2.1.4-dev-201705262001</version>
<version>2.1.4-dev-201705271641</version>
<packaging>jar</packaging>

<name>ECL2</name>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/proteomics/ECL2.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public class ECL2 {
public static final boolean flankingPeaks = true;

private static final Logger logger = LoggerFactory.getLogger(ECL2.class);
public static final String version = "2.1.4-dev-201705262001";
public static final String version = "2.1.4-dev-201705271641";

public static boolean debug;
public static boolean dev;
Expand Down
55 changes: 28 additions & 27 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import org.slf4j.LoggerFactory;
import proteomics.TheoSeq.DbTool;
import proteomics.TheoSeq.MassTool;
import proteomics.Types.AA;
import proteomics.Types.BinaryModParam;
import proteomics.Types.ChainEntry;
import proteomics.Types.VarModParam;
import proteomics.Types.*;

import java.util.*;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -140,28 +137,36 @@ public BuildIndex(Map<String, String> parameter_map) {
temp.add(seq);
bin_seq_map.put(bin, temp);
}
ChainEntry chainEntry = new ChainEntry(seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm);
ChainEntry chainEntry = new ChainEntry(seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm, "0".hashCode());
seq_entry_map.put(seq, chainEntry);
}
}

// mod containing
Set<String> varSeqSet = generateModSeq(seq, linkSiteSet, varModParamSet, binaryModParamSet, varModMaxNum);
for (String varSeq : varSeqSet) {
linkSiteSet = getLinkSiteSet(varSeq, proteinNTerm, proteinCTerm);
Set<VarSequence> varSeqSet = generateModSeq(seq, linkSiteSet, varModParamSet, binaryModParamSet, varModMaxNum);
for (VarSequence varSeq : varSeqSet) {
linkSiteSet = new HashSet<>();
linkSiteSet.add(varSeq.linkSite);
if (!linkSiteSet.isEmpty()) {
float totalMass = (float) (mass_tool_obj.calResidueMass(varSeq) + MassTool.H2O);
float totalMass = (float) (mass_tool_obj.calResidueMass(varSeq.seq) + MassTool.H2O);
if (totalMass < max_precursor_mass - linker_mass) {
int bin = massToBin(totalMass);
if (bin_seq_map.containsKey(bin)) {
bin_seq_map.get(bin).add(varSeq);
bin_seq_map.get(bin).add(varSeq.seq);
} else {
Set<String> temp = new HashSet<>();
temp.add(varSeq);
temp.add(varSeq.seq);
bin_seq_map.put(bin, temp);
}
ChainEntry chainEntry = new ChainEntry(varSeq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm);
seq_entry_map.put(varSeq, chainEntry);
ChainEntry chainEntry = new ChainEntry(varSeq.seq, totalMass, linkSiteSet, proteinNTerm, proteinCTerm, varSeq.binaryModType);
if (seq_entry_map.containsKey(varSeq.seq)) {
// Binary mod has the higher priority
if (seq_entry_map.get(varSeq.seq).binaryModType == "0".hashCode()) {
seq_entry_map.put(varSeq.seq, chainEntry);
}
} else {
seq_entry_map.put(varSeq.seq, chainEntry);
}
}
}
}
Expand Down Expand Up @@ -292,8 +297,8 @@ private Map<String, Set<String>> buildSeqProMap(Map<String, String> pro_seq_map,
return seq_pro_map;
}

private Set<String> generateModSeq(String seq, Set<Short> modFreeListSites, Set<VarModParam> varModParamSet, Set<BinaryModParam> binaryModParamSet, int varModMaxNum) { // todo: check
Set<String> varSeqSet = new HashSet<>();
private Set<VarSequence> generateModSeq(String seq, Set<Short> modFreeListSites, Set<VarModParam> varModParamSet, Set<BinaryModParam> binaryModParamSet, int varModMaxNum) { // todo: check
Set<VarSequence> varSeqSet = new HashSet<>();
for (short linkSite : modFreeListSites) {
// has binary mod
for (BinaryModParam binaryModParam : binaryModParamSet) {
Expand All @@ -318,7 +323,7 @@ private Set<String> generateModSeq(String seq, Set<Short> modFreeListSites, Set<
sb.append(String.format("[%.2f]", idxBinaryModMassMap.get(i).get(0)));
}
}
varSeqSet.add(sb.toString());
varSeqSet.add(new VarSequence(sb.toString(), linkSite, binaryModParam.hashCode()));

if (idxBinaryModMassMap.size() < varModMaxNum) {
// generate sequences containing the binary mod and additional var mod
Expand Down Expand Up @@ -351,7 +356,7 @@ private Set<String> generateModSeq(String seq, Set<Short> modFreeListSites, Set<
Arrays.sort(allIdxArray);
for (int i = 1; i <= Math.min(varModMaxNum - idxBinaryModMassMap.size(), idxVarModMassMap.size()); ++i) {
List<int[]> idxCombinationList = generateIdxCombinations(allIdxArray, i);
Set<String> varSetSubSet = new HashSet<>();
Set<VarSequence> varSetSubSet = new HashSet<>();
for (int[] idxCombination : idxCombinationList) {
int[] allIdxCombination = new int[idxCombination.length + idxBinaryModMassMap.size()];
int j = 0;
Expand All @@ -363,10 +368,10 @@ private Set<String> generateModSeq(String seq, Set<Short> modFreeListSites, Set<
allIdxCombination[j + k] = idxCombination[k];
}
Arrays.sort(allIdxCombination);
varSetSubSet.addAll(generateModSeqSub(seq, allIdxCombination, idxBinaryVarModMassMap));
varSetSubSet.addAll(generateModSeqSub(seq, allIdxCombination, idxBinaryVarModMassMap, linkSite, binaryModParam.hashCode()));
}
if (!varSetSubSet.isEmpty()) {
varSeqSet.addAll(checkKCTermMod(varSetSubSet)); // eliminate those sequence that the middle amino acids having the same mod mass and the n-term and the first amino acid or the c-term and the last amino acid have the same mod mass.
varSeqSet.addAll(varSetSubSet); // eliminate those sequence that the middle amino acids having the same mod mass and the n-term and the first amino acid or the c-term and the last amino acid have the same mod mass.
}
}
}
Expand Down Expand Up @@ -399,12 +404,8 @@ private Set<String> generateModSeq(String seq, Set<Short> modFreeListSites, Set<
Arrays.sort(allIdxArray);
for (int i = 1; i <= Math.min(varModMaxNum, idxVarModMassMap.size()); ++i) {
List<int[]> idxCombinationList = generateIdxCombinations(allIdxArray, i);
Set<String> varSetSubSet = new HashSet<>();
for (int[] idxCombination : idxCombinationList) {
varSetSubSet.addAll(generateModSeqSub(seq, idxCombination, idxVarModMassMap));
}
if (!varSetSubSet.isEmpty()) {
varSeqSet.addAll(checkKCTermMod(varSetSubSet)); // eliminate those sequence that the middle amino acids having the same mod mass and the n-term and the first amino acid or the c-term and the last amino acid have the same mod mass.
varSeqSet.addAll(generateModSeqSub(seq, idxCombination, idxVarModMassMap, linkSite, "0".hashCode()));
}
}
}
Expand All @@ -429,10 +430,10 @@ private List<int[]> generateIdxCombinations(Integer[] allIdxArray, int num) {
return outputList;
}

private Set<String> generateModSeqSub(String seq, int[] idxCombination, Map<Integer, List<Float>> idxModMassMap) {
private Set<VarSequence> generateModSeqSub(String seq, int[] idxCombination, Map<Integer, List<Float>> idxModMassMap, short linkSite, int binaryModType) {
List<Map<Integer, Float>> localIdxModMassMaps = generateLocalIdxModMassMap(idxCombination, idxModMassMap);

Set<String> outputSet = new HashSet<>();
Set<VarSequence> outputSet = new HashSet<>();
for (Map<Integer, Float> localIdxModMassMap : localIdxModMassMaps) {
StringBuilder sb = new StringBuilder(seq.length() * 10);
for (int i = 0; i < seq.length(); ++i) {
Expand All @@ -441,7 +442,7 @@ private Set<String> generateModSeqSub(String seq, int[] idxCombination, Map<Inte
sb.append(String.format("[%.2f]", localIdxModMassMap.get(i)));
}
}
outputSet.add(sb.toString());
outputSet.add(new VarSequence(sb.toString(), linkSite, binaryModType));
}

return outputSet;
Expand Down
91 changes: 47 additions & 44 deletions src/main/java/proteomics/Search/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,58 +153,61 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int spec
}

ChainResultEntry chain_score_entry_2 = binChainMap.get(idx_2);
double score = 0;
if (chain_score_entry_1.getPtmFreeSeq().contentEquals(chain_score_entry_2.getPtmFreeSeq())) {
score = (chain_score_entry_1.getScore() + chain_score_entry_2.getScore()) / 2;
} else {
score = chain_score_entry_1.getScore() + chain_score_entry_2.getScore();
}

// calculate second score
double second_score = 0;
double temp_1 = -1;
if (chain_score_entry_1.getSecondSeq() != null) {
if (chain_score_entry_1.getSecondPtmFreeSeq().contentEquals(chain_score_entry_2.getPtmFreeSeq())) {
temp_1 = (chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore()) / 2;
} else {
temp_1 = chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore();
}
}
double temp_2 = -1;
if (chain_score_entry_2.getSecondSeq() != null) {
if (chain_score_entry_1.getPtmFreeSeq().contentEquals(chain_score_entry_2.getSecondPtmFreeSeq())) {
temp_2 = (chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore()) / 2;
// only two sequences with the same binary mod type can be linked.
if (chain_entry_map.get(chain_score_entry_1.getSeq()).binaryModType == chain_entry_map.get(chain_score_entry_2.getSeq()).binaryModType) {
double score;
if (chain_score_entry_1.getPtmFreeSeq().contentEquals(chain_score_entry_2.getPtmFreeSeq())) {
score = (chain_score_entry_1.getScore() + chain_score_entry_2.getScore()) / 2;
} else {
temp_2 = chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore();
score = chain_score_entry_1.getScore() + chain_score_entry_2.getScore();
}
}

if (temp_1 > 0) {
if (temp_1 >= temp_2) {
second_score = temp_1;
// calculate second score
double second_score = 0;
double temp_1 = -1;
if (chain_score_entry_1.getSecondSeq() != null) {
if (chain_score_entry_1.getSecondPtmFreeSeq().contentEquals(chain_score_entry_2.getPtmFreeSeq())) {
temp_1 = (chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore()) / 2;
} else {
temp_1 = chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore();
}
}
} else if (temp_2 > 0) {
if (temp_2 > temp_1) {
second_score = temp_2;
double temp_2 = -1;
if (chain_score_entry_2.getSecondSeq() != null) {
if (chain_score_entry_1.getPtmFreeSeq().contentEquals(chain_score_entry_2.getSecondPtmFreeSeq())) {
temp_2 = (chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore()) / 2;
} else {
temp_2 = chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore();
}
}
}

if (cal_evalue && (resultEntry.getScoreCount() < ECL2.score_point_t)) {
for (double s1 : chain_score_entry_1.getScoreList()) {
for (double s2 : chain_score_entry_2.getScoreList()) {
resultEntry.addToScoreHistogram(s1 + s2);
if (temp_1 > 0) {
if (temp_1 >= temp_2) {
second_score = temp_1;
}
} else if (temp_2 > 0) {
if (temp_2 > temp_1) {
second_score = temp_2;
}
}
}
if (score > resultEntry.getScore()) {
resultEntry.setSecondScore(Math.max(resultEntry.getScore(), second_score));
resultEntry.setScore(score);
resultEntry.setChain1(chain_score_entry_1.getSeq());
resultEntry.setChain2(chain_score_entry_2.getSeq());
resultEntry.setLinkSite1(chain_score_entry_1.getLinkSite());
resultEntry.setLinkSite2(chain_score_entry_2.getLinkSite());
} else if (Math.max(score, second_score) > resultEntry.getSecondScore()) {
resultEntry.setSecondScore(Math.max(score, second_score));

if (cal_evalue && (resultEntry.getScoreCount() < ECL2.score_point_t)) {
for (double s1 : chain_score_entry_1.getScoreList()) {
for (double s2 : chain_score_entry_2.getScoreList()) {
resultEntry.addToScoreHistogram(s1 + s2);
}
}
}
if (score > resultEntry.getScore()) {
resultEntry.setSecondScore(Math.max(resultEntry.getScore(), second_score));
resultEntry.setScore(score);
resultEntry.setChain1(chain_score_entry_1.getSeq());
resultEntry.setChain2(chain_score_entry_2.getSeq());
resultEntry.setLinkSite1(chain_score_entry_1.getLinkSite());
resultEntry.setLinkSite2(chain_score_entry_2.getLinkSite());
} else if (Math.max(score, second_score) > resultEntry.getSecondScore()) {
resultEntry.setSecondScore(Math.max(score, second_score));
}
}
}
}
Expand Down
10 changes: 9 additions & 1 deletion src/main/java/proteomics/Types/ChainEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@ public class ChainEntry {
public final Set<Short> link_site_set;
public final boolean n_term;
public final boolean c_term;
public final int binaryModType;
private final String toString;

public ChainEntry(String seq, float chain_mass, Set<Short> link_site_set, boolean n_term, boolean c_term) {
public ChainEntry(String seq, float chain_mass, Set<Short> link_site_set, boolean n_term, boolean c_term, int binaryModType) {
this.seq = seq;
this.chain_mass = chain_mass;
this.link_site_set = link_site_set;
this.n_term = n_term;
this.c_term = c_term;
this.binaryModType = binaryModType;
toString = seq + "-" + binaryModType;
}

@Override
Expand All @@ -27,4 +31,8 @@ public boolean equals(Object other) {
return false;
}
}

public String toString() {
return toString;
}
}
34 changes: 34 additions & 0 deletions src/main/java/proteomics/Types/VarSequence.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package proteomics.Types;


public class VarSequence {

public final String seq;
public final short linkSite;
public final int binaryModType;
private final String toString;

public VarSequence(String seq, short linkSite, int binaryModType) {
this.seq = seq;
this.linkSite = linkSite;
this.binaryModType = binaryModType;
toString = seq + "-" + linkSite + "-" + binaryModType;
}

public boolean equals(Object other) {
if (other instanceof VarSequence) {
VarSequence temp = (VarSequence) other;
return temp.seq.contentEquals(seq) && (temp.linkSite == linkSite) && (temp.binaryModType == binaryModType);
} else {
return false;
}
}

public String toString() {
return toString;
}

public int hashCode() {
return toString.hashCode();
}
}
2 changes: 1 addition & 1 deletion src/main/resources/parameter.def
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 2.1.4-dev-201705262001
# 2.1.4-dev-201705271641
# The first line is the parameter file version. Do not change it.
thread_num = 0
debug = 0
Expand Down
Loading

0 comments on commit 26962e2

Please sign in to comment.