Skip to content

Commit

Permalink
Fix a serious bug in CalEValue and improve CalEValue a lot.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Jan 31, 2018
1 parent ff49e7e commit 418d314
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 55 deletions.
68 changes: 40 additions & 28 deletions src/main/java/proteomics/Search/CalEValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,27 @@
import org.slf4j.LoggerFactory;
import proteomics.ECL2;
import proteomics.Index.BuildIndex;
import proteomics.TheoSeq.MassTool;
import proteomics.Types.*;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Locale;
import java.util.TreeMap;
import java.util.*;

public class CalEValue {

private static final Logger logger = LoggerFactory.getLogger(CalEValue.class);
private static final float maxTolerance = 20;
private static final float toleranceStep = 1;

private ResultEntry result_entry;
private BuildIndex buildIndexObj;
private float linker_mass;

CalEValue(int scan_num, ResultEntry result_entry, BuildIndex buildIndexObj, float linker_mass, float originalTolerance) throws IOException {
this.result_entry = result_entry;
this.buildIndexObj = buildIndexObj;
this.linker_mass = linker_mass;

static void calEValue(int scan_num, ResultEntry result_entry, BuildIndex buildIndexObj, TreeMap<Integer, List<Double>> binScoresMap, float linker_mass, float originalTolerance, SparseVector xcorrPL, double singleChainT) throws IOException {
int gap_num = ECL2.score_point_t - result_entry.getScoreCount();
float tolerance = originalTolerance;
float massWithoutLinker = result_entry.spectrum_mass - linker_mass;
int maxBinIdx = buildIndexObj.massToBin(massWithoutLinker * 0.5f);
while (gap_num > 0 && tolerance <= maxTolerance) {
gap_num = generateRandomRandomScores(gap_num, tolerance, toleranceStep, result_entry.getBinChainMap());
gap_num = generateRandomRandomScores(gap_num, tolerance, toleranceStep, binScoresMap, result_entry.spectrum_mass, massWithoutLinker, result_entry.charge, xcorrPL, buildIndexObj, buildIndexObj.getMassBinSeqMap(), buildIndexObj.getSeqEntryMap(), buildIndexObj.returnMassTool(), result_entry, maxBinIdx, singleChainT);
tolerance += toleranceStep;
}

Expand Down Expand Up @@ -102,7 +96,7 @@ public class CalEValue {
if (null_end_idx > 3 * inverseHistogramBinSize) {
start_idx = Math.max(min_nonzero_idx, (int) (0.75 * null_end_idx));
} else {
start_idx = Math.max(min_nonzero_idx, (int) ( 0.5 * null_end_idx));
start_idx = Math.max(min_nonzero_idx, (int) (0.5 * null_end_idx));
}

// linear regression
Expand Down Expand Up @@ -195,22 +189,21 @@ public class CalEValue {
}
}

private int generateRandomRandomScores(int gap_num, float tolerance, float toleranceStep, TreeMap<Integer, ChainResultEntry> binChainMap) {
int maxBinIdx = buildIndexObj.massToBin((result_entry.spectrum_mass - linker_mass) * 0.5f);
for (int binIdx1 : binChainMap.keySet()) {
private static int generateRandomRandomScores(int gap_num, float tolerance, float toleranceStep, TreeMap<Integer, List<Double>> binScoresMap, float precursorMass, float massWithoutLinker, int precursorCharge, SparseVector xcorrPL, BuildIndex buildIndex, TreeMap<Integer, Set<String>> binSequencesMap, Map<String, ChainEntry> seqEntryMap, MassTool massTool, ResultEntry resultEntry, int maxBinIdx, double singleChainT) {
for (int binIdx1 : binSequencesMap.keySet()) {
if (binIdx1 < maxBinIdx) {
int leftBinIdx1 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass - tolerance - toleranceStep) - maxBinIdx;
int rightBinIdx1 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass - tolerance) - maxBinIdx - 1;
int leftBinIdx2 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass + tolerance) - maxBinIdx + 1;
int rightBinIdx2 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass + tolerance + toleranceStep) - maxBinIdx;
TreeMap<Integer, ChainResultEntry> sub_map = new TreeMap<>();
sub_map.putAll(binChainMap.subMap(leftBinIdx1, true, rightBinIdx1, false));
sub_map.putAll(binChainMap.subMap(leftBinIdx2, false, rightBinIdx2, true));
if (!sub_map.isEmpty()) {
for (double score1 : binChainMap.get(binIdx1).getScoreList()) {
for (int binIdx2 : sub_map.keySet()) {
for (double score2 : binChainMap.get(binIdx2).getScoreList()) {
result_entry.addToScoreHistogram(score1 + score2);
int leftBinIdx1 = buildIndex.massToBin(massWithoutLinker - tolerance - toleranceStep) - binIdx1;
int rightBinIdx1 = buildIndex.massToBin(massWithoutLinker - tolerance) - binIdx1;
int leftBinIdx2 = buildIndex.massToBin(massWithoutLinker + tolerance) - binIdx1;
int rightBinIdx2 = buildIndex.massToBin(massWithoutLinker + tolerance + toleranceStep) - binIdx1;
TreeMap<Integer, Set<String>> subMap = new TreeMap<>();
subMap.putAll(binSequencesMap.subMap(leftBinIdx1, true, rightBinIdx1, false));
subMap.putAll(binSequencesMap.subMap(leftBinIdx2, false, rightBinIdx2, true));
if (!subMap.isEmpty()) {
for (double score1 : subFunction(binIdx1, binScoresMap, binSequencesMap, seqEntryMap, massTool, precursorMass, precursorCharge, xcorrPL, singleChainT)) {
for (int binIdx2 : subMap.keySet()) {
for (double score2 : subFunction(binIdx2, binScoresMap, binSequencesMap, seqEntryMap, massTool, precursorMass, precursorCharge, xcorrPL, singleChainT)) {
resultEntry.addToScoreHistogram(score1 + score2);
--gap_num;
if (gap_num <= 0) {
return gap_num;
Expand All @@ -223,4 +216,23 @@ private int generateRandomRandomScores(int gap_num, float tolerance, float toler
}
return gap_num;
}

private static List<Double> subFunction(int binIdx, TreeMap<Integer, List<Double>> binScoresMap, TreeMap<Integer, Set<String>> binSequencesMap, Map<String, ChainEntry> seqEntryMap, MassTool massTool, float precursorMass, int precursorCharge, SparseVector xcorrPL, double singleChainT) {
List<Double> scoreList = new ArrayList<>();
if (binScoresMap.containsKey(binIdx)) {
scoreList = binScoresMap.get(binIdx);
} else {
for (String seq : binSequencesMap.get(binIdx)) {
ChainEntry chainEntry = seqEntryMap.get(seq);
for (short linkSite : chainEntry.link_site_set) {
double xcorr = massTool.generateTheoFragmentAndCalXCorr(seq, linkSite, precursorMass - chainEntry.chain_mass, precursorCharge, xcorrPL);
if (xcorr > singleChainT) {
scoreList.add(xcorr);
}
}
}
binScoresMap.put(binIdx, scoreList);
}
return scoreList;
}
}
25 changes: 16 additions & 9 deletions src/main/java/proteomics/Search/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class Search {
private final TreeMap<Integer, Set<String>> bin_seq_map;
private final BuildIndex build_index_obj;
private final int[] C13_correction_range;
private final float single_chain_t;
final float single_chain_t;
private final boolean cal_evalue;

/////////////////////////////////////////public methods////////////////////////////////////////////////////////////
Expand All @@ -47,7 +47,7 @@ public Search(BuildIndex build_index_obj, Map<String, String> parameter_map) {
Arrays.sort(C13_correction_range);
}

ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws IOException {
ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL, TreeMap<Integer, List<Double>> binScoresMap) throws IOException {
int max_chain_bin_idx = Math.min(build_index_obj.massToBin(spectrumEntry.mass_without_linker_mass + C13_correction_range[C13_correction_range.length - 1] * 1.00335483f) + 1 - bin_seq_map.firstKey(), bin_seq_map.lastKey());
int min_chain_bin_idx = bin_seq_map.firstKey();

Expand Down Expand Up @@ -98,7 +98,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws I
// This bin hasn't been visited. Linear scan first.
for (String seq : bin_seq_map.get(idx_1)) {
ChainEntry chainEntry = chain_entry_map.get(seq);
linearScan(spectrumEntry, xcorrPL, chainEntry, idx_1, binChainMap, debugEntryList, devChainScoreMap);
linearScan(spectrumEntry, xcorrPL, chainEntry, idx_1, binChainMap, binScoresMap, debugEntryList, devChainScoreMap);
}
checkedBinSet.add(idx_1);
}
Expand All @@ -110,7 +110,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws I
// this bin hasn't been visited. Linear scan first.
for (String seq : bin_seq_map.get(idx_2)) {
ChainEntry chainEntry = chain_entry_map.get(seq);
linearScan(spectrumEntry, xcorrPL, chainEntry, idx_2, binChainMap, debugEntryList, devChainScoreMap);
linearScan(spectrumEntry, xcorrPL, chainEntry, idx_2, binChainMap, binScoresMap, debugEntryList, devChainScoreMap);
}
checkedBinSet.add(idx_2);
}
Expand Down Expand Up @@ -145,8 +145,8 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws I
}

if (cal_evalue && (resultEntry.getScoreCount() < ECL2.score_point_t)) {
for (double s1 : chain_score_entry_1.getScoreList()) {
for (double s2 : chain_score_entry_2.getScoreList()) {
for (double s1 : binScoresMap.get(idx_1)) {
for (double s2 : binScoresMap.get(idx_2)) {
resultEntry.addToScoreHistogram(s1 + s2);
}
}
Expand Down Expand Up @@ -273,7 +273,7 @@ public FinalResultEntry convertResultEntry(int scanNum, ResultEntry result_entry
return new FinalResultEntry(scanNum, result_entry.spectrum_id, rank, result_entry.charge, result_entry.spectrum_mz, result_entry.spectrum_mass, theo_mass, result_entry.rt, ppm, result_entry.getScore(), delta_c, final_seq_1, result_entry.getLinkSite1(), String.join(";", pro1Set), final_seq_2, result_entry.getLinkSite2(), String.join(";", pro2Set), cl_type, hit_type, C13_Diff_num, result_entry.getEValue(), result_entry.getScoreCount(), result_entry.getRSquare(), result_entry.getSlope(), result_entry.getIntercept(), result_entry.getStartIdx(), result_entry.getEndIdx(), result_entry.getChainScore1(), result_entry.getChainRank1(), result_entry.getChainScore2(), result_entry.getChainRank2(), result_entry.getCandidateNum(), cal_evalue, spectrumEntry.mgfTitle);
}

private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, ChainEntry chainEntry, int binInx, TreeMap<Integer, ChainResultEntry> binChainMap, List<DebugEntry> debugEntryList, Map<String, Double> devChainScoreMap) {
private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, ChainEntry chainEntry, int binInx, TreeMap<Integer, ChainResultEntry> binChainMap, TreeMap<Integer, List<Double>> binScoresMap, List<DebugEntry> debugEntryList, Map<String, Double> devChainScoreMap) {
for (short link_site_1 : chainEntry.link_site_set) {
// generate theoretical fragment ion bins and calculate XCorr.
double dot_product = mass_tool_obj.generateTheoFragmentAndCalXCorr(chainEntry.seq, link_site_1, spectrumEntry.precursor_mass - chainEntry.chain_mass, spectrumEntry.precursor_charge, xcorrPL);
Expand All @@ -288,9 +288,17 @@ private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, Chain

// Record result
if (dot_product > single_chain_t) {
if (cal_evalue) {
if (binScoresMap.containsKey(binInx)) {
binScoresMap.get(binInx).add(dot_product);
} else {
List<Double> tempList = new ArrayList<>();
tempList.add(dot_product);
binScoresMap.put(binInx, tempList);
}
}
if (binChainMap.containsKey(binInx)) {
ChainResultEntry chain_result_entry = binChainMap.get(binInx);
chain_result_entry.addToScoreList(dot_product, cal_evalue);
if (dot_product > chain_result_entry.getScore()) {
chain_result_entry.setSecondScore(chain_result_entry.getScore());
chain_result_entry.setSecondSeq(chain_result_entry.getSeq());
Expand All @@ -303,7 +311,6 @@ private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, Chain
}
} else {
ChainResultEntry chain_result_entry = new ChainResultEntry();
chain_result_entry.addToScoreList(dot_product, cal_evalue);
chain_result_entry.setSeq(chainEntry.seq);
chain_result_entry.setLinkSite(link_site_1);
chain_result_entry.setScore(dot_product);
Expand Down
7 changes: 5 additions & 2 deletions src/main/java/proteomics/Search/SearchWrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.Callable;

public class SearchWrap implements Callable<FinalResultEntry> {
Expand Down Expand Up @@ -44,7 +46,8 @@ public FinalResultEntry call() throws IOException {
}
writer.close();
}
ResultEntry resultEntry = search_obj.doSearch(spectrumEntry, xcorrPL);
TreeMap<Integer, List<Double>> binScoresMap = new TreeMap<>();
ResultEntry resultEntry = search_obj.doSearch(spectrumEntry, xcorrPL, binScoresMap);
if (resultEntry != null) {
if (1 - (resultEntry.getSecondScore() / resultEntry.getScore()) >= delta_c_t) {
if (cal_evalue) {
Expand All @@ -54,7 +57,7 @@ public FinalResultEntry call() throws IOException {
} else {
originalTolerance = search_obj.ms1_tolerance;
}
new CalEValue(spectrumEntry.scan_num, resultEntry, build_index_obj, build_index_obj.linker_mass, originalTolerance);
CalEValue.calEValue(spectrumEntry.scan_num, resultEntry, build_index_obj, binScoresMap, build_index_obj.linker_mass, originalTolerance, xcorrPL, search_obj.single_chain_t);
if (resultEntry.getEValue() != 9999) {
return search_obj.convertResultEntry(spectrumEntry.scan_num, resultEntry, seqProMap, spectrumEntry);
} else {
Expand Down
14 changes: 0 additions & 14 deletions src/main/java/proteomics/Types/ChainResultEntry.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
package proteomics.Types;

import java.util.LinkedList;
import java.util.List;

public class ChainResultEntry implements Comparable<ChainResultEntry>{

private String seq;
Expand All @@ -12,7 +9,6 @@ public class ChainResultEntry implements Comparable<ChainResultEntry>{
private int link_site;
private double score;
private double second_score;
private List<Double> score_list = new LinkedList<>(); // contains all chain score for generating the random histogram

public ChainResultEntry() {}

Expand All @@ -26,12 +22,6 @@ public void setSecondSeq(String second_seq) {
secondPtmFreeSeq = second_seq.replaceAll("[^A-Znc]", "");
}

public void addToScoreList(double score, boolean cal_evalue) {
if (cal_evalue) {
score_list.add(score);
}
}

public void setLinkSite(int link_site) {
this.link_site = link_site;
}
Expand Down Expand Up @@ -81,8 +71,4 @@ public int compareTo(ChainResultEntry other) {
return 0;
}
}

public List<Double> getScoreList() {
return score_list;
}
}
6 changes: 4 additions & 2 deletions src/main/java/proteomics/Types/ResultEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,10 @@ public void setCandidateNum(long candidate_num) {

public void addToScoreHistogram(double score) {
try {
++score_histogram[(int) Math.round(score * inverseHistogramBinSize)];
++score_count;
if (score > 0) {
++score_histogram[(int) Math.round(score * inverseHistogramBinSize)];
++score_count;
}
} catch (ArrayIndexOutOfBoundsException ex) {
logger.warn("Score {} is out of the range [0, {}].", score, max_score);
}
Expand Down

0 comments on commit 418d314

Please sign in to comment.