Skip to content

Commit

Permalink
A BIG improvement in generating the random histogram.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Jun 8, 2017
1 parent 8ac5b16 commit c0b0b40
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 55 deletions.
64 changes: 15 additions & 49 deletions src/main/java/proteomics/Search/CalEValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,11 @@
import org.slf4j.LoggerFactory;
import proteomics.ECL2;
import proteomics.Index.BuildIndex;
import proteomics.TheoSeq.MassTool;
import proteomics.Types.ChainEntry;
import proteomics.Types.ResultEntry;
import proteomics.Types.SparseBooleanVector;
import proteomics.Types.SparseVector;
import proteomics.Types.*;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

public class CalEValue {
Expand All @@ -24,32 +18,18 @@ public class CalEValue {
private static final float toleranceStep = 1;

private ResultEntry result_entry;
private TreeMap<Integer, Set<String>> bin_seq_map;
private Map<String, ChainEntry> seq_entry_map;
private BuildIndex buildIndexObj;
private float linker_mass;
private MassTool mass_tool_obj;
private int max_common_ion_charge;
private SparseVector pl_map_xcorr;
private int specMaxBinIdx;
private Search search_obj;

CalEValue(int scan_num, ResultEntry result_entry, SparseVector pl_map_xcorr, int specMaxBinIdx, BuildIndex buildIndexObj, MassTool mass_tool_obj, float linker_mass, int max_common_ion_charge, float originalTolerance, Search search_obj) {
CalEValue(int scan_num, ResultEntry result_entry, BuildIndex buildIndexObj, float linker_mass, float originalTolerance) {
this.result_entry = result_entry;
this.bin_seq_map = buildIndexObj.getMassBinSeqMap();
this.seq_entry_map = buildIndexObj.getSeqEntryMap();
this.buildIndexObj = buildIndexObj;
this.linker_mass = linker_mass;
this.mass_tool_obj = mass_tool_obj;
this.max_common_ion_charge = max_common_ion_charge;
this.pl_map_xcorr = pl_map_xcorr;
this.specMaxBinIdx = specMaxBinIdx;
this.search_obj = search_obj;

int gap_num = ECL2.score_point_t - result_entry.getScoreCount();
float tolerance = originalTolerance;
while (gap_num > 0 && tolerance <= maxTolerance) {
gap_num = generateRandomRandomScores(gap_num, tolerance, toleranceStep);
gap_num = generateRandomRandomScores(gap_num, tolerance, toleranceStep, result_entry.getBinChainMap());
tolerance += toleranceStep;
}

Expand Down Expand Up @@ -224,39 +204,25 @@ public class CalEValue {
}
}

private int generateRandomRandomScores(int gap_num, float tolerance, float toleranceStep) {
private int generateRandomRandomScores(int gap_num, float tolerance, float toleranceStep, TreeMap<Integer, ChainResultEntry> binChainMap) {
int maxBinIdx = buildIndexObj.massToBin((result_entry.spectrum_mass - linker_mass) / 2);
for (int binIdx1 : bin_seq_map.keySet()) {
for (int binIdx1 : binChainMap.keySet()) {
if (binIdx1 < maxBinIdx) {
int leftBinIdx1 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass - tolerance - toleranceStep) - maxBinIdx;
int rightBinIdx1 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass - tolerance) - maxBinIdx - 1;
int leftBinIdx2 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass + tolerance) - maxBinIdx + 1;
int rightBinIdx2 = buildIndexObj.massToBin(result_entry.spectrum_mass - linker_mass + tolerance + toleranceStep) - maxBinIdx;
TreeMap<Integer, Set<String>> sub_map = new TreeMap<>();
sub_map.putAll(bin_seq_map.subMap(leftBinIdx1, true, rightBinIdx1, false));
sub_map.putAll(bin_seq_map.subMap(leftBinIdx2, false, rightBinIdx2, true));
TreeMap<Integer, ChainResultEntry> sub_map = new TreeMap<>();
sub_map.putAll(binChainMap.subMap(leftBinIdx1, true, rightBinIdx1, false));
sub_map.putAll(binChainMap.subMap(leftBinIdx2, false, rightBinIdx2, true));
if (!sub_map.isEmpty()) {
for (String seq1 : bin_seq_map.get(binIdx1)) {
ChainEntry chainEntry1 = seq_entry_map.get(seq1);
for (short linkSite1 : chainEntry1.link_site_set) {
SparseBooleanVector theoMz1 = mass_tool_obj.buildTheoVector(seq1, linkSite1, result_entry.spectrum_mass - chainEntry1.chain_mass, result_entry.charge, max_common_ion_charge, specMaxBinIdx);
double score1 = theoMz1.dot(pl_map_xcorr) * 0.005;
if (score1 > search_obj.single_chain_t) {
for (int binIdx2 : sub_map.keySet()) {
for (String seq2 : sub_map.get(binIdx2)) {
ChainEntry chainEntry2 = seq_entry_map.get(seq2);
for (short linkSite2 : chainEntry2.link_site_set) {
SparseBooleanVector theoMz2 = mass_tool_obj.buildTheoVector(seq2, linkSite2, result_entry.spectrum_mass - chainEntry2.chain_mass, result_entry.charge, max_common_ion_charge, specMaxBinIdx);
double score2 = theoMz2.dot(pl_map_xcorr) * 0.005;
if (score2 > search_obj.single_chain_t) {
result_entry.addToScoreHistogram(score1 + score2);
--gap_num;
if (gap_num <= 0) {
return gap_num;
}
}
}
}
for (double score1 : binChainMap.get(binIdx1).getScoreList()) {
for (int binIdx2 : sub_map.keySet()) {
for (double score2 : binChainMap.get(binIdx2).getScoreList()) {
result_entry.addToScoreHistogram(score1 + score2);
--gap_num;
if (gap_num <= 0) {
return gap_num;
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/proteomics/Search/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ public class Search {
private final MassTool mass_tool_obj;
private final TreeMap<Integer, Set<String>> bin_seq_map;
private final BuildIndex build_index_obj;
private int[] C13_correction_range;
private Map<Integer, Long> bin_candidate_num_map;
private final int[] C13_correction_range;
private final Map<Integer, Long> bin_candidate_num_map;
final float single_chain_t;
private final boolean cal_evalue;

Expand Down Expand Up @@ -120,7 +120,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int spec
int max_v = build_index_obj.massToBin(max_mass) + 1;

long candidate_num = 0;
ResultEntry resultEntry = new ResultEntry(spectrumEntry.spectrum_id, spectrumEntry.precursor_mz, spectrumEntry.precursor_mass, spectrumEntry.rt, spectrumEntry.precursor_charge, cal_evalue);
ResultEntry resultEntry = new ResultEntry(spectrumEntry.spectrum_id, spectrumEntry.precursor_mz, spectrumEntry.precursor_mass, spectrumEntry.rt, spectrumEntry.precursor_charge, cal_evalue, binChainMap);
for (int idx_1 : binChainMap.keySet()) {
if (idx_1 > max_v) {
break;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/proteomics/Search/SearchWrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public FinalResultEntry call() {
} else {
originalTolerance = search_obj.ms1_tolerance;
}
new CalEValue(spectrumEntry.scan_num, resultEntry, xcorrPL, specMaxBinIdx, build_index_obj, mass_tool_obj, build_index_obj.linker_mass, max_common_ion_charge, originalTolerance, search_obj);
new CalEValue(spectrumEntry.scan_num, resultEntry, build_index_obj, build_index_obj.linker_mass, originalTolerance);
if (resultEntry.getEValue() != 9999) {
return search_obj.convertResultEntry(spectrumEntry.scan_num, resultEntry, seqProMap);
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/proteomics/Types/ChainResultEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class ChainResultEntry implements Comparable<ChainResultEntry>{
private int link_site;
private double score;
private double second_score;
private List<Double> score_list = new LinkedList<>();
private List<Double> score_list = new LinkedList<>(); // contains all chain scores, used for generating the random histogram

public ChainResultEntry() {}

Expand Down
11 changes: 10 additions & 1 deletion src/main/java/proteomics/Types/ResultEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.TreeMap;


public class ResultEntry{

Expand All @@ -16,6 +18,8 @@ public class ResultEntry{
public final float rt;
public final int charge;

private final TreeMap<Integer, ChainResultEntry> binChainMap;

private String chain_seq_1;
private String chain_seq_2;
private double score;
Expand All @@ -38,7 +42,7 @@ public class ResultEntry{
private double chain_score_2;
private int chain_rank_2;

public ResultEntry(String spectrum_id, float spectrum_mz, float spectrum_mass, float rt, int charge, boolean cal_evalue) {
public ResultEntry(String spectrum_id, float spectrum_mz, float spectrum_mass, float rt, int charge, boolean cal_evalue, TreeMap<Integer, ChainResultEntry> binChainMap) {
if (cal_evalue) {
score_histogram = new int[(int) Math.round(max_score / histogram_bin_size) + 1]; // start from zero score.
}
Expand All @@ -47,6 +51,7 @@ public ResultEntry(String spectrum_id, float spectrum_mz, float spectrum_mass, f
this.spectrum_mass = spectrum_mass;
this.rt = rt;
this.charge = charge;
this.binChainMap = binChainMap;
}

public void setChain1(String chain_seq_1) {
Expand Down Expand Up @@ -184,4 +189,8 @@ public int getChainRank2() {
/**
 * Returns the value currently stored in the {@code candidate_num} field.
 *
 * @return the stored candidate count (presumably the number of candidates
 *         evaluated for this spectrum; confirm against where the field is set)
 */
public long getCandidateNum() {
return candidate_num;
}

/**
 * Returns the bin-index-to-chain-result map that was handed to the constructor.
 *
 * @return the map held in {@code binChainMap}; this is the same instance that
 *         was passed in, not a defensive copy
 */
public TreeMap<Integer, ChainResultEntry> getBinChainMap() {
return binChainMap;
}
}

0 comments on commit c0b0b40

Please sign in to comment.