diff --git a/src/main/java/proteomics/ECL2.java b/src/main/java/proteomics/ECL2.java index b238b60..141e08f 100644 --- a/src/main/java/proteomics/ECL2.java +++ b/src/main/java/proteomics/ECL2.java @@ -20,7 +20,6 @@ public class ECL2 { - public static final boolean cal_evalue = true; public static final double delta_c_t = 0; public static final int score_point_t = 15000; public static final boolean flankingPeaks = true; @@ -50,6 +49,10 @@ public static void main(String[] args) { Parameter parameter = new Parameter(parameter_path); Map parameter_map = parameter.returnParameterMap(); int max_common_ion_charge = Integer.valueOf(parameter_map.get("max_common_ion_charge")); + boolean cal_evalue = true; + if (parameter_map.containsKey("cal_evalue") && parameter_map.get("cal_evalue").trim().contentEquals("0")) { + cal_evalue = false; + } debug = parameter_map.get("debug").contentEquals("1"); dev = parameter_map.get("dev").contentEquals("1"); @@ -109,7 +112,7 @@ public static void main(String[] args) { Search search_obj = new Search(build_index_obj, parameter_map); List> temp_result_list = new LinkedList<>(); for (int scanNum : scanNumArray) { - temp_result_list.add(thread_pool.submit(new SearchWrap(search_obj, num_spectrum_map.get(scanNum), build_index_obj, mass_tool_obj, max_common_ion_charge, build_index_obj.getSeqProMap()))); + temp_result_list.add(thread_pool.submit(new SearchWrap(search_obj, num_spectrum_map.get(scanNum), build_index_obj, mass_tool_obj, max_common_ion_charge, build_index_obj.getSeqProMap(), cal_evalue))); } // check progress every minute @@ -175,22 +178,22 @@ public static void main(String[] args) { // save result logger.info("Estimating q-value..."); List> picked_result = pickResult(final_search_results); - CalFDR cal_fdr_obj = new CalFDR(picked_result.get(0)); - List intra_result = cal_fdr_obj.includeStats(); + CalFDR cal_fdr_obj = new CalFDR(picked_result.get(0), cal_evalue); + List intra_result = cal_fdr_obj.includeStats(cal_evalue); intra_result.sort(Collections.reverseOrder()); - cal_fdr_obj = new CalFDR(picked_result.get(1)); - List inter_result = cal_fdr_obj.includeStats(); + cal_fdr_obj = new CalFDR(picked_result.get(1), cal_evalue); + List inter_result = cal_fdr_obj.includeStats(cal_evalue); inter_result.sort(Collections.reverseOrder()); logger.info("Saving results..."); - saveTargetResult(intra_result, build_index_obj.getProAnnotateMap(), spectra_path, true); - saveTargetResult(inter_result, build_index_obj.getProAnnotateMap(), spectra_path, false); - saveDecoyResult(intra_result, build_index_obj.getProAnnotateMap(), spectra_path, true); - saveDecoyResult(inter_result, build_index_obj.getProAnnotateMap(), spectra_path, false); + saveTargetResult(intra_result, build_index_obj.getProAnnotateMap(), spectra_path, true, cal_evalue); + saveTargetResult(inter_result, build_index_obj.getProAnnotateMap(), spectra_path, false, cal_evalue); + saveDecoyResult(intra_result, build_index_obj.getProAnnotateMap(), spectra_path, true, cal_evalue); + saveDecoyResult(inter_result, build_index_obj.getProAnnotateMap(), spectra_path, false, cal_evalue); } logger.info("Done."); } - private static void saveTargetResult(List result, Map pro_annotate_map, String id_file_name, boolean is_intra) { + private static void saveTargetResult(List result, Map pro_annotate_map, String id_file_name, boolean is_intra, boolean cal_evalue) { try { BufferedWriter writer; if (is_intra) { @@ -234,7 +237,7 @@ private static void saveTargetResult(List result, Map result, Map pro_annotate_map, String id_file_name, boolean is_intra) { + private static void saveDecoyResult(List result, Map pro_annotate_map, String id_file_name, boolean is_intra, boolean cal_evalue) { try { BufferedWriter writer; if (is_intra) { diff --git a/src/main/java/proteomics/Search/Search.java b/src/main/java/proteomics/Search/Search.java index 46aca13..e489b8c 100644 --- a/src/main/java/proteomics/Search/Search.java +++ b/src/main/java/proteomics/Search/Search.java @@ -27,6 +27,7 @@ public class Search { private int[] C13_correction_range; private Map bin_candidate_num_map; final float single_chain_t; + private final boolean cal_evalue; /////////////////////////////////////////public methods//////////////////////////////////////////////////////////// public Search(BuildIndex build_index_obj, Map parameter_map) { @@ -39,12 +40,19 @@ public Search(BuildIndex build_index_obj, Map parameter_map) { ms1_tolerance = Float.valueOf(parameter_map.get("ms1_tolerance")); bin_seq_map = build_index_obj.getMassBinSeqMap(); bin_candidate_num_map = build_index_obj.getBinCandidateNumMap(); + if (parameter_map.containsKey("single_chain_t")) { single_chain_t = Float.valueOf(parameter_map.get("single_chain_t")); } else { single_chain_t = 0.1f; } + if (parameter_map.containsKey("cal_evalue") && parameter_map.get("cal_evalue").trim().contentEquals("0")) { + cal_evalue = false; + } else { + cal_evalue = true; + } + String[] temp = parameter_map.get("C13_correction_range").split(","); C13_correction_range = new int[temp.length]; for (int i = 0; i < temp.length; ++i) { @@ -112,7 +120,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int spec int max_v = build_index_obj.massToBin(max_mass) + 1; long candidate_num = 0; - ResultEntry resultEntry = new ResultEntry(spectrumEntry.spectrum_id, spectrumEntry.precursor_mz, spectrumEntry.precursor_mass, spectrumEntry.rt, spectrumEntry.precursor_charge); + ResultEntry resultEntry = new ResultEntry(spectrumEntry.spectrum_id, spectrumEntry.precursor_mz, spectrumEntry.precursor_mass, spectrumEntry.rt, spectrumEntry.precursor_charge, cal_evalue); for (int idx_1 : binChainMap.keySet()) { if (idx_1 > max_v) { break; @@ -181,7 +189,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int spec } } - if (ECL2.cal_evalue && (resultEntry.getScoreCount() < ECL2.score_point_t)) { + if (cal_evalue && (resultEntry.getScoreCount() < ECL2.score_point_t)) { for (double s1 : chain_score_entry_1.getScoreList()) { for (double s2 : chain_score_entry_2.getScoreList()) { resultEntry.addToScoreHistogram(s1 + s2); @@ -285,7 +293,7 @@ public FinalResultEntry convertResultEntry(int scanNum, ResultEntry result_entry String final_seq_1 = addFixMod(chain_seq_1, result_entry.getLinkSite1()); String final_seq_2 = addFixMod(chain_seq_2, result_entry.getLinkSite2()); - return new FinalResultEntry(scanNum, result_entry.spectrum_id, rank, result_entry.charge, result_entry.spectrum_mz, result_entry.spectrum_mass, theo_mass, result_entry.rt, ppm, result_entry.getScore(), delta_c, final_seq_1, result_entry.getLinkSite1(), pro_1, final_seq_2, result_entry.getLinkSite2(), pro_2, cl_type, hit_type, C13_Diff_num, result_entry.getEValue(), result_entry.getScoreCount(), result_entry.getRSquare(), result_entry.getSlope(), result_entry.getIntercept(), result_entry.getStartIdx(), result_entry.getEndIdx(), result_entry.getChainScore1(), result_entry.getChainRank1(), result_entry.getChainScore2(), result_entry.getChainRank2(), result_entry.getCandidateNum()); + return new FinalResultEntry(scanNum, result_entry.spectrum_id, rank, result_entry.charge, result_entry.spectrum_mz, result_entry.spectrum_mass, theo_mass, result_entry.rt, ppm, result_entry.getScore(), delta_c, final_seq_1, result_entry.getLinkSite1(), pro_1, final_seq_2, result_entry.getLinkSite2(), pro_2, cl_type, hit_type, C13_Diff_num, result_entry.getEValue(), result_entry.getScoreCount(), result_entry.getRSquare(), result_entry.getSlope(), result_entry.getIntercept(), result_entry.getStartIdx(), result_entry.getEndIdx(), result_entry.getChainScore1(), result_entry.getChainRank1(), result_entry.getChainScore2(), result_entry.getChainRank2(), result_entry.getCandidateNum(), cal_evalue); } private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int specMaxBinIdx, ChainEntry chainEntry, int binInx, TreeMap binChainMap, List debugEntryList, Map devChainScoreMap) { @@ -308,7 +316,7 @@ private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int s if (dot_product > single_chain_t) { if (binChainMap.containsKey(binInx)) { ChainResultEntry chain_result_entry = binChainMap.get(binInx); - chain_result_entry.addToScoreList(dot_product); + chain_result_entry.addToScoreList(dot_product, cal_evalue); if (dot_product > chain_result_entry.getScore()) { chain_result_entry.setSecondScore(chain_result_entry.getScore()); chain_result_entry.setSecondSeq(chain_result_entry.getSeq()); @@ -321,7 +329,7 @@ private void linearScan(SpectrumEntry spectrumEntry, SparseVector xcorrPL, int s } } else { ChainResultEntry chain_result_entry = new ChainResultEntry(); - chain_result_entry.addToScoreList(dot_product); + chain_result_entry.addToScoreList(dot_product, cal_evalue); chain_result_entry.setSeq(chainEntry.seq); chain_result_entry.setLinkSite(link_site_1); chain_result_entry.setScore(dot_product); diff --git a/src/main/java/proteomics/Search/SearchWrap.java b/src/main/java/proteomics/Search/SearchWrap.java index d2e599b..a23e582 100644 --- a/src/main/java/proteomics/Search/SearchWrap.java +++ b/src/main/java/proteomics/Search/SearchWrap.java @@ -26,8 +26,9 @@ public class SearchWrap implements Callable { private final int max_common_ion_charge; private final PreSpectrum preSpectrumObj; private final Map> seqProMap; + private final boolean cal_evalue; - public SearchWrap(Search search_obj, SpectrumEntry spectrumEntry, BuildIndex build_index_obj, MassTool mass_tool_obj, int max_common_ion_charge, Map> seqProMap) { + public SearchWrap(Search search_obj, SpectrumEntry spectrumEntry, BuildIndex build_index_obj, MassTool mass_tool_obj, int max_common_ion_charge, Map> seqProMap, boolean cal_evalue) { this.search_obj = search_obj; this.spectrumEntry = spectrumEntry; this.build_index_obj = build_index_obj; @@ -35,6 +36,7 @@ public SearchWrap(Search search_obj, SpectrumEntry spectrumEntry, BuildIndex bui this.max_common_ion_charge = max_common_ion_charge; preSpectrumObj = new PreSpectrum(mass_tool_obj); this.seqProMap = seqProMap; + this.cal_evalue = cal_evalue; } @Override @@ -74,7 +76,7 @@ public FinalResultEntry call() { } } if (1 - (resultEntry.getSecondScore() / resultEntry.getScore()) > ECL2.delta_c_t) { - if (ECL2.cal_evalue) { + if (cal_evalue) { float originalTolerance; if (search_obj.ms1_tolerance_unit == 1) { originalTolerance = resultEntry.spectrum_mass * search_obj.ms1_tolerance * 1e-6f; diff --git a/src/main/java/proteomics/Types/ChainResultEntry.java b/src/main/java/proteomics/Types/ChainResultEntry.java index 5556314..e7d3034 100644 --- a/src/main/java/proteomics/Types/ChainResultEntry.java +++ b/src/main/java/proteomics/Types/ChainResultEntry.java @@ -1,7 +1,5 @@ package proteomics.Types; -import proteomics.ECL2; - import java.util.LinkedList; import java.util.List; @@ -28,8 +26,8 @@ public void setSecondSeq(String second_seq) { secondPtmFreeSeq = second_seq.replaceAll("[^A-Znc]", ""); } - public void addToScoreList(double score) { - if (ECL2.cal_evalue) { + public void addToScoreList(double score, boolean cal_evalue) { + if (cal_evalue) { score_list.add(score); } } diff --git a/src/main/java/proteomics/Types/FinalResultEntry.java b/src/main/java/proteomics/Types/FinalResultEntry.java index 22302ba..b9fc863 100644 --- a/src/main/java/proteomics/Types/FinalResultEntry.java +++ b/src/main/java/proteomics/Types/FinalResultEntry.java @@ -1,6 +1,5 @@ package proteomics.Types; -import proteomics.ECL2; public class FinalResultEntry implements Comparable { @@ -44,7 +43,9 @@ public class FinalResultEntry implements Comparable { public double chain_score_2; public int chain_rank_2; - public FinalResultEntry(int scan_num, String spectrum_id, int rank, int charge, float spectrum_mz, float spectrum_mass, float peptide_mass, float rt, float ppm, double score, double delta_c, String seq_1, int link_site_1, String pro_id_1, String seq_2, int link_site_2, String pro_id_2, String cl_type, int hit_type, int C13_correction, double e_value, int point_count, float r_square, float slope, float intercept, int start_idx, int end_idx, double chain_score_1, int chain_rank_1, double chain_score_2, int chain_rank_2, long candidate_num) { + private final boolean cal_evalue; + + public FinalResultEntry(int scan_num, String spectrum_id, int rank, int charge, float spectrum_mz, float spectrum_mass, float peptide_mass, float rt, float ppm, double score, double delta_c, String seq_1, int link_site_1, String pro_id_1, String seq_2, int link_site_2, String pro_id_2, String cl_type, int hit_type, int C13_correction, double e_value, int point_count, float r_square, float slope, float intercept, int start_idx, int end_idx, double chain_score_1, int chain_rank_1, double chain_score_2, int chain_rank_2, long candidate_num, boolean cal_evalue) { this.scan_num = scan_num; this.spectrum_id = spectrum_id; this.rank = rank; @@ -92,10 +93,12 @@ public FinalResultEntry(int scan_num, String spectrum_id, int rank, int charge, this.candidate_num = candidate_num; toString = scan_num + "-" + seq_1 + "-" + link_site_1 + "-" + seq_2 + link_site_2; + + this.cal_evalue = cal_evalue; } public int compareTo(FinalResultEntry other) { - if (ECL2.cal_evalue) { + if (cal_evalue) { if (this.negative_log10_evalue > other.negative_log10_evalue) { return 1; } else if (this.negative_log10_evalue < other.negative_log10_evalue) { diff --git a/src/main/java/proteomics/Types/ResultEntry.java b/src/main/java/proteomics/Types/ResultEntry.java index 7457bf4..a441b49 100644 --- a/src/main/java/proteomics/Types/ResultEntry.java +++ b/src/main/java/proteomics/Types/ResultEntry.java @@ -2,7 +2,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import proteomics.ECL2; + public class ResultEntry{ @@ -38,8 +38,8 @@ public class ResultEntry{ private double chain_score_2; private int chain_rank_2; - public ResultEntry(String spectrum_id, float spectrum_mz, float spectrum_mass, float rt, int charge) { - if (ECL2.cal_evalue) { + public ResultEntry(String spectrum_id, float spectrum_mz, float spectrum_mass, float rt, int charge, boolean cal_evalue) { + if (cal_evalue) { score_histogram = new int[(int) Math.round(max_score / histogram_bin_size) + 1]; // start from zero score. } this.spectrum_id = spectrum_id; diff --git a/src/main/java/proteomics/Validation/CalFDR.java b/src/main/java/proteomics/Validation/CalFDR.java index 19967aa..a2ea899 100644 --- a/src/main/java/proteomics/Validation/CalFDR.java +++ b/src/main/java/proteomics/Validation/CalFDR.java @@ -1,7 +1,6 @@ package proteomics.Validation; -import proteomics.ECL2; import proteomics.Types.FinalResultEntry; import java.util.List; @@ -15,9 +14,9 @@ public class CalFDR { private float[] qvalue_array = null; private List results; - public CalFDR(List results) { + public CalFDR(List results, boolean cal_evalue) { this.results = results; - if (ECL2.cal_evalue) { + if (cal_evalue) { precision = 0.1f; } else { precision = 0.001f; @@ -25,7 +24,7 @@ public CalFDR(List results) { // find the max score for (FinalResultEntry entry : results) { - if (ECL2.cal_evalue) { + if (cal_evalue) { if (entry.negative_log10_evalue > max_score) { max_score = entry.negative_log10_evalue; } @@ -52,7 +51,7 @@ public CalFDR(List results) { for (FinalResultEntry re : results) { if (re.hit_type == 1) { int idx; - if (ECL2.cal_evalue) { + if (cal_evalue) { idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); } else { idx = (int) Math.floor((re.score - min_score) / precision); @@ -60,7 +59,7 @@ public CalFDR(List results) { ++decoy_count_vector[idx]; } else if (re.hit_type == 0) { int idx; - if (ECL2.cal_evalue) { + if (cal_evalue) { idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); } else { idx = (int) Math.floor((re.score - min_score) / precision); @@ -68,7 +67,7 @@ public CalFDR(List results) { ++target_count_vector[idx]; } else { int idx; - if (ECL2.cal_evalue) { + if (cal_evalue) { idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); } else { idx = (int) Math.floor((re.score - min_score) / precision); @@ -116,10 +115,10 @@ public CalFDR(List results) { } } - public List includeStats() { + public List includeStats(boolean cal_evalue) { for (FinalResultEntry re : results) { if (re.hit_type == 0) { - if (ECL2.cal_evalue) { + if (cal_evalue) { int idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); re.qvalue = qvalue_array[idx]; } else {