From 4f04394974202d0c080342c5f757bc786bff14b9 Mon Sep 17 00:00:00 2001 From: Fengchao Date: Tue, 30 Jan 2018 14:20:41 +0800 Subject: [PATCH] Change "/" to "*" for a better performance. --- .../java/proteomics/Index/BuildIndex.java | 4 +- .../java/proteomics/Search/CalEValue.java | 4 +- src/main/java/proteomics/Search/Search.java | 6 +-- .../java/proteomics/TheoSeq/MassTool.java | 46 ++++++++++--------- .../java/proteomics/Types/ResultEntry.java | 10 ++-- .../java/proteomics/Validation/CalFDR.java | 24 +++++----- 6 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/main/java/proteomics/Index/BuildIndex.java b/src/main/java/proteomics/Index/BuildIndex.java index c3ef2b7..464ed25 100644 --- a/src/main/java/proteomics/Index/BuildIndex.java +++ b/src/main/java/proteomics/Index/BuildIndex.java @@ -29,6 +29,7 @@ public class BuildIndex { private Map seq_entry_map = new HashMap<>(); private Map> seqProMap; private final float ms1_bin_size; + private final float inverseMs1BinSize; public BuildIndex(Map parameter_map) throws IOException { // initialize parameters @@ -45,6 +46,7 @@ public BuildIndex(Map parameter_map) throws IOException { } else { ms1_bin_size = 0.001f; } + inverseMs1BinSize = 1 / ms1_bin_size; // Read fix modification fix_mod_map.put('G', Float.valueOf(parameter_map.get("G"))); @@ -221,7 +223,7 @@ public float binToRightMass(int bin_idx) { } public int massToBin(float mass) { - return (int) Math.floor(mass / ms1_bin_size); + return (int) Math.floor(mass * inverseMs1BinSize); } private Map> buildSeqProMap(Map pro_seq_map, Map seq_term_map, int min_chain_length, int max_chain_length) { diff --git a/src/main/java/proteomics/Search/CalEValue.java b/src/main/java/proteomics/Search/CalEValue.java index 423119b..9910521 100644 --- a/src/main/java/proteomics/Search/CalEValue.java +++ b/src/main/java/proteomics/Search/CalEValue.java @@ -39,7 +39,7 @@ public class CalEValue { } int[] score_histogram = result_entry.getScoreHistogram(); - double histogram_bin_size = result_entry.getHistogramBinSize(); + double inverseHistogramBinSize = ResultEntry.getInverseHistogramBinSize(); int min_nonzero_idx = 0; for (int i = 0; i < score_histogram.length; ++i) { @@ -179,7 +179,7 @@ public class CalEValue { result_entry.setEValue(9999); logger.debug("Estimating E-value failed. Scan: {}, mass: {}, slope: {}, intercept: {}, R square: {}, point num: {}.",scan_num, result_entry.spectrum_mass, optimal_slope, optimal_intercept, max_r_square, result_entry.getScoreCount()); } else { - result_entry.setEValue(Math.exp((optimal_slope * Math.round(result_entry.getScore() / histogram_bin_size) + optimal_intercept) + Math.log((double) result_entry.getCandidateNum() / (double) result_entry.getScoreCount()))); // double point precision limitation. + result_entry.setEValue(Math.exp((optimal_slope * Math.round(result_entry.getScore() * inverseHistogramBinSize) + optimal_intercept) + Math.log((double) result_entry.getCandidateNum() / (double) result_entry.getScoreCount()))); // double point precision limitation. result_entry.setEValueDetails((float) max_r_square, (float) optimal_slope, (float) optimal_intercept, optimal_start_idx, null_end_idx); } diff --git a/src/main/java/proteomics/Search/Search.java b/src/main/java/proteomics/Search/Search.java index 5128351..051bb51 100644 --- a/src/main/java/proteomics/Search/Search.java +++ b/src/main/java/proteomics/Search/Search.java @@ -130,7 +130,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws I if (chain_entry_map.get(chain_score_entry_1.getSeq()).binaryModType == chain_entry_map.get(chain_score_entry_2.getSeq()).binaryModType) { double score; if (chain_score_entry_1.getPtmFreeSeq().contentEquals(chain_score_entry_2.getPtmFreeSeq())) { - score = (chain_score_entry_1.getScore() + chain_score_entry_2.getScore()) / 2; + score = (chain_score_entry_1.getScore() + chain_score_entry_2.getScore()) * 0.5; ++candidate_num; } else { score = chain_score_entry_1.getScore() + chain_score_entry_2.getScore(); @@ -142,7 +142,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws I double temp_1 = -1; if (chain_score_entry_1.getSecondSeq() != null) { if (chain_score_entry_1.getSecondPtmFreeSeq().contentEquals(chain_score_entry_2.getPtmFreeSeq())) { - temp_1 = (chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore()) / 2; + temp_1 = (chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore()) * 0.5; } else { temp_1 = chain_score_entry_1.getSecondScore() + chain_score_entry_2.getScore(); } @@ -150,7 +150,7 @@ ResultEntry doSearch(SpectrumEntry spectrumEntry, SparseVector xcorrPL) throws I double temp_2 = -1; if (chain_score_entry_2.getSecondSeq() != null) { if (chain_score_entry_1.getPtmFreeSeq().contentEquals(chain_score_entry_2.getSecondPtmFreeSeq())) { - temp_2 = (chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore()) / 2; + temp_2 = (chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore()) * 0.5; } else { temp_2 = chain_score_entry_1.getScore() + chain_score_entry_2.getSecondScore(); } diff --git a/src/main/java/proteomics/TheoSeq/MassTool.java b/src/main/java/proteomics/TheoSeq/MassTool.java index 7ca8a26..c545bb0 100644 --- a/src/main/java/proteomics/TheoSeq/MassTool.java +++ b/src/main/java/proteomics/TheoSeq/MassTool.java @@ -17,11 +17,11 @@ public class MassTool { private final int missed_cleavage; private final String cut_site; private final String protect_site; - private double mz_bin_size = 1.0005; - private double one_minus_bin_offset = 0.4; + private final double inverseMzBinSize; + private final double one_minus_bin_offset; public MassTool(int missed_cleavage, Map fix_mod_map, String cut_site, String protect_site, float mz_bin_size, float one_minus_bin_offset) { - this.mz_bin_size = mz_bin_size; + inverseMzBinSize = 1 / mz_bin_size; this.missed_cleavage = missed_cleavage; this.cut_site = cut_site; this.protect_site = protect_site; @@ -53,7 +53,7 @@ public MassTool(int missed_cleavage, Map fix_mod_map, String c } public int mzToBin(double mz) { - return (int) (mz / mz_bin_size + one_minus_bin_offset); + return (int) (mz * inverseMzBinSize + one_minus_bin_offset); } public float calResidueMass(String seq) { // n and c are also AA. @@ -113,9 +113,13 @@ public SparseBooleanVector buildTheoVector(String seq, short linkSite, float add linkSite = (short) Math.max(1, linkSite); int localMaxCharge = Math.min(max_charge, Math.max(precursor_charge - 1, 1)); + double[] inverseChargeArray = new double[localMaxCharge]; + for (int charge = 1; charge <= localMaxCharge; ++charge) { + inverseChargeArray[charge - 1] = (double) 1 / (double) charge; + } SparseBooleanVector outputVector = new SparseBooleanVector(); - + AA[] aaArray = seqToAAList(seq); // traverse the sequence to get b-ion @@ -123,48 +127,48 @@ public SparseBooleanVector buildTheoVector(String seq, short linkSite, float add for (int i = 1; i < aaArray.length - 2; ++i) { bIonMass += mass_table.get(aaArray[i].aa) + aaArray[i].delta_mass; if (i < linkSite) { - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin(bIonMass / charge + 1.00727646688)); + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin(bIonMass * inverseCharge + 1.00727646688)); } } else { - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin((bIonMass + additional_mass) / charge + 1.00727646688)); + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin((bIonMass + additional_mass) * inverseCharge + 1.00727646688)); } } } // calculate the last b-ion with C-term modification bIonMass += mass_table.get(aaArray[aaArray.length - 2].aa) + aaArray[aaArray.length - 2].delta_mass + mass_table.get(aaArray[aaArray.length - 1].aa) + aaArray[aaArray.length - 1].delta_mass; - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin((bIonMass + additional_mass) / charge + 1.00727646688)); // for the fragment containing all amino acids, the additional mass is always included. + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin((bIonMass + additional_mass) * inverseCharge + 1.00727646688)); // for the fragment containing all amino acids, the additional mass is always included. } // traverse the sequence with reversed order to get y-ion // the whole sequence double yIonMass = bIonMass + H2O; - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin((yIonMass + additional_mass) / charge + 1.00727646688)); // for the fragment containing all amino acids, the additional mass is always included. + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin((yIonMass + additional_mass) * inverseCharge + 1.00727646688)); // for the fragment containing all amino acids, the additional mass is always included. } // delete the first amino acid and N-term modification yIonMass -= mass_table.get(aaArray[0].aa) + aaArray[0].delta_mass + mass_table.get(aaArray[1].aa) + aaArray[1].delta_mass; if (1 >= linkSite) { - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin(yIonMass / charge + 1.00727646688)); + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin(yIonMass * inverseCharge + 1.00727646688)); } } else { - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin((yIonMass + additional_mass) / charge + 1.00727646688)); + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin((yIonMass + additional_mass) * inverseCharge + 1.00727646688)); } } // rest of the sequence for (int i = 2; i < aaArray.length - 2; ++i) { yIonMass -= mass_table.get(aaArray[i].aa) + aaArray[i].delta_mass; if (i >= linkSite) { // caution: here, it is different from b-ion - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin(yIonMass / charge + 1.00727646688)); + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin(yIonMass * inverseCharge + 1.00727646688)); } } else { - for (int charge = 1; charge <= localMaxCharge; ++charge) { - outputVector.put(mzToBin((yIonMass + additional_mass) / charge + 1.00727646688)); + for (double inverseCharge : inverseChargeArray) { + outputVector.put(mzToBin((yIonMass + additional_mass) * inverseCharge + 1.00727646688)); } } } diff --git a/src/main/java/proteomics/Types/ResultEntry.java b/src/main/java/proteomics/Types/ResultEntry.java index 2456378..265c594 100644 --- a/src/main/java/proteomics/Types/ResultEntry.java +++ b/src/main/java/proteomics/Types/ResultEntry.java @@ -9,7 +9,7 @@ public class ResultEntry{ private static final Logger logger = LoggerFactory.getLogger(ResultEntry.class); - public static final double histogram_bin_size = 0.05f; + public static final double inverseHistogramBinSize = 20; private static final double max_score = 20; public final String spectrum_id; @@ -44,7 +44,7 @@ public class ResultEntry{ public ResultEntry(String spectrum_id, float spectrum_mz, float spectrum_mass, float rt, int charge, boolean cal_evalue, TreeMap binChainMap) { if (cal_evalue) { - score_histogram = new int[(int) Math.round(max_score / histogram_bin_size) + 1]; // start from zero score. + score_histogram = new int[(int) Math.round(max_score * inverseHistogramBinSize) + 1]; // start from zero score. } this.spectrum_id = spectrum_id; this.spectrum_mz = spectrum_mz; @@ -88,7 +88,7 @@ public void setCandidateNum(long candidate_num) { public void addToScoreHistogram(double score) { try { - ++score_histogram[(int) Math.round(score / histogram_bin_size)]; + ++score_histogram[(int) Math.round(score * inverseHistogramBinSize)]; ++score_count; } catch (ArrayIndexOutOfBoundsException ex) { logger.warn("Score {} is out of the range [0, {}].", score, max_score); @@ -138,8 +138,8 @@ public int[] getScoreHistogram() { return score_histogram; } - public double getHistogramBinSize() { - return histogram_bin_size; + public static double getInverseHistogramBinSize() { + return inverseHistogramBinSize; } public double getEValue() { diff --git a/src/main/java/proteomics/Validation/CalFDR.java b/src/main/java/proteomics/Validation/CalFDR.java index bf977a2..e136bbc 100644 --- a/src/main/java/proteomics/Validation/CalFDR.java +++ b/src/main/java/proteomics/Validation/CalFDR.java @@ -7,7 +7,7 @@ public class CalFDR { - private final float precision; + private final float inversePrecision; private double min_score = 999; private double max_score = -999; @@ -17,9 +17,9 @@ public class CalFDR { public CalFDR(List results, boolean cal_evalue) { this.results = results; if (cal_evalue) { - precision = 0.1f; + inversePrecision = 10; } else { - precision = 0.001f; + inversePrecision = 1000; } // find the max score @@ -41,7 +41,7 @@ public CalFDR(List results, boolean cal_evalue) { } } - final int array_length = 1 + (int) Math.ceil((max_score - min_score) / precision); + final int array_length = 1 + (int) Math.ceil((max_score - min_score) * inversePrecision); float[] decoy_count_vector = new float[array_length]; float[] target_count_vector = new float[array_length]; float[] fuse_count_vector = new float[array_length]; @@ -52,25 +52,25 @@ public CalFDR(List results, boolean cal_evalue) { if (re.hit_type == 1) { int idx; if (cal_evalue) { - idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); + idx = (int) Math.floor((re.negative_log10_evalue - min_score) * inversePrecision); } else { - idx = (int) Math.floor((re.score - min_score) / precision); + idx = (int) Math.floor((re.score - min_score) * inversePrecision); } ++decoy_count_vector[idx]; } else if (re.hit_type == 0) { int idx; if (cal_evalue) { - idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); + idx = (int) Math.floor((re.negative_log10_evalue - min_score) * inversePrecision); } else { - idx = (int) Math.floor((re.score - min_score) / precision); + idx = (int) Math.floor((re.score - min_score) * inversePrecision); } ++target_count_vector[idx]; } else { int idx; if (cal_evalue) { - idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); + idx = (int) Math.floor((re.negative_log10_evalue - min_score) * inversePrecision); } else { - idx = (int) Math.floor((re.score - min_score) / precision); + idx = (int) Math.floor((re.score - min_score) * inversePrecision); } ++fuse_count_vector[idx]; } @@ -119,10 +119,10 @@ public List includeStats(boolean cal_evalue) { for (FinalResultEntry re : results) { if (re.hit_type == 0) { if (cal_evalue) { - int idx = (int) Math.floor((re.negative_log10_evalue - min_score) / precision); + int idx = (int) Math.floor((re.negative_log10_evalue - min_score) * inversePrecision); re.qvalue = qvalue_array[idx]; } else { - int idx = (int) Math.floor((re.score - min_score) / precision); + int idx = (int) Math.floor((re.score - min_score) * inversePrecision); re.qvalue = qvalue_array[idx]; } }