From a133aee6da288dc7abb382989b6c3c9b29c1f024 Mon Sep 17 00:00:00 2001 From: Fengchao Date: Thu, 18 May 2017 18:45:48 +0800 Subject: [PATCH] Output raw spectrum, normalized spectrum, xcorr spectrum, and two chains' theoretical spectra in debug mode. --- .../java/proteomics/Search/SearchWrap.java | 38 +++++++++++++++++-- .../java/proteomics/Spectrum/PreSpectra.java | 33 ++++++++++++++-- .../proteomics/Types/SparseBooleanVector.java | 4 ++ .../java/proteomics/Types/SparseVector.java | 2 +- .../proteomics/Types/SparseVectorTest.java | 2 +- 5 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/main/java/proteomics/Search/SearchWrap.java b/src/main/java/proteomics/Search/SearchWrap.java index 4a62748..d924aca 100644 --- a/src/main/java/proteomics/Search/SearchWrap.java +++ b/src/main/java/proteomics/Search/SearchWrap.java @@ -6,11 +6,11 @@ import proteomics.Index.BuildIndex; import proteomics.Spectrum.PreSpectrum; import proteomics.TheoSeq.MassTool; -import proteomics.Types.FinalResultEntry; -import proteomics.Types.ResultEntry; -import proteomics.Types.SparseVector; -import proteomics.Types.SpectrumEntry; +import proteomics.Types.*; +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; @@ -40,8 +40,38 @@ public SearchWrap(Search search_obj, SpectrumEntry spectrumEntry, BuildIndex bui @Override public FinalResultEntry call() { SparseVector xcorrPL = preSpectrumObj.prepareXcorr(spectrumEntry.originalPlMap, spectrumEntry.precursor_mass); + if (ECL2.debug) { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(spectrumEntry.scan_num + ".xcorr.spectrum.csv"))) { + writer.write("bin_idx,intensity\n"); + for (int idx : xcorrPL.getIdxSet()) { + writer.write(idx + "," + xcorrPL.get(idx) + "\n"); + } + } catch (IOException ex) { + ex.printStackTrace(); + logger.error(ex.getMessage()); + System.exit(1); + } + } ResultEntry resultEntry = search_obj.doSearch(spectrumEntry, xcorrPL); if (resultEntry != null) { + if (ECL2.debug) { + SparseBooleanVector chainVector1 = mass_tool_obj.buildTheoVector(MassTool.seqToAAList(resultEntry.getChain1()), (short) resultEntry.getLinkSite1(), spectrumEntry.precursor_mass - mass_tool_obj.calResidueMass(MassTool.seqToAAList(resultEntry.getChain1())), spectrumEntry.precursor_charge, max_common_ion_charge); + SparseBooleanVector chainVector2 = mass_tool_obj.buildTheoVector(MassTool.seqToAAList(resultEntry.getChain2()), (short) resultEntry.getLinkSite2(), spectrumEntry.precursor_mass - mass_tool_obj.calResidueMass(MassTool.seqToAAList(resultEntry.getChain2())), spectrumEntry.precursor_charge, max_common_ion_charge); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(spectrumEntry.scan_num + ".chain.spectrum.csv"))) { + writer.write("chain1 bin idx\n"); + for (int idx : chainVector1.getIdxSet()) { + writer.write(idx + "\n"); + } + writer.write("chain2 bin idx\n"); + for (int idx : chainVector2.getIdxSet()) { + writer.write(idx + "\n"); + } + } catch (IOException ex) { + ex.printStackTrace(); + logger.error(ex.getMessage()); + System.exit(1); + } + } if (1 - (resultEntry.getSecondScore() / resultEntry.getScore()) > ECL2.delta_c_t) { if (ECL2.cal_evalue) { float e_value_precursor_mass_tol; diff --git a/src/main/java/proteomics/Spectrum/PreSpectra.java b/src/main/java/proteomics/Spectrum/PreSpectra.java index 381ab2d..8fe5f84 100644 --- a/src/main/java/proteomics/Spectrum/PreSpectra.java +++ b/src/main/java/proteomics/Spectrum/PreSpectra.java @@ -11,9 +11,7 @@ import uk.ac.ebi.pride.tools.mzxml_parser.MzXMLParsingException; import uk.ac.ebi.pride.tools.mzxml_parser.mzxml.model.Scan; -import java.io.IOException; -import java.io.OutputStream; -import java.io.PrintStream; +import java.io.*; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -84,12 +82,41 @@ public void write(int b) throws IOException {} Map raw_mz_intensity_map = spectrum.getPeakList(); + if (ECL2.debug) { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(Integer.valueOf(spectrum.getId()) + ".raw.spectrum.csv"))) { + writer.write("mz,intensity\n"); + for (double mz : raw_mz_intensity_map.keySet()) { + if (Math.abs(raw_mz_intensity_map.get(mz)) > 1e-6) { + writer.write(mz + "," + raw_mz_intensity_map.get(mz) + "\n"); + } + } + } catch (IOException ex) { + ex.printStackTrace(); + logger.error(ex.getMessage()); + System.exit(1); + } + } + if (raw_mz_intensity_map.size() < min_peak_num) { logger.debug("Scan {} doesn't contain enough peak number ({}). Skip.", spectrum.getId(), min_peak_num); continue; } TreeMap originalPlMap = pre_spectrum_obj.preSpectrum(raw_mz_intensity_map, precursor_mass); + + if (ECL2.debug) { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(Integer.valueOf(spectrum.getId()) + ".normalized.spectrum.csv"))) { + writer.write("mz,intensity\n"); + for (float mz : originalPlMap.keySet()) { + writer.write(mz + "," + originalPlMap.get(mz) + "\n"); + } + } catch (IOException ex) { + ex.printStackTrace(); + logger.error(ex.getMessage()); + System.exit(1); + } + } + if (originalPlMap.size() <= min_peak_num) { continue; } diff --git a/src/main/java/proteomics/Types/SparseBooleanVector.java b/src/main/java/proteomics/Types/SparseBooleanVector.java index 4cae618..c558606 100644 --- a/src/main/java/proteomics/Types/SparseBooleanVector.java +++ b/src/main/java/proteomics/Types/SparseBooleanVector.java @@ -47,6 +47,10 @@ public boolean contains(int v) { return sparse_vector.contains(v); } + public Set getIdxSet() { + return sparse_vector; + } + public boolean equals(Object other) { if (other instanceof SparseBooleanVector) { SparseBooleanVector temp = (SparseBooleanVector) other; diff --git a/src/main/java/proteomics/Types/SparseVector.java b/src/main/java/proteomics/Types/SparseVector.java index 17b39b1..eac93ec 100644 --- a/src/main/java/proteomics/Types/SparseVector.java +++ b/src/main/java/proteomics/Types/SparseVector.java @@ -38,7 +38,7 @@ public float get(int i) { } } - Set idxSet() { + public Set getIdxSet() { return sparse_vector.keySet(); } diff --git a/src/test/java/proteomics/Types/SparseVectorTest.java b/src/test/java/proteomics/Types/SparseVectorTest.java index 443f138..e1c63cb 100644 --- a/src/test/java/proteomics/Types/SparseVectorTest.java +++ b/src/test/java/proteomics/Types/SparseVectorTest.java @@ -48,7 +48,7 @@ public void get() throws Exception { @Test public void idxSet() throws Exception { - Set result = vector.idxSet(); + Set result = vector.getIdxSet(); Set ground_truth = new HashSet<>(); ground_truth.add(1); ground_truth.add(3);