From 100653506844865f111ee4957f467f18e60fae77 Mon Sep 17 00:00:00 2001 From: Fengchao Date: Sun, 4 Feb 2018 17:02:22 +0800 Subject: [PATCH] Improve SQL recording. --- src/main/java/proteomics/ECL2.java | 49 +++++- .../java/proteomics/Search/SearchWrap.java | 153 +++++++++++++----- .../java/proteomics/Spectrum/PreSpectra.java | 1 + 3 files changed, 157 insertions(+), 46 deletions(-) diff --git a/src/main/java/proteomics/ECL2.java b/src/main/java/proteomics/ECL2.java index c626a9f..8da56a3 100644 --- a/src/main/java/proteomics/ECL2.java +++ b/src/main/java/proteomics/ECL2.java @@ -176,22 +176,61 @@ private ECL2(String parameter_path, String spectra_path, String dbName) throws I int precursorCharge = sqlResultSet.getInt("precursorCharge"); double massWithoutLinker = sqlResultSet.getDouble("massWithoutLinker"); double precursorMass = sqlResultSet.getDouble("precursorMass"); - taskList.add(thread_pool.submit(new SearchWrap(search_obj, build_index_obj, mass_tool_obj, cal_evalue, delta_c_t, flankingPeaks, spectra_parser, lock, scanId, precursorCharge, massWithoutLinker, precursorMass, sqlConnection))); + taskList.add(thread_pool.submit(new SearchWrap(search_obj, build_index_obj, mass_tool_obj, cal_evalue, delta_c_t, flankingPeaks, spectra_parser, lock, scanId, precursorCharge, massWithoutLinker, precursorMass, sqlPath))); } sqlResultSet.close(); sqlStatement.close(); // check progress every minute, record results,and delete finished tasks. + PreparedStatement sqlPreparedStatement = sqlConnection.prepareStatement("REPLACE INTO spectraTable (scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, theoMass, score, deltaC, rank, ppm, seq1, linkSite1, proId1, seq2, linkSite2, proId2, clType, hitType, eValue, candidateNum, pointCount, rSquare, slope, intercept, startIdx, endIdx, chainScore1, chainRank1, chainScore2, chainRank2) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); + sqlConnection.setAutoCommit(false); int lastProgress = 0; int resultCount = 0; int totalCount = taskList.size(); int count = 0; while (count < totalCount) { // record search results and delete finished ones. - for (Future task : taskList) { List> toBeDeleteTaskList = new ArrayList<>(totalCount - count); + for (Future task : taskList) { if (task.isDone()) { - if (task.get()) { + if (task.get() != null) { + SearchWrap.Entry entry = task.get(); + sqlPreparedStatement.setInt(1, entry.scanNum); + sqlPreparedStatement.setString(2, entry.scanId); + sqlPreparedStatement.setInt(3, entry.precursorCharge); + sqlPreparedStatement.setDouble(4, entry.precursorMz); + sqlPreparedStatement.setDouble(5, entry.precursorMass); + sqlPreparedStatement.setInt(6, entry.rt); + sqlPreparedStatement.setDouble(7, entry.massWithoutLinker); + sqlPreparedStatement.setString(8, entry.mgfTitle); + sqlPreparedStatement.setInt(9, entry.isotopeCorrectionNum); + sqlPreparedStatement.setDouble(10, entry.ms1PearsonCorrelationCoefficient); + sqlPreparedStatement.setDouble(11, entry.theoMass); + sqlPreparedStatement.setDouble(12, entry.score); + sqlPreparedStatement.setDouble(13, entry.delteC); + sqlPreparedStatement.setInt(14, entry.rank); + sqlPreparedStatement.setDouble(15, entry.ppm); + sqlPreparedStatement.setString(16, entry.seq1); + sqlPreparedStatement.setInt(17, entry.linkSite1); + sqlPreparedStatement.setString(18, entry.pro1); + sqlPreparedStatement.setString(19, entry.seq2); + sqlPreparedStatement.setInt(20, entry.linkSite2); + sqlPreparedStatement.setString(21, entry.pro2); + sqlPreparedStatement.setString(22, entry.clType); + sqlPreparedStatement.setInt(23, entry.hitType); + sqlPreparedStatement.setDouble(24, entry.eValue); + sqlPreparedStatement.setLong(25, entry.candidateNum); + sqlPreparedStatement.setInt(26, entry.scoreCount); + sqlPreparedStatement.setDouble(27, entry.rSquare); + sqlPreparedStatement.setDouble(28, entry.slope); + sqlPreparedStatement.setDouble(29, entry.intercept); + sqlPreparedStatement.setInt(30, entry.startIdx); + sqlPreparedStatement.setInt(31, entry.endIdx); + sqlPreparedStatement.setDouble(32, entry.chainScore1); + sqlPreparedStatement.setInt(33, entry.chainRank1); + sqlPreparedStatement.setDouble(34, entry.chainScore2); + sqlPreparedStatement.setInt(35, entry.chainRank2); + sqlPreparedStatement.executeUpdate(); ++resultCount; } toBeDeleteTaskList.add(task); @@ -203,6 +242,8 @@ private ECL2(String parameter_path, String spectra_path, String dbName) throws I taskList.removeAll(toBeDeleteTaskList); taskList.trimToSize(); + sqlConnection.commit(); + int progress = count * 20 / totalCount; if (progress != lastProgress) { logger.info("Searching {}%...", progress * 5); @@ -228,6 +269,8 @@ private ECL2(String parameter_path, String spectra_path, String dbName) throws I System.err.println("Pool did not terminate"); } + sqlConnection.commit(); + sqlConnection.setAutoCommit(true); sqlConnection.close(); if (lock.isLocked()) { lock.unlock(); diff --git a/src/main/java/proteomics/Search/SearchWrap.java b/src/main/java/proteomics/Search/SearchWrap.java index 9a37563..b729105 100644 --- a/src/main/java/proteomics/Search/SearchWrap.java +++ b/src/main/java/proteomics/Search/SearchWrap.java @@ -18,7 +18,7 @@ import static proteomics.ECL2.isotopeCorrectionArray; -public class SearchWrap implements Callable { +public class SearchWrap implements Callable { private final Search search_obj; private final BuildIndex build_index_obj; @@ -31,9 +31,9 @@ public class SearchWrap implements Callable { private final int precursorCharge; private final double massWithoutLinker; private final double precursorMass; - private final Connection sqlConnection; + private final String sqlPath; - public SearchWrap(Search search_obj, BuildIndex build_index_obj, MassTool mass_tool_obj, boolean cal_evalue, double delta_c_t, boolean flankingPeaks, JMzReader spectraParser, ReentrantLock lock, String scanId, int precursorCharge, double massWithoutLinker, double precursorMass, Connection sqlConnection) { + public SearchWrap(Search search_obj, BuildIndex build_index_obj, MassTool mass_tool_obj, boolean cal_evalue, double delta_c_t, boolean flankingPeaks, JMzReader spectraParser, ReentrantLock lock, String scanId, int precursorCharge, double massWithoutLinker, double precursorMass, String sqlPath) { this.search_obj = search_obj; this.build_index_obj = build_index_obj; preSpectrumObj = new PreSpectrum(mass_tool_obj, flankingPeaks); @@ -45,11 +45,11 @@ public SearchWrap(Search search_obj, BuildIndex build_index_obj, MassTool mass_t this.precursorCharge = precursorCharge; this.massWithoutLinker = massWithoutLinker; this.precursorMass = precursorMass; - this.sqlConnection = sqlConnection; + this.sqlPath = sqlPath; } @Override - public Boolean call() throws IOException, JMzReaderException, SQLException { + public Entry call() throws IOException, JMzReaderException, SQLException { Map rawPLMap; try { lock.lock(); @@ -81,24 +81,22 @@ public Boolean call() throws IOException, JMzReaderException, SQLException { } CalEValue.calEValue(scanId, resultEntry, build_index_obj, binScoresMap, precursorCharge, massWithoutLinker, precursorMass, originalTolerance, xcorrPL, search_obj.single_chain_t); if (resultEntry.getEValue() != 9999) { - recordResult(scanId, resultEntry, precursorMass); - return true; + return recordResult(resultEntry); } else { - return false; + return null; } } else { - recordResult(scanId, resultEntry, precursorMass); - return true; + return recordResult(resultEntry); } } else { - return false; + return null; } } else { - return false; + return null; } } - private void recordResult(String scanId, ResultEntry result_entry, double precursorMass) throws SQLException { + private Entry recordResult(ResultEntry result_entry) throws SQLException { Map chainEntryMap = build_index_obj.getSeqEntryMap(); Map> seqProMap = build_index_obj.getSeqProMap(); @@ -111,8 +109,8 @@ private void recordResult(String scanId, ResultEntry result_entry, double precur double theo_mass = chain_entry_1.chain_mass + chain_entry_2.chain_mass + build_index_obj.linker_mass; int C13_Diff_num = getC13Num(precursorMass, theo_mass); - precursorMass += C13_Diff_num * MassTool.C13_DIFF; - double ppm = (precursorMass - theo_mass) * 1e6 / theo_mass; + double precursorMassNew = precursorMass + C13_Diff_num * MassTool.C13_DIFF; + double ppm = (precursorMassNew - theo_mass) * 1e6 / theo_mass; Set pro1Set = new TreeSet<>(); boolean isDecoy1 = false; @@ -170,37 +168,28 @@ private void recordResult(String scanId, ResultEntry result_entry, double precur String final_seq_1 = addFixMod(chain_seq_1, result_entry.getLinkSite1()); String final_seq_2 = addFixMod(chain_seq_2, result_entry.getLinkSite2()); - Statement sqlStatement = sqlConnection.createStatement(); - ResultSet sqlResultSet = sqlStatement.executeQuery(String.format(Locale.US, "SELECT scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, score, hitType FROM spectraTable WHERE scanId='%s'", scanId)); - if (sqlResultSet.next()) { - boolean needUpdate = false; - int hitTypeOld = sqlResultSet.getInt("hitType"); - if (!sqlResultSet.wasNull()) { - double scoreOld = sqlResultSet.getDouble("score"); - if (result_entry.getScore() > scoreOld || (result_entry.getScore() == scoreOld && hitTypeOld != 0 && hit_type == 1)) { - needUpdate = true; - } - } else { - needUpdate = true; - } - if (needUpdate) { - int scanNum = sqlResultSet.getInt("scanNum"); - int precursorCharge = sqlResultSet.getInt("precursorCharge"); - double precursorMz = sqlResultSet.getDouble("precursorMz"); - int rt = sqlResultSet.getInt("rt"); - double massWithoutLinker = sqlResultSet.getDouble("massWithoutLinker"); - String mgtTitle = sqlResultSet.getString("mgfTitle"); - int isotopeCorrectionNum = sqlResultSet.getInt("isotopeCorrectionNum"); - double ms1PearsonCorrelationCoefficient = sqlResultSet.getDouble("ms1PearsonCorrelationCoefficient"); - sqlStatement.executeUpdate(String.format(Locale.US, "DELETE FROM spectraTable WHERE scanId=%s", scanId)); - sqlStatement.executeUpdate(String.format(Locale.US, "INSERT INTO spectraTable (scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, theoMass, score, deltaC, rank, ppm, seq1, linkSite1, proId1, seq2, linkSite2, proId2, clType, hitType, eValue, candidateNum, pointCount, rSquare, slope, intercept, startIdx, endIdx, chainScore1, chainRank1, chainScore2, chainRank2) VALUES (%d, '%s', %d, %f, %f, %d, %f, '%s', %d, %f, %f, %f, %f, %d, %f, '%s', %d, '%s', '%s', %d, '%s', '%s', %d, %f, %d, %d, %f, %f, %f, %d, %d, %f, %d, %f, %d)", scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgtTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, theo_mass, result_entry.getScore(), delta_c, rank, ppm, final_seq_1, result_entry.getLinkSite1(), String.join(";", pro1Set), final_seq_2, result_entry.getLinkSite2(), String.join(";", pro2Set), cl_type, hit_type, result_entry.getEValue(), result_entry.getCandidateNum(), result_entry.getScoreCount(), result_entry.getRSquare(), result_entry.getSlope(), result_entry.getIntercept(), result_entry.getStartIdx(), result_entry.getEndIdx(), result_entry.getChainScore1(), result_entry.getChainRank1(), result_entry.getChainScore2(), result_entry.getChainRank2())); - } + Connection localSqlConnection = DriverManager.getConnection(sqlPath); + Statement localSqlStatement = localSqlConnection.createStatement(); + ResultSet localSqlResultSet = localSqlStatement.executeQuery(String.format(Locale.US, "SELECT scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, score, hitType FROM spectraTable WHERE scanId='%s'", scanId)); + if (localSqlResultSet.next()) { + int scanNum = localSqlResultSet.getInt("scanNum"); + int precursorCharge = localSqlResultSet.getInt("precursorCharge"); + double precursorMz = localSqlResultSet.getDouble("precursorMz"); + int rt = localSqlResultSet.getInt("rt"); + double massWithoutLinker = localSqlResultSet.getDouble("massWithoutLinker"); + String mgfTitle = localSqlResultSet.getString("mgfTitle"); + int isotopeCorrectionNum = localSqlResultSet.getInt("isotopeCorrectionNum"); + double ms1PearsonCorrelationCoefficient = localSqlResultSet.getDouble("ms1PearsonCorrelationCoefficient"); + // sqlStr = String.format(Locale.US, "REPLACE INTO spectraTable (scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, theoMass, score, deltaC, rank, ppm, seq1, linkSite1, proId1, seq2, linkSite2, proId2, clType, hitType, eValue, candidateNum, pointCount, rSquare, slope, intercept, startIdx, endIdx, chainScore1, chainRank1, chainScore2, chainRank2) VALUES (%d, '%s', %d, %f, %f, %d, %f, '%s', %d, %f, %f, %f, %f, %d, %f, '%s', %d, '%s', '%s', %d, '%s', '%s', %d, %f, %d, %d, %f, %f, %f, %d, %d, %f, %d, %f, %d)", scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, theo_mass, result_entry.getScore(), delta_c, rank, ppm, final_seq_1, result_entry.getLinkSite1(), String.join(";", pro1Set), final_seq_2, result_entry.getLinkSite2(), String.join(";", pro2Set), cl_type, hit_type, result_entry.getEValue(), result_entry.getCandidateNum(), result_entry.getScoreCount(), result_entry.getRSquare(), result_entry.getSlope(), result_entry.getIntercept(), result_entry.getStartIdx(), result_entry.getEndIdx(), result_entry.getChainScore1(), result_entry.getChainRank1(), result_entry.getChainScore2(), result_entry.getChainRank2()); + Entry entry = new Entry(scanNum, scanId, precursorCharge, precursorMz, precursorMass, rt, massWithoutLinker, mgfTitle, isotopeCorrectionNum, ms1PearsonCorrelationCoefficient, theo_mass, result_entry.getScore(), delta_c, rank, ppm, final_seq_1, result_entry.getLinkSite1(), String.join(";", pro1Set), final_seq_2, result_entry.getLinkSite2(), String.join(";", pro2Set), cl_type, hit_type, result_entry.getEValue(), result_entry.getCandidateNum(), result_entry.getScoreCount(), result_entry.getRSquare(), result_entry.getSlope(), result_entry.getIntercept(), result_entry.getStartIdx(), result_entry.getEndIdx(), result_entry.getChainScore1(), result_entry.getChainRank1(), result_entry.getChainScore2(), result_entry.getChainRank2()); + + localSqlResultSet.close(); + localSqlStatement.close(); + localSqlConnection.close(); + return entry; } else { throw new NullPointerException(String.format(Locale.US, "There is no record %s in the spectraTable.", scanId)); } - - sqlResultSet.close(); - sqlStatement.close(); } private int getC13Num(double exp_mass, double theo_mass) { @@ -238,4 +227,82 @@ private String addFixMod(String seq, int linkSite) { } return sb.toString(); } + + + public class Entry { + + public final int scanNum; + public final String scanId; + public final int precursorCharge; + public final double precursorMz; + public final double precursorMass; + public final int rt; + public final double massWithoutLinker; + public final String mgfTitle; + public final int isotopeCorrectionNum; + public final double ms1PearsonCorrelationCoefficient; + public final double theoMass; + public final double score; + public final double delteC; + public final int rank; + public final double ppm; + public final String seq1; + public final int linkSite1; + public final String pro1; + public final String seq2; + public final int linkSite2; + public final String pro2; + public final String clType; + public final int hitType; + public final double eValue; + public final long candidateNum; + public final int scoreCount; + public final double rSquare; + public final double slope; + public final double intercept; + public final int startIdx; + public final int endIdx; + public final double chainScore1; + public final int chainRank1; + public final double chainScore2; + public final int chainRank2; + + public Entry(int scanNum, String scanId, int precursorCharge, double precursorMz, double precursorMass, int rt, double massWithoutLinker, String mgfTitle, int isotopeCorrectionNum, double ms1PearsonCorrelationCoefficient, double theoMass, double score, double delteC, int rank, double ppm, String seq1, int linkSite1, String pro1, String seq2, int linkSite2, String pro2, String clType, int hitType, double eValue, long candidateNum, int scoreCount, double rSquare, double slope, double intercept, int startIdx, int endIdx, double chainScore1, int chainRank1, double chainScore2, int chainRank2) { + this.scanNum = scanNum; + this.scanId = scanId; + this.precursorCharge = precursorCharge; + this.precursorMz = precursorMz; + this.precursorMass = precursorMass; + this.rt = rt; + this.massWithoutLinker = massWithoutLinker; + this.mgfTitle = mgfTitle; + this.isotopeCorrectionNum = isotopeCorrectionNum; + this.ms1PearsonCorrelationCoefficient = ms1PearsonCorrelationCoefficient; + this.theoMass = theoMass; + this.score = score; + this.delteC = delteC; + this.rank = rank; + this.ppm = ppm; + this.seq1 = seq1; + this.linkSite1 = linkSite1; + this.pro1 = pro1; + this.seq2 = seq2; + this.linkSite2 = linkSite2; + this.pro2 = pro2; + this.clType = clType; + this.hitType = hitType; + this.eValue = eValue; + this.candidateNum = candidateNum; + this.scoreCount = scoreCount; + this.rSquare = rSquare; + this.slope = slope; + this.intercept = intercept; + this.startIdx = startIdx; + this.endIdx = endIdx; + this.chainScore1 = chainScore1; + this.chainRank1 = chainRank1; + this.chainScore2 = chainScore2; + this.chainRank2 = chainRank2; + } + } } diff --git a/src/main/java/proteomics/Spectrum/PreSpectra.java b/src/main/java/proteomics/Spectrum/PreSpectra.java index 3f0262e..ceb6c46 100644 --- a/src/main/java/proteomics/Spectrum/PreSpectra.java +++ b/src/main/java/proteomics/Spectrum/PreSpectra.java @@ -47,6 +47,7 @@ public PreSpectra(JMzReader spectra_parser, double ms1Tolerance, double leftInve // prepare SQL database Connection sqlConnection = DriverManager.getConnection(sqlPath); Statement sqlStatement = sqlConnection.createStatement(); + sqlStatement.executeUpdate("PRAGMA journal_mode=WAL"); sqlStatement.executeUpdate("DROP TABLE IF EXISTS spectraTable"); sqlStatement.executeUpdate("CREATE TABLE spectraTable (scanNum INTEGER NOT NULL, scanId TEXT PRIMARY KEY, precursorCharge INTEGER NOT NULL, precursorMz REAL NOT NULL, precursorMass REAL NOT NULL, rt INTEGER NOT NULL, massWithoutLinker REAL NOT NULL, mgfTitle TEXT NOT NULL, isotopeCorrectionNum INTEGER NOT NULL, ms1PearsonCorrelationCoefficient REAL NOT NULL, theoMass REAL, score REAL, deltaC REAL, rank INTEGER, ppm REAL, seq1 TEXT, linkSite1 INTEGER, proId1 TEXT, seq2 TEXT, linkSite2 INTEGER, proId2 TEXT, clType TEXT, hitType INTEGER, eValue REAL, candidateNum INTEGER, pointCount INTEGER, rSquare REAL, slope REAL, intercept REAL, startIdx INTEGER, endIdx INTEGER, chainScore1 REAL, chainRank1 INTEGER, chainScore2 REAL, chainRank2 INTEGER)"); sqlStatement.close();