Skip to content

Commit

Permalink
Using ProteomicsLibrary.
Browse files: browse the repository at this point in the history
  • Loading branch information
fcyu committed Mar 12, 2018
1 parent 24e559b commit c100de8
Show file tree
Hide file tree
Showing 17 changed files with 44 additions and 839 deletions.
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@
<artifactId>sqlite-jdbc</artifactId>
<version>3.21.0.1</version>
</dependency>
<dependency>
<groupId>hk.ust.bioinformatics</groupId>
<artifactId>ProteomicsLibrary</artifactId>
<version>1.0</version>
</dependency>
</dependencies>

<repositories>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/proteomics/ECL2.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import proteomics.Search.SearchWrap;
import proteomics.Search.Search;
import proteomics.Spectrum.PreSpectra;
import proteomics.TheoSeq.MassTool;
import ProteomicsLibrary.MassTool;
import proteomics.Validation.CalFDR;
import uk.ac.ebi.pride.tools.jmzreader.JMzReader;
import uk.ac.ebi.pride.tools.mgf_parser.MgfFile;
Expand Down
40 changes: 20 additions & 20 deletions src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import org.apache.commons.math3.util.CombinatoricsUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import proteomics.TheoSeq.DbTool;
import proteomics.TheoSeq.MassTool;
import ProteomicsLibrary.*;
import ProteomicsLibrary.Types.*;
import proteomics.Types.*;

import java.util.*;
Expand Down Expand Up @@ -90,17 +90,17 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
DbTool db_tool_obj = new DbTool(db_path, parameter_map.get("database_type"));
if (parameter_map.get("append_contaminants").contentEquals("1")) {
DbTool contaminantsDb = new DbTool(null, "contaminants");
pro_seq_map = contaminantsDb.getProSeqMap();
pro_seq_map.putAll(db_tool_obj.getProSeqMap()); // using the target sequence to replace contaminant sequence if there is conflict.
pro_annotate_map = contaminantsDb.getProAnnotateMap();
pro_annotate_map.putAll(db_tool_obj.getProAnnotateMap()); // using the target sequence to replace contaminant sequence if there is conflict.
pro_seq_map = contaminantsDb.getProteinSequenceMap();
pro_seq_map.putAll(db_tool_obj.getProteinSequenceMap()); // using the target sequence to replace contaminant sequence if there is conflict.
pro_annotate_map = contaminantsDb.getProteinAnnotateMap();
pro_annotate_map.putAll(db_tool_obj.getProteinAnnotateMap()); // using the target sequence to replace contaminant sequence if there is conflict.
} else {
pro_seq_map = db_tool_obj.getProSeqMap();
pro_annotate_map = db_tool_obj.getProAnnotateMap();
pro_seq_map = db_tool_obj.getProteinSequenceMap();
pro_annotate_map = db_tool_obj.getProteinAnnotateMap();
}

// define a new MassTool object
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", mz_bin_size, one_minus_bin_offset);
mass_tool_obj = new MassTool(missed_cleavage, fix_mod_map, "KR", "P", true, mz_bin_size * 0.5, one_minus_bin_offset, "N14", "[]");

// generate seq_pro_map
Map<String, boolean[]> seq_term_map = new HashMap<>();
Expand Down Expand Up @@ -142,7 +142,7 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
// mod free
Set<Short> linkSiteSet = getLinkSiteSet(seq, proteinNTerm, proteinCTerm, linker_type);
if (!linkSiteSet.isEmpty()) {
double totalMass = (mass_tool_obj.calResidueMass(seq) + MassTool.H2O);
double totalMass = (mass_tool_obj.calResidueMass(seq) + mass_tool_obj.H2O);
int bin = massToBin(totalMass);
if (bin_seq_map.containsKey(bin)) {
bin_seq_map.get(bin).add(seq);
Expand All @@ -161,7 +161,7 @@ public BuildIndex(Map<String, String> parameter_map) throws Exception {
linkSiteSet = new HashSet<>(5, 1);
linkSiteSet.add(varSeq.linkSite);
if (!linkSiteSet.isEmpty()) {
double totalMass = (mass_tool_obj.calResidueMass(varSeq.seq) + MassTool.H2O);
double totalMass = (mass_tool_obj.calResidueMass(varSeq.seq) + mass_tool_obj.H2O);
int bin = massToBin(totalMass);
if (bin_seq_map.containsKey(bin)) {
bin_seq_map.get(bin).add(varSeq.seq);
Expand Down Expand Up @@ -517,21 +517,21 @@ private List<Map<Integer, Double>> generateLocalIdxModMassMap(int[] idxArray, Ma
private Set<String> checkKCTermMod(Set<String> varSeqSet) { // eliminate those sequence that the middle amino acids having the same mod mass and the n-term and the first amino acid or the c-term and the last amino acid have the same mod mass. todo: check
String[] varSeqArray = varSeqSet.toArray(new String[varSeqSet.size()]);
Arrays.sort(varSeqArray); // Make sure that nK[].... is before n[]K..., so that n[]K... will be kept.
int seqLength = MassTool.seqToAAList(varSeqArray[0]).length;
int seqLength = MassTool.seqToAAList(varSeqArray[0], "[]").length;
AA[][] aaArrays = new AA[varSeqArray.length][seqLength];
for (int i = 0; i < varSeqArray.length; ++i) {
aaArrays[i] = MassTool.seqToAAList(varSeqArray[i]);
aaArrays[i] = MassTool.seqToAAList(varSeqArray[i], "[]");
}

if (aaArrays.length > 1) {
Set<String> keptSeqSet = new HashSet<>();
for (int i = 0; i < aaArrays.length - 1; ++i) {
boolean keep = true;
for (int j = i + 1; j < aaArrays.length; ++j) {
if ((Math.abs(aaArrays[i][0].delta_mass - aaArrays[j][1].delta_mass) < varModMassResolution) && (Math.abs(aaArrays[i][1].delta_mass - aaArrays[j][0].delta_mass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 2].delta_mass - aaArrays[j][seqLength - 1].delta_mass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 1].delta_mass - aaArrays[j][seqLength - 2].delta_mass) < varModMassResolution)) {
if ((Math.abs(aaArrays[i][0].ptmDeltaMass - aaArrays[j][1].ptmDeltaMass) < varModMassResolution) && (Math.abs(aaArrays[i][1].ptmDeltaMass - aaArrays[j][0].ptmDeltaMass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 2].ptmDeltaMass - aaArrays[j][seqLength - 1].ptmDeltaMass) < varModMassResolution) && (Math.abs(aaArrays[i][seqLength - 1].ptmDeltaMass - aaArrays[j][seqLength - 2].ptmDeltaMass) < varModMassResolution)) {
keep = false;
for (int k = 2; k < seqLength - 2; ++k) {
if (Math.abs(aaArrays[i][k].delta_mass - aaArrays[j][k].delta_mass) > varModMassResolution) {
if (Math.abs(aaArrays[i][k].ptmDeltaMass - aaArrays[j][k].ptmDeltaMass) > varModMassResolution) {
keep = true;
break;
}
Expand All @@ -553,19 +553,19 @@ private Set<String> checkKCTermMod(Set<String> varSeqSet) { // eliminate those s
}

private Set<Short> getLinkSiteSet(String seq, boolean n_term, boolean c_term, short linker_type) {
AA[] aa_list = MassTool.seqToAAList(seq);
AA[] aa_list = MassTool.seqToAAList(seq, "[]");
Set<Short> output = new HashSet<>(5, 1);
for (int i = 1; i < aa_list.length - 2; ++i) {
if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) {
if (linker_type == 1 && aa_list[i].aa == 'K' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) {
output.add((short) i);
} else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].delta_mass) < varModMassResolution)) {
} else if (linker_type == 2 && aa_list[i].aa == 'C' && (Math.abs(aa_list[i].ptmDeltaMass) < varModMassResolution)) {
output.add((short) i);
}
}
if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].delta_mass) < varModMassResolution)) {
if (linker_type == 1 && n_term && !output.contains((short) 1) && (Math.abs(aa_list[0].ptmDeltaMass) < varModMassResolution)) {
output.add((short) 0);
}
if (linker_type == 1 && c_term && aa_list[aa_list.length - 2].aa == 'K' && (Math.abs(aa_list[aa_list.length - 2].delta_mass) < varModMassResolution)) {
if (linker_type == 1 && c_term && aa_list[aa_list.length - 2].aa == 'K' && (Math.abs(aa_list[aa_list.length - 2].ptmDeltaMass) < varModMassResolution)) {
output.add((short) (aa_list.length - 2));
}
return output;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/proteomics/Search/CalEValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import org.slf4j.LoggerFactory;
import proteomics.ECL2;
import proteomics.Index.BuildIndex;
import proteomics.TheoSeq.MassTool;
import ProteomicsLibrary.MassTool;
import ProteomicsLibrary.Types.*;
import proteomics.Types.*;

import java.io.BufferedWriter;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/proteomics/Search/Search.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package proteomics.Search;

import proteomics.ECL2;
import proteomics.TheoSeq.MassTool;
import ProteomicsLibrary.MassTool;
import ProteomicsLibrary.Types.*;
import proteomics.Types.*;
import proteomics.Index.BuildIndex;

Expand Down
11 changes: 6 additions & 5 deletions src/main/java/proteomics/Search/SearchWrap.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import proteomics.ECL2;
import proteomics.Index.BuildIndex;
import proteomics.Spectrum.PreSpectrum;
import proteomics.TheoSeq.MassTool;
import ProteomicsLibrary.MassTool;
import ProteomicsLibrary.Types.*;
import proteomics.Types.*;
import uk.ac.ebi.pride.tools.jmzreader.JMzReader;
import uk.ac.ebi.pride.tools.jmzreader.JMzReaderException;
Expand Down Expand Up @@ -68,7 +69,7 @@ public Entry call() throws IOException, JMzReaderException, SQLException {
if (ECL2.debug) {
BufferedWriter writer = new BufferedWriter(new FileWriter(scanId + ".xcorr.spectrum.csv"));
writer.write("bin_idx,intensity\n");
for (int idx : xcorrPL.getIdxSet()) {
for (int idx : xcorrPL.getNonzeroIdx()) {
writer.write(idx + "," + xcorrPL.get(idx) + "\n");
}
writer.close();
Expand Down Expand Up @@ -214,7 +215,7 @@ private int getC13Num(double exp_mass, double theo_mass) {

private String addFixMod(String seq, int linkSite) {
Map<Character, Double> fix_mod_map = build_index_obj.getFixModMap();
AA[] aaList = MassTool.seqToAAList(seq);
AA[] aaList = MassTool.seqToAAList(seq, "[]");
StringBuilder sb = new StringBuilder(seq.length() * 3);
for (int i = 0; i < aaList.length; ++i) {
AA aa = aaList[i];
Expand All @@ -223,8 +224,8 @@ private String addFixMod(String seq, int linkSite) {
} else if (Math.abs(fix_mod_map.get(aa.aa)) > 1e-6) {
sb.append(String.format(Locale.US, "%c[%.3f]", aa.aa, fix_mod_map.get(aa.aa)));
} else {
if (Math.abs(aa.delta_mass) > 1e-6) {
sb.append(String.format(Locale.US, "%c[%.3f]", aa.aa, aa.delta_mass));
if (Math.abs(aa.ptmDeltaMass) > 1e-6) {
sb.append(String.format(Locale.US, "%c[%.3f]", aa.aa, aa.ptmDeltaMass));
} else {
sb.append(aa.aa);
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/proteomics/Spectrum/PreSpectra.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import org.slf4j.LoggerFactory;
import proteomics.ECL2;
import proteomics.Index.BuildIndex;
import proteomics.TheoSeq.MassTool;
import ProteomicsLibrary.MassTool;
import uk.ac.ebi.pride.tools.jmzreader.JMzReader;
import uk.ac.ebi.pride.tools.jmzreader.JMzReaderException;
import uk.ac.ebi.pride.tools.jmzreader.model.*;
Expand Down Expand Up @@ -42,7 +42,7 @@ public PreSpectra(JMzReader spectra_parser, double ms1Tolerance, double leftInve
}
}

IsotopeDistribution isotopeDistribution = new IsotopeDistribution(build_index_obj.returnMassTool().elementTable, 0, "N14");
IsotopeDistribution isotopeDistribution = new IsotopeDistribution(build_index_obj.returnMassTool().getElementTable(), 0, "N14");

// prepare SQL database
Connection sqlConnection = DriverManager.getConnection(sqlPath);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/proteomics/Spectrum/PreSpectrum.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package proteomics.Spectrum;

import proteomics.ECL2;
import proteomics.TheoSeq.MassTool;
import proteomics.Types.SparseVector;
import ProteomicsLibrary.MassTool;
import ProteomicsLibrary.Types.*;

import java.io.BufferedWriter;
import java.io.FileWriter;
Expand Down
85 changes: 0 additions & 85 deletions src/main/java/proteomics/TheoSeq/DbTool.java

This file was deleted.

Loading

0 comments on commit c100de8

Please sign in to comment.