Skip to content

Commit

Permalink
Add a databaseType for parsing the header. Add two methods to DbTool.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcyu committed Nov 28, 2017
1 parent cedd170 commit db9a769
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 4 deletions.
6 changes: 5 additions & 1 deletion src/main/java/proteomics/Index/BuildIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@ public BuildIndex(Map<String, String> parameter_map) {
}

// read protein database
DbTool db_tool_obj = new DbTool(db_path);
String databaseType = "UniProt";
if (parameter_map.containsKey("database_type")) {
databaseType = parameter_map.get("database_type");
}
DbTool db_tool_obj = new DbTool(db_path, databaseType);
Map<String, String> pro_seq_map = db_tool_obj.getProSeqMap();
pro_annotate_map = db_tool_obj.getProAnnotateMap();

Expand Down
47 changes: 45 additions & 2 deletions src/main/java/proteomics/TheoSeq/DbTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@ public class DbTool {
private Map<String, String> pro_seq_map = new HashMap<>();
private Map<String, String> pro_annotate_map = new HashMap<>();

public DbTool(String db_name) {
public DbTool(String db_name, String databaseType) {
String id = "";
String annotate;
StringBuilder seq = new StringBuilder(99999);

boolean new_pro = true;

Pattern header_pattern = Pattern.compile(">([^\\s]*)(.*)");
Pattern header_pattern = Pattern.compile("^>([^\\s]+)[\\s|]+(.+)");;
if (databaseType.contentEquals("TAIR")) {
header_pattern = Pattern.compile("^>([^\\s]+)[\\s|]+(.+)$");
} else if (databaseType.contentEquals("UniProt") || databaseType.contentEquals("SwissProt")) {
header_pattern = Pattern.compile("^>[^|]+\\|(.+)\\|(.+)$");
}

try (BufferedReader db_reader = new BufferedReader(new FileReader(db_name))) {
String line;
Expand Down Expand Up @@ -65,4 +70,42 @@ public Map<String, String> getProSeqMap() {
/**
 * Returns the protein annotation map held by this instance.
 *
 * <p>The internal map itself is returned, not a copy, so changes made by the caller are
 * visible to this object. Presumably the keys are protein IDs and the values are the
 * annotation text parsed from the FASTA headers; confirm against the constructor.
 *
 * @return the {@code pro_annotate_map} field
 */
public Map<String, String> getProAnnotateMap() {
    return this.pro_annotate_map;
}

/**
 * Finds every position at which a peptide occurs in the sequence of a protein.
 *
 * <p>Before searching, the peptide is trimmed and every character other than {@code A-Z}
 * is removed. Overlapping occurrences are all reported.
 *
 * @param proteinId key of the protein in {@code pro_seq_map}
 * @param peptide peptide to locate; characters outside {@code A-Z} are ignored
 * @return a non-empty set of zero-based start indices of the peptide in the protein sequence
 * @throws NullPointerException if {@code peptide} is {@code null}, if {@code proteinId} has no
 *     sequence in {@code pro_seq_map}, or if the cleaned peptide does not occur in the sequence
 *     (which includes the case where nothing is left of the peptide after cleaning)
 */
public Set<Integer> findPeptideLocation(String proteinId, String peptide) throws NullPointerException {
    String cleanedPeptide = peptide.trim().replaceAll("[^A-Z]+", "");

    // Look the sequence up once, and fail with a meaningful message instead of an
    // anonymous NullPointerException when the protein is unknown.
    String proteinSeq = pro_seq_map.get(proteinId);
    if (proteinSeq == null) {
        throw new NullPointerException(String.format(Locale.US, "Cannot find the protein %s.", proteinId));
    }

    Set<Integer> output = new HashSet<>();
    // An empty pattern must not be searched: indexOf("", from) never returns -1 (it is
    // capped at the sequence length), so the loop below would never terminate.
    if (!cleanedPeptide.isEmpty()) {
        int idx = proteinSeq.indexOf(cleanedPeptide);
        while (idx >= 0) {
            output.add(idx);
            // Advance by one, not by the peptide length, so overlapping hits are kept.
            idx = proteinSeq.indexOf(cleanedPeptide, idx + 1);
        }
    }

    if (output.isEmpty()) {
        throw new NullPointerException(String.format(Locale.US, "Cannot find the peptide %s from the protein %s.", cleanedPeptide, proteinId));
    }
    return output;
}

/**
 * Collapses versioned protein IDs of the form {@code base.version} so that only the lowest
 * version of each base remains.
 *
 * <p>Each ID is split on {@code '.'}; the first piece is the base and the second piece is
 * parsed as the integer version. The surviving entry is rebuilt as {@code base + "." + version}.
 * IDs that have no second piece, or whose second piece is not an integer, cannot be compared
 * by version and are copied to the result unchanged.
 *
 * @param input protein IDs to reduce; must not be {@code null}
 * @return {@code input} itself when it holds exactly one element, otherwise a new set with one
 *     entry per base (lowest version) plus every ID that carries no numeric version
 */
public static Set<String> reduceProteinIdSet(Set<String> input) {
    if (input.size() == 1) {
        return input;
    }

    Map<String, Integer> lowestVersion = new HashMap<>();
    Set<String> output = new HashSet<>();
    for (String id : input) {
        String[] parts = id.split("\\.");
        Integer version = null;
        if (parts.length > 1) {
            try {
                version = Integer.valueOf(parts[1]);
            } catch (NumberFormatException ignored) {
                // The suffix is not a version number; the ID is kept verbatim below.
            }
        }
        if (version == null) {
            // No usable version: keep the ID as-is rather than failing, matching what the
            // single-element shortcut above already does for such IDs.
            output.add(id);
            continue;
        }
        Integer known = lowestVersion.get(parts[0]);
        if (known == null || known > version) {
            lowestVersion.put(parts[0], version);
        }
    }

    for (Map.Entry<String, Integer> entry : lowestVersion.entrySet()) {
        output.add(entry.getKey() + "." + entry.getValue());
    }
    return output;
}
}
2 changes: 1 addition & 1 deletion src/test/java/proteomics/TheoSeq/DbToolTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public class DbToolTest {

@BeforeClass
public static void setUp() throws Exception {
db_tool_obj = new DbTool(Thread.currentThread().getContextClassLoader().getResource("test.fasta").getPath());
db_tool_obj = new DbTool(Thread.currentThread().getContextClassLoader().getResource("test.fasta").getPath(), "UniProt");
}

@Test
Expand Down

0 comments on commit db9a769

Please sign in to comment.