-
Notifications
You must be signed in to change notification settings - Fork 231
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1049 from shawntanzk/doi-wikiversioned-extractor
Add DOI and WikipediaVersioned Extractor
- Loading branch information
Showing
13 changed files
with
516 additions
and
0 deletions.
There are no files selected for viewing
20 changes: 20 additions & 0 deletions
20
protege-editor-owl/src/main/java/org/protege/editor/owl/ui/renderer/DOILinkExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* A RegEx based link extractor for the 'DOI:' prefix. | ||
* 25 Mar 2022 | ||
*/ | ||
public class DOILinkExtractor { | ||
|
||
public static final Pattern DOI_ID_PATTERN = Pattern.compile("DOI:\\s*([^\\s]+)", Pattern.CASE_INSENSITIVE); | ||
|
||
public static final String DOI_URL_BASE = "https://doi.org/"; | ||
|
||
public static final String replacementString = DOI_URL_BASE + "$1"; | ||
|
||
public static RegExBasedLinkExtractor createExtractor() { | ||
return new RegExBasedLinkExtractor("DOI", DOI_ID_PATTERN, replacementString); | ||
} | ||
} |
20 changes: 20 additions & 0 deletions
20
protege-editor-owl/src/main/java/org/protege/editor/owl/ui/renderer/OMIMLinkExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* A RegEx based link extractor for the 'OMIM:' prefix. | ||
* 25 Mar 2022 | ||
*/ | ||
public class OMIMLinkExtractor { | ||
|
||
public static final Pattern OMIM_ID_PATTERN = Pattern.compile("OMIM:\\s*(\\d+)", Pattern.CASE_INSENSITIVE); | ||
|
||
public static final String OMIM_URL_BASE = "https://omim.org/entry/"; | ||
|
||
public static final String replacementString = OMIM_URL_BASE + "$1"; | ||
|
||
public static RegExBasedLinkExtractor createExtractor() { | ||
return new RegExBasedLinkExtractor("OMIM", OMIM_ID_PATTERN, replacementString); | ||
} | ||
} |
20 changes: 20 additions & 0 deletions
20
protege-editor-owl/src/main/java/org/protege/editor/owl/ui/renderer/OMIMPSLinkExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* A RegEx based link extractor for the 'OMIMPS:' prefix. | ||
* 25 Mar 2022 | ||
*/ | ||
public class OMIMPSLinkExtractor { | ||
|
||
public static final Pattern OMIMPS_ID_PATTERN = Pattern.compile("OMIMPS:\\s*(\\d+)", Pattern.CASE_INSENSITIVE); | ||
|
||
public static final String OMIMPS_URL_BASE = "https://www.omim.org/phenotypicSeries/"; | ||
|
||
public static final String replacementString = OMIMPS_URL_BASE + "$1"; | ||
|
||
public static RegExBasedLinkExtractor createExtractor() { | ||
return new RegExBasedLinkExtractor("OMIMPS", OMIMPS_ID_PATTERN, replacementString); | ||
} | ||
} |
20 changes: 20 additions & 0 deletions
20
protege-editor-owl/src/main/java/org/protege/editor/owl/ui/renderer/ORCIDLinkExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* A RegEx based link extractor for the 'ORCID:' prefix. | ||
* 25 Mar 2022 | ||
*/ | ||
public class ORCIDLinkExtractor { | ||
|
||
public static final Pattern ORCID_ID_PATTERN = Pattern.compile("ORCID:\\s*([^\\s]+)", Pattern.CASE_INSENSITIVE); | ||
|
||
public static final String ORCID_URL_BASE = "https://orcid.org/"; | ||
|
||
public static final String replacementString = ORCID_URL_BASE + "$1"; | ||
|
||
public static RegExBasedLinkExtractor createExtractor() { | ||
return new RegExBasedLinkExtractor("ORCID", ORCID_ID_PATTERN, replacementString); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 20 additions & 0 deletions
20
...ge-editor-owl/src/main/java/org/protege/editor/owl/ui/renderer/OrphanetLinkExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* A RegEx based link extractor for the 'Orphanet:' prefix. | ||
* 25 Mar 2022 | ||
*/ | ||
public class OrphanetLinkExtractor { | ||
|
||
public static final Pattern ORPHANET_ID_PATTERN = Pattern.compile("Orphanet:\\s*(\\d+)", Pattern.CASE_INSENSITIVE); | ||
|
||
public static final String ORPHANET_URL_BASE = "https://www.orpha.net/consor/www/cgi-bin/OC_Exp.php?Expert="; | ||
|
||
public static final String replacementString = ORPHANET_URL_BASE + "$1"; | ||
|
||
public static RegExBasedLinkExtractor createExtractor() { | ||
return new RegExBasedLinkExtractor("Orphanet", ORPHANET_ID_PATTERN, replacementString); | ||
} | ||
} |
20 changes: 20 additions & 0 deletions
20
...owl/src/main/java/org/protege/editor/owl/ui/renderer/WikipediaVersionedLinkExtractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* A RegEx based link extractor for the 'WikipediaVersioned:' prefix. | ||
* 25 Mar 2022 | ||
*/ | ||
public class WikipediaVersionedLinkExtractor { | ||
|
||
private static final Pattern WIKI_VERSIONED_PATTERN = Pattern.compile("WikipediaVersioned:([^\\s]+)", Pattern.CASE_INSENSITIVE); | ||
|
||
public static final String WIKI_VERSIONED_URL_BASE = "https://wikipedia.org/wiki/index.php?title="; | ||
|
||
public static final String WIKI_VERSIONED_REPLACEMENT = WIKI_VERSIONED_URL_BASE + "$1"; | ||
|
||
public static RegExBasedLinkExtractor createExtractor() { | ||
return new RegExBasedLinkExtractor("WikipediaVersioned", WIKI_VERSIONED_PATTERN, WIKI_VERSIONED_REPLACEMENT); | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
...ditor-owl/src/test/java/org/protege/editor/owl/ui/renderer/DOILinkExtractor_TestCase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import java.util.Optional; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.core.Is.is; | ||
|
||
/** | ||
* Unit tests for the DOI link extraction. | ||
* 25 Mar 2022 | ||
*/ | ||
public class DOILinkExtractor_TestCase { | ||
|
||
private RegExBasedLinkExtractor extractor; | ||
|
||
@Before | ||
public void setUp() { | ||
extractor = DOILinkExtractor.createExtractor(); | ||
} | ||
|
||
@Test | ||
public void shouldExtractId() { | ||
String id = "DOI:10.1101/2021.10.10.463703"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(DOILinkExtractor.DOI_URL_BASE + "10.1101/2021.10.10.463703"))); | ||
} | ||
|
||
@Test | ||
public void shouldIgnoreCase_AllLowerCase() { | ||
String id = "doi:10.1101/2021.10.10.463703"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(DOILinkExtractor.DOI_URL_BASE + "10.1101/2021.10.10.463703"))); | ||
} | ||
|
||
@Test | ||
public void shouldIgnoreCase_MixedCase() { | ||
String id = "DoI:10.1101/2021.10.10.463703"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(DOILinkExtractor.DOI_URL_BASE + "10.1101/2021.10.10.463703"))); | ||
} | ||
|
||
@Test | ||
public void shouldNotExtractIdInCaseOfMissingPrefix() { | ||
String id = "10.1101/2021.10.10.463703"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId.isPresent(), is(false)); | ||
} | ||
|
||
@Test | ||
public void shouldAllowWhiteSpaceAfterPrefix() { | ||
String id = "doi: 10.1101/2021.10.10.463703"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(DOILinkExtractor.DOI_URL_BASE + "10.1101/2021.10.10.463703"))); | ||
} | ||
|
||
@Test | ||
public void shouldNotExtractIdInCaseOfWhiteSpace() { | ||
String id = "doi:123 456"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId.isPresent(), is(false)); | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
...itor-owl/src/test/java/org/protege/editor/owl/ui/renderer/OMIMLinkExtractor_TestCase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import java.util.Optional; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.core.Is.is; | ||
|
||
/** | ||
* Unit tests for the OMIM link extraction. | ||
* 25 Mar 2022 | ||
*/ | ||
public class OMIMLinkExtractor_TestCase { | ||
|
||
private RegExBasedLinkExtractor extractor; | ||
|
||
@Before | ||
public void setUp() { | ||
extractor = OMIMLinkExtractor.createExtractor(); | ||
} | ||
|
||
@Test | ||
public void shouldExtractId() { | ||
String id = "OMIM:300376"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMLinkExtractor.OMIM_URL_BASE + "300376"))); | ||
} | ||
|
||
@Test | ||
public void shouldIgnoreCase_AllLowerCase() { | ||
String id = "omim:300376"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMLinkExtractor.OMIM_URL_BASE + "300376"))); | ||
} | ||
|
||
@Test | ||
public void shouldIgnoreCase_MixedCase() { | ||
String id = "OMim:300376"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMLinkExtractor.OMIM_URL_BASE + "300376"))); | ||
} | ||
|
||
@Test | ||
public void shouldNotExtractIdInCaseOfMissingPrefix() { | ||
String id = "300376"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId.isPresent(), is(false)); | ||
} | ||
|
||
@Test | ||
public void shouldAllowWhiteSpaceAfterPrefix() { | ||
String id = "OMIM: 300376"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMLinkExtractor.OMIM_URL_BASE + "300376"))); | ||
} | ||
|
||
@Test | ||
public void shouldNotExtractIdInCaseOfWhiteSpace() { | ||
String id = "OMIM:300 376"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId.isPresent(), is(false)); | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
...or-owl/src/test/java/org/protege/editor/owl/ui/renderer/OMIMPSLinkExtractor_TestCase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package org.protege.editor.owl.ui.renderer; | ||
|
||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import java.util.Optional; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.core.Is.is; | ||
|
||
/** | ||
* Unit tests for the OMIMPS link extraction. | ||
* 25 Mar 2022 | ||
*/ | ||
public class OMIMPSLinkExtractor_TestCase { | ||
|
||
private RegExBasedLinkExtractor extractor; | ||
|
||
@Before | ||
public void setUp() { | ||
extractor = OMIMPSLinkExtractor.createExtractor(); | ||
} | ||
|
||
@Test | ||
public void shouldExtractId() { | ||
String id = "OMIMPS:236100"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMPSLinkExtractor.OMIMPS_URL_BASE + "236100"))); | ||
} | ||
|
||
@Test | ||
public void shouldIgnoreCase_AllLowerCase() { | ||
String id = "omimps:236100"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMPSLinkExtractor.OMIMPS_URL_BASE + "236100"))); | ||
} | ||
|
||
@Test | ||
public void shouldIgnoreCase_MixedCase() { | ||
String id = "OmimPS:236100"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMPSLinkExtractor.OMIMPS_URL_BASE + "236100"))); | ||
} | ||
|
||
@Test | ||
public void shouldNotExtractIdInCaseOfMissingPrefix() { | ||
String id = "236100"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId.isPresent(), is(false)); | ||
} | ||
|
||
@Test | ||
public void shouldAllowWhiteSpaceAfterPrefix() { | ||
String id = "OMIMPS: 236100"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId, is(Optional.of(OMIMPSLinkExtractor.OMIMPS_URL_BASE + "236100"))); | ||
} | ||
|
||
@Test | ||
public void shouldNotExtractIdInCaseOfWhiteSpace() { | ||
String id = "OMIMPS:236 100"; | ||
Optional<String> extractedId = extractor.extractLinkLiteral(id); | ||
assertThat(extractedId.isPresent(), is(false)); | ||
} | ||
} |
Oops, something went wrong.