Skip to content

Commit

Permalink
Merge pull request #1049 from shawntanzk/doi-wikiversioned-extractor
Browse files Browse the repository at this point in the history
Add DOI and WikipediaVersioned Extractor
  • Loading branch information
matthewhorridge committed May 23, 2022
2 parents c3f7602 + 4fbe1d0 commit 0aab1be
Show file tree
Hide file tree
Showing 13 changed files with 516 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.protege.editor.owl.ui.renderer;

import java.util.regex.Pattern;

/**
 * Factory and constants for the RegEx based link extractor that handles the 'DOI:' prefix.
 * 25 Mar 2022
 */
public class DOILinkExtractor {

    /** URL prefix that the captured identifier is appended to. */
    public static final String DOI_URL_BASE = "https://doi.org/";

    /**
     * Case-insensitive pattern: the literal {@code DOI:}, optional whitespace, then one or
     * more non-whitespace characters captured as group 1.
     */
    public static final Pattern DOI_ID_PATTERN =
            Pattern.compile("DOI:\\s*([^\\s]+)", Pattern.CASE_INSENSITIVE);

    /** Replacement template: {@link #DOI_URL_BASE} followed by a reference to group 1. */
    public static final String replacementString = DOI_URL_BASE + "$1";

    /**
     * Creates the extractor for DOI identifiers.
     *
     * @return a new {@code RegExBasedLinkExtractor} built from {@code "DOI"},
     *         {@link #DOI_ID_PATTERN} and {@link #replacementString}
     */
    public static RegExBasedLinkExtractor createExtractor() {
        RegExBasedLinkExtractor doiExtractor =
                new RegExBasedLinkExtractor("DOI", DOI_ID_PATTERN, replacementString);
        return doiExtractor;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.protege.editor.owl.ui.renderer;

import java.util.regex.Pattern;

/**
 * Factory and constants for the RegEx based link extractor that handles the 'OMIM:' prefix.
 * 25 Mar 2022
 */
public class OMIMLinkExtractor {

    /** URL prefix that the captured numeric identifier is appended to. */
    public static final String OMIM_URL_BASE = "https://omim.org/entry/";

    /**
     * Case-insensitive pattern: the literal {@code OMIM:}, optional whitespace, then one or
     * more digits captured as group 1.
     */
    public static final Pattern OMIM_ID_PATTERN =
            Pattern.compile("OMIM:\\s*(\\d+)", Pattern.CASE_INSENSITIVE);

    /** Replacement template: {@link #OMIM_URL_BASE} followed by a reference to group 1. */
    public static final String replacementString = OMIM_URL_BASE + "$1";

    /**
     * Creates the extractor for OMIM identifiers.
     *
     * @return a new {@code RegExBasedLinkExtractor} built from {@code "OMIM"},
     *         {@link #OMIM_ID_PATTERN} and {@link #replacementString}
     */
    public static RegExBasedLinkExtractor createExtractor() {
        RegExBasedLinkExtractor omimExtractor =
                new RegExBasedLinkExtractor("OMIM", OMIM_ID_PATTERN, replacementString);
        return omimExtractor;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.protege.editor.owl.ui.renderer;

import java.util.regex.Pattern;

/**
 * Factory and constants for the RegEx based link extractor that handles the 'OMIMPS:' prefix.
 * 25 Mar 2022
 */
public class OMIMPSLinkExtractor {

    /** URL prefix that the captured numeric identifier is appended to. */
    public static final String OMIMPS_URL_BASE = "https://www.omim.org/phenotypicSeries/";

    /**
     * Case-insensitive pattern: the literal {@code OMIMPS:}, optional whitespace, then one or
     * more digits captured as group 1.
     */
    public static final Pattern OMIMPS_ID_PATTERN =
            Pattern.compile("OMIMPS:\\s*(\\d+)", Pattern.CASE_INSENSITIVE);

    /** Replacement template: {@link #OMIMPS_URL_BASE} followed by a reference to group 1. */
    public static final String replacementString = OMIMPS_URL_BASE + "$1";

    /**
     * Creates the extractor for OMIMPS identifiers.
     *
     * @return a new {@code RegExBasedLinkExtractor} built from {@code "OMIMPS"},
     *         {@link #OMIMPS_ID_PATTERN} and {@link #replacementString}
     */
    public static RegExBasedLinkExtractor createExtractor() {
        RegExBasedLinkExtractor omimpsExtractor =
                new RegExBasedLinkExtractor("OMIMPS", OMIMPS_ID_PATTERN, replacementString);
        return omimpsExtractor;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.protege.editor.owl.ui.renderer;

import java.util.regex.Pattern;

/**
 * Factory and constants for the RegEx based link extractor that handles the 'ORCID:' prefix.
 * 25 Mar 2022
 */
public class ORCIDLinkExtractor {

    /** URL prefix that the captured identifier is appended to. */
    public static final String ORCID_URL_BASE = "https://orcid.org/";

    /**
     * Case-insensitive pattern: the literal {@code ORCID:}, optional whitespace, then one or
     * more non-whitespace characters captured as group 1.
     */
    public static final Pattern ORCID_ID_PATTERN =
            Pattern.compile("ORCID:\\s*([^\\s]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Replacement template: {@link #ORCID_URL_BASE} followed by {@code $1}
     * (presumably expanded to group 1 by the extractor; confirm against RegExBasedLinkExtractor).
     */
    public static final String replacementString = ORCID_URL_BASE + "$1";

    /**
     * Creates the extractor for ORCID identifiers.
     *
     * @return a new {@code RegExBasedLinkExtractor} built from {@code "ORCID"},
     *         {@link #ORCID_ID_PATTERN} and {@link #replacementString}
     */
    public static RegExBasedLinkExtractor createExtractor() {
        RegExBasedLinkExtractor orcidExtractor =
                new RegExBasedLinkExtractor("ORCID", ORCID_ID_PATTERN, replacementString);
        return orcidExtractor;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,14 @@ public class OWLAnnotationCellRenderer2 extends PageCellRenderer {

// Fixed list of the link extractors used by this renderer, one per supported
// identifier scheme, in the order given here.
// NOTE(review): whether list order decides which extractor handles a literal is not
// visible in this hunk; confirm before reordering or inserting entries.
// Note that OboFoundryLinkExtractor exposes createLinkExtractor() rather than
// createExtractor() like the others.
private final List<LinkExtractor> linkExtractors = Arrays.asList(
PubMedLinkExtractor.createExtractor(),
OrphanetLinkExtractor.createExtractor(),
OMIMLinkExtractor.createExtractor(),
OMIMPSLinkExtractor.createExtractor(),
ISBN10LinkExtractor.createExtractor(),
WikipediaLinkExtractor.createExtractor(),
WikipediaVersionedLinkExtractor.createExtractor(),
DOILinkExtractor.createExtractor(),
ORCIDLinkExtractor.createExtractor(),
IdentifiersDotOrgLinkExtractor.createExtractor(),
OboFoundryLinkExtractor.createLinkExtractor());

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.protege.editor.owl.ui.renderer;

import java.util.regex.Pattern;

/**
 * Factory and constants for the RegEx based link extractor that handles the 'Orphanet:' prefix.
 * 25 Mar 2022
 */
public class OrphanetLinkExtractor {

    /** URL prefix (ending in the {@code Expert=} query parameter) that the captured number is appended to. */
    public static final String ORPHANET_URL_BASE = "https://www.orpha.net/consor/www/cgi-bin/OC_Exp.php?Expert=";

    /**
     * Case-insensitive pattern: the literal {@code Orphanet:}, optional whitespace, then one
     * or more digits captured as group 1.
     */
    public static final Pattern ORPHANET_ID_PATTERN =
            Pattern.compile("Orphanet:\\s*(\\d+)", Pattern.CASE_INSENSITIVE);

    /**
     * Replacement template: {@link #ORPHANET_URL_BASE} followed by {@code $1}
     * (presumably expanded to group 1 by the extractor; confirm against RegExBasedLinkExtractor).
     */
    public static final String replacementString = ORPHANET_URL_BASE + "$1";

    /**
     * Creates the extractor for Orphanet identifiers.
     *
     * @return a new {@code RegExBasedLinkExtractor} built from {@code "Orphanet"},
     *         {@link #ORPHANET_ID_PATTERN} and {@link #replacementString}
     */
    public static RegExBasedLinkExtractor createExtractor() {
        RegExBasedLinkExtractor orphanetExtractor =
                new RegExBasedLinkExtractor("Orphanet", ORPHANET_ID_PATTERN, replacementString);
        return orphanetExtractor;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.protege.editor.owl.ui.renderer;

import java.util.regex.Pattern;

/**
 * Factory and constants for the RegEx based link extractor that handles the
 * 'WikipediaVersioned:' prefix.
 * 25 Mar 2022
 */
public class WikipediaVersionedLinkExtractor {

    /** URL prefix (ending in the {@code title=} query parameter) that the captured text is appended to. */
    public static final String WIKI_VERSIONED_URL_BASE = "https://wikipedia.org/wiki/index.php?title=";

    /**
     * Case-insensitive pattern: the literal {@code WikipediaVersioned:} immediately followed
     * by one or more non-whitespace characters captured as group 1. Unlike a pattern with
     * {@code \s*} after the colon, no whitespace is accepted between prefix and value.
     */
    private static final Pattern WIKI_VERSIONED_PATTERN =
            Pattern.compile("WikipediaVersioned:([^\\s]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Replacement template: {@link #WIKI_VERSIONED_URL_BASE} followed by {@code $1}
     * (presumably expanded to group 1 by the extractor; confirm against RegExBasedLinkExtractor).
     */
    public static final String WIKI_VERSIONED_REPLACEMENT = WIKI_VERSIONED_URL_BASE + "$1";

    /**
     * Creates the extractor for versioned Wikipedia references.
     *
     * @return a new {@code RegExBasedLinkExtractor} built from {@code "WikipediaVersioned"},
     *         the prefix pattern and {@link #WIKI_VERSIONED_REPLACEMENT}
     */
    public static RegExBasedLinkExtractor createExtractor() {
        RegExBasedLinkExtractor wikiVersionedExtractor = new RegExBasedLinkExtractor(
                "WikipediaVersioned", WIKI_VERSIONED_PATTERN, WIKI_VERSIONED_REPLACEMENT);
        return wikiVersionedExtractor;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.protege.editor.owl.ui.renderer;

import org.junit.Before;
import org.junit.Test;

import java.util.Optional;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;

/**
 * Unit tests for the DOI link extraction.
 * 25 Mar 2022
 */
public class DOILinkExtractor_TestCase {

    /** Link every successful extraction in this suite is expected to produce. */
    private static final String EXPECTED_LINK = DOILinkExtractor.DOI_URL_BASE + "10.1101/2021.10.10.463703";

    private RegExBasedLinkExtractor extractor;

    @Before
    public void setUp() {
        extractor = DOILinkExtractor.createExtractor();
    }

    @Test
    public void shouldExtractId() {
        assertExtractsExpectedLink("DOI:10.1101/2021.10.10.463703");
    }

    @Test
    public void shouldIgnoreCase_AllLowerCase() {
        assertExtractsExpectedLink("doi:10.1101/2021.10.10.463703");
    }

    @Test
    public void shouldIgnoreCase_MixedCase() {
        assertExtractsExpectedLink("DoI:10.1101/2021.10.10.463703");
    }

    @Test
    public void shouldNotExtractIdInCaseOfMissingPrefix() {
        assertExtractsNothing("10.1101/2021.10.10.463703");
    }

    @Test
    public void shouldAllowWhiteSpaceAfterPrefix() {
        assertExtractsExpectedLink("doi: 10.1101/2021.10.10.463703");
    }

    @Test
    public void shouldNotExtractIdInCaseOfWhiteSpace() {
        assertExtractsNothing("doi:123 456");
    }

    /** Asserts that the given literal is turned into {@link #EXPECTED_LINK}. */
    private void assertExtractsExpectedLink(String literal) {
        Optional<String> link = extractor.extractLinkLiteral(literal);
        assertThat(link, is(Optional.of(EXPECTED_LINK)));
    }

    /** Asserts that the extractor yields no link for the given literal. */
    private void assertExtractsNothing(String literal) {
        Optional<String> link = extractor.extractLinkLiteral(literal);
        assertThat(link.isPresent(), is(false));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.protege.editor.owl.ui.renderer;

import org.junit.Before;
import org.junit.Test;

import java.util.Optional;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;

/**
 * Unit tests for the OMIM link extraction.
 * 25 Mar 2022
 */
public class OMIMLinkExtractor_TestCase {

    /** Link every successful extraction in this suite is expected to produce. */
    private static final String EXPECTED_LINK = OMIMLinkExtractor.OMIM_URL_BASE + "300376";

    private RegExBasedLinkExtractor extractor;

    @Before
    public void setUp() {
        extractor = OMIMLinkExtractor.createExtractor();
    }

    @Test
    public void shouldExtractId() {
        assertExtractsExpectedLink("OMIM:300376");
    }

    @Test
    public void shouldIgnoreCase_AllLowerCase() {
        assertExtractsExpectedLink("omim:300376");
    }

    @Test
    public void shouldIgnoreCase_MixedCase() {
        assertExtractsExpectedLink("OMim:300376");
    }

    @Test
    public void shouldNotExtractIdInCaseOfMissingPrefix() {
        assertExtractsNothing("300376");
    }

    @Test
    public void shouldAllowWhiteSpaceAfterPrefix() {
        assertExtractsExpectedLink("OMIM: 300376");
    }

    @Test
    public void shouldNotExtractIdInCaseOfWhiteSpace() {
        assertExtractsNothing("OMIM:300 376");
    }

    /** Asserts that the given literal is turned into {@link #EXPECTED_LINK}. */
    private void assertExtractsExpectedLink(String literal) {
        Optional<String> link = extractor.extractLinkLiteral(literal);
        assertThat(link, is(Optional.of(EXPECTED_LINK)));
    }

    /** Asserts that the extractor yields no link for the given literal. */
    private void assertExtractsNothing(String literal) {
        Optional<String> link = extractor.extractLinkLiteral(literal);
        assertThat(link.isPresent(), is(false));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.protege.editor.owl.ui.renderer;

import org.junit.Before;
import org.junit.Test;

import java.util.Optional;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;

/**
 * Unit tests for the OMIMPS link extraction.
 * 25 Mar 2022
 */
public class OMIMPSLinkExtractor_TestCase {

    /** Link every successful extraction in this suite is expected to produce. */
    private static final String EXPECTED_LINK = OMIMPSLinkExtractor.OMIMPS_URL_BASE + "236100";

    private RegExBasedLinkExtractor extractor;

    @Before
    public void setUp() {
        extractor = OMIMPSLinkExtractor.createExtractor();
    }

    @Test
    public void shouldExtractId() {
        assertExtractsExpectedLink("OMIMPS:236100");
    }

    @Test
    public void shouldIgnoreCase_AllLowerCase() {
        assertExtractsExpectedLink("omimps:236100");
    }

    @Test
    public void shouldIgnoreCase_MixedCase() {
        assertExtractsExpectedLink("OmimPS:236100");
    }

    @Test
    public void shouldNotExtractIdInCaseOfMissingPrefix() {
        assertExtractsNothing("236100");
    }

    @Test
    public void shouldAllowWhiteSpaceAfterPrefix() {
        assertExtractsExpectedLink("OMIMPS: 236100");
    }

    @Test
    public void shouldNotExtractIdInCaseOfWhiteSpace() {
        assertExtractsNothing("OMIMPS:236 100");
    }

    /** Asserts that the given literal is turned into {@link #EXPECTED_LINK}. */
    private void assertExtractsExpectedLink(String literal) {
        Optional<String> link = extractor.extractLinkLiteral(literal);
        assertThat(link, is(Optional.of(EXPECTED_LINK)));
    }

    /** Asserts that the extractor yields no link for the given literal. */
    private void assertExtractsNothing(String literal) {
        Optional<String> link = extractor.extractLinkLiteral(literal);
        assertThat(link.isPresent(), is(false));
    }
}
Loading

0 comments on commit 0aab1be

Please sign in to comment.