diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..efb484c --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,12 @@ +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] +# Required for the `sophia` crate to build on macOS diff --git a/.gitignore b/.gitignore index db23feb..0f91028 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ js/LICENSE # R *.rds +.RData # Docs lib/docs/dist diff --git a/deny.toml b/deny.toml index 4b45a02..28206ee 100644 --- a/deny.toml +++ b/deny.toml @@ -28,7 +28,8 @@ ignore = [ # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html [licenses] -unlicensed = "deny" +# unlicensed = "deny" +unlicensed = "warn" # ring license is not detected # See https://spdx.org/licenses/ for list of possible licenses allow = [ #"MIT", diff --git a/js/src/api.rs b/js/src/api.rs index 468483f..f69833b 100644 --- a/js/src/api.rs +++ b/js/src/api.rs @@ -83,6 +83,17 @@ impl ConverterJs { }) } + /// Load `Converter` from extended prefix map JSON string or URL + #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromExtendedPrefixMap)] + pub fn from_extended_prefix_map(prefix_map: String) -> Promise { + future_to_promise(async move { + match Converter::from_extended_prefix_map(&*prefix_map).await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&e.to_string())), + } + }) + } + /// Load `Converter` from JSON-LD string or URL #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromJsonld)] pub fn from_jsonld(jsonld: String) -> Promise { @@ -94,11 +105,11 @@ impl ConverterJs { }) } - /// Load `Converter` from extended prefix map JSON string or URL - #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromExtendedPrefixMap)] - pub fn 
from_extended_prefix_map(prefix_map: String) -> Promise { + /// Load `Converter` from SHACL prefixes definition string or URL + #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromShacl)] + pub fn from_shacl(shacl: String) -> Promise { future_to_promise(async move { - match Converter::from_extended_prefix_map(&*prefix_map).await { + match Converter::from_shacl(&*shacl).await { Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), Err(e) => Err(JsValue::from_str(&e.to_string())), } @@ -171,6 +182,72 @@ impl ConverterJs { Ok(JsValue::from(js_array)) } + /// Standardize prefix + #[wasm_bindgen(js_name = standardizePrefix)] + pub fn standardize_prefix(&self, prefix: String) -> Result { + self.converter + .standardize_prefix(&prefix) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Standardize a CURIE + #[wasm_bindgen(js_name = standardizeCurie)] + pub fn standardize_curie(&self, curie: String) -> Result { + self.converter + .standardize_curie(&curie) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + /// Standardize a URI + #[wasm_bindgen(js_name = standardizeUri)] + pub fn standardize_uri(&self, uri: String) -> Result { + self.converter + .standardize_uri(&uri) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + #[wasm_bindgen(js_name = getPrefixes)] + pub fn get_prefixes(&self, include_synonyms: Option) -> Vec { + self.converter + .get_prefixes(include_synonyms.unwrap_or(false)) + } + + #[wasm_bindgen(js_name = getUriPrefixes)] + pub fn get_uri_prefixes(&self, include_synonyms: Option) -> Vec { + self.converter + .get_uri_prefixes(include_synonyms.unwrap_or(false)) + } + + /// Write the `Converter` as a simple prefix map JSON + #[wasm_bindgen(js_name = writePrefixMap)] + pub fn write_prefix_map(&self) -> String { + format!("{:?}", self.converter.write_prefix_map()) + } + + /// Write the `Converter` as an extended prefix map JSON + #[wasm_bindgen(js_name = writeExtendedPrefixMap)] + pub fn write_extended_prefix_map(&self) -> Result {
Ok((self + .converter + .write_extended_prefix_map() + .map_err(|e| JsValue::from_str(&e.to_string()))?) + .to_string()) + } + + /// Write the `Converter` prefix map as JSON-LD context + #[wasm_bindgen(js_name = writeJsonld)] + pub fn write_jsonld(&self) -> String { + format!("{}", self.converter.write_jsonld()) + } + + /// Write the `Converter` prefix map as SHACL prefixes definition + #[wasm_bindgen(js_name = writeShacl)] + pub fn write_shacl(&self) -> Result { + self.converter + .write_shacl() + .map_err(|e| JsValue::from_str(&e.to_string())) + } + #[wasm_bindgen(js_name = toString)] pub fn to_string(&self) -> String { self.converter.to_string() diff --git a/js/tests/curies.test.ts b/js/tests/curies.test.ts index ec95457..ad350dd 100644 --- a/js/tests/curies.test.ts +++ b/js/tests/curies.test.ts @@ -22,6 +22,14 @@ describe('Tests for the curies npm package', () => { "http://purl.obolibrary.org/obo/DOID_1234", "http://identifiers.org/DOID:1234" ])).toEqual(["OBO:1234", "DOID:1234", undefined]); + expect(converter.getPrefixes().length).toBe(2) + expect(converter.getUriPrefixes().length).toBe(2) + + console.log(converter.writeExtendedPrefixMap()) + expect(converter.writeExtendedPrefixMap().startsWith("[{")).toBe(true); + expect(converter.writeShacl().startsWith("PREFIX")).toBe(true); + expect(converter.writePrefixMap().length).toBeGreaterThan(10); + expect(converter.writeJsonld().length).toBeGreaterThan(10); }); test('from prefix map', async () => { @@ -56,6 +64,19 @@ describe('Tests for the curies npm package', () => { expect(converter.expand("DOID:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); }); + test('from SHACL', async () => { + const converter = await Converter.fromShacl(`@prefix sh: . + @prefix xsd: . 
+ [ + sh:declare + [ sh:prefix "dc" ; sh:namespace "http://purl.org/dc/elements/1.1/"^^xsd:anyURI ], + [ sh:prefix "dcterms" ; sh:namespace "http://purl.org/dc/terms/"^^xsd:anyURI ], + [ sh:prefix "foaf" ; sh:namespace "http://xmlns.com/foaf/0.1/"^^xsd:anyURI ], + [ sh:prefix "xsd" ; sh:namespace "http://www.w3.org/2001/XMLSchema#"^^xsd:anyURI ] + ] .`); + expect(converter.expand("foaf:name")).toBe("http://xmlns.com/foaf/0.1/name"); + }); + test('from extended prefix map', async () => { const converter = await Converter.fromExtendedPrefixMap(`[ { @@ -99,6 +120,9 @@ describe('Tests for the curies npm package', () => { const converter = await getBioregistryConverter(); expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("doid:1234"); expect(converter.expand("doid:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); + expect(converter.standardizePrefix("gomf")).toBe("go"); + expect(converter.standardizeCurie("gomf:0032571")).toBe("go:0032571"); + expect(converter.standardizeUri("http://amigo.geneontology.org/amigo/term/GO:0032571")).toBe("http://purl.obolibrary.org/obo/GO_0032571"); }); test('get GO converter', async () => { diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 4c99416..cd4d89e 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -20,6 +20,7 @@ serde_json = "1.0" async-trait = "0.1" regex = "1.10" reqwest = { version = "0.12", default-features = false } +sophia = { version = "0.8.0" } [dev-dependencies] tokio = { version = "1.34", features = ["rt-multi-thread", "macros"] } diff --git a/lib/docs/docs/architecture.md b/lib/docs/docs/architecture.md index 64a1f6d..ce32740 100644 --- a/lib/docs/docs/architecture.md +++ b/lib/docs/docs/architecture.md @@ -2,9 +2,33 @@ This page presents the project architecture and some technical details. 
-### โœจ Features +## ๐Ÿ—ƒ๏ธ Folder structure -List of features availability per language binding, based on features defined in [curies.readthedocs.io](https://curies.readthedocs.io): +``` +curies.rs/ +โ”œโ”€โ”€ lib/ +โ”‚ โ”œโ”€โ”€ src/ +โ”‚ โ”‚ โ””โ”€โ”€ ๐Ÿฆ€ Source code for the core Rust crate +โ”‚ โ”œโ”€โ”€ tests/ +โ”‚ โ”‚ โ””โ”€โ”€ ๐Ÿงช Tests for the core Rust crate +โ”‚ โ””โ”€โ”€ docs/ +โ”‚ โ””โ”€โ”€ ๐Ÿ“– Markdown and HTML files for the documentation website +โ”œโ”€โ”€ python/ +โ”‚ โ””โ”€โ”€ ๐Ÿ Python bindings +โ”œโ”€โ”€ js/ +โ”‚ โ””โ”€โ”€ ๐ŸŸจ JavaScript bindings +โ”œโ”€โ”€ r/ +โ”‚ โ””โ”€โ”€ ๐Ÿ“ˆ R bindings +โ”œโ”€โ”€ scripts/ +โ”‚ โ””โ”€โ”€ ๐Ÿ› ๏ธ Development scripts (build, test, gen docs) +โ””โ”€โ”€ .github/ + โ””โ”€โ”€ workflows/ + โ””โ”€โ”€ โš™๏ธ Automated CI/CD workflows for testing and building releases +``` + +## โœจ Features + +List of features available per language binding, based on features defined in [curies.readthedocs.io](https://curies.readthedocs.io) | Feature | Rust (core) | Python | JS | R | | ------------------------------------------------ | ----------- | ------ | ---- | ---- | @@ -12,52 +36,27 @@ List of features availability per language binding, based on features defined in | expand | โœ… | โœ… | โœ… | โœ… | | compress_list | โœ… | โœ… | โœ… | | | expand_list | โœ… | โœ… | โœ… | | -| standardize (prefix, curie, uri) | | | | | +| standardize (prefix, curie, uri) | โœ… | โœ… | โœ… | | | chain converters | โœ… | โœ… | โœ… | | | Record object and converter.add_record() | โœ… | โœ… | โœ… | | -| converter.add_prefix(prefix, ns) | โœ… | | โœ… | | -| converter.get_prefixes() and .get_uri_prefixes() | โœ… | | | | +| converter.add_prefix(prefix, ns) | โœ… | โœ… | โœ… | | +| converter.get_prefixes() and .get_uri_prefixes() | โœ… | โœ… | โœ… | | | Load from prefix map | โœ… | โœ… | โœ… | | | Load from extended prefix map | โœ… | โœ… | โœ… | | -| Load from JSON-LD | โœ… | โœ… | โœ… | | -| Load from SHACL shape | | | | | +| Load from JSON-LD 
context | โœ… | โœ… | โœ… | | +| Load from SHACL prefix definition | โœ… | โœ… | โœ… | | | Load OBO converter | โœ… | โœ… | โœ… | | | Load GO converter | โœ… | โœ… | โœ… | | | Load Bioregistry converter | โœ… | โœ… | โœ… | โœ… | | Load Monarch converter | โœ… | โœ… | โœ… | | -| Write converter to prefix map | โœ… | | | | -| Write converter to extended prefix map | โœ… | | | | -| Write converter to JSON-LD | โœ… | | | | +| Write converter to prefix map | โœ… | โœ… | โœ… | | +| Write converter to extended prefix map | โœ… | โœ… | โœ… | | +| Write converter to JSON-LD | โœ… | โœ… | โœ… | | +| Write converter to SHACL | โœ… | โœ… | โœ… | | | Prefixes discovery | | | | | -โš ๏ธ Important differences between rust core and bindings: +## โš ๏ธโ€‹ Differences between rust core and language bindings -1. **Load prefix map**, extended prefix map and JSON-LD can take `HashMap` as input in rust. But for JS and python, we currently need to pass it as `String` (we need to figure out how to pass arbitrary objects). You can pass either a URL or a JSON object as string, the lib will automatically retrieve the content of the URL if it is one. The original python lib was taking directly JSON objects for all loaders, apart from SHACL which takes a URL (which was not convenient when wanting to provide a local SHACL file) +1. The **functions to Load** prefix map, extended prefix map and JSON-LD can take `HashMap` as input in rust. But for JS and python, we currently need to pass it as `String` (we need to figure out how to pass arbitrary objects). You can pass either a URL or a JSON object as string, the lib will automatically retrieve the content of the URL if it is one. The original python lib was taking directly JSON objects for all loaders, apart from SHACL which takes a URL (which was not convenient when wanting to provide a local SHACL file) 2. In rust **chain()** is a static function taking a list of converters, `chained = Converter::chain([conv1, conv2])`. 
In JS and python we cannot easily pass a list of complex objects like converters, so chain is a normal function that takes 1 converter to chain: `chained = conv1.chain(conv2)` -3. Currently **write** prefix map returns a HashMap, write extended map returns a JSON as string, and write JSON-LD returns `serde::json` type. In the original python lib it was writing to a file. - -### ๐Ÿ—ƒ๏ธ Folder structure - -``` -curies.rs/ -โ”œโ”€โ”€ lib/ -โ”‚ โ”œโ”€โ”€ src/ -โ”‚ โ”‚ โ””โ”€โ”€ ๐Ÿฆ€ Source code for the core Rust crate. -โ”‚ โ”œโ”€โ”€ tests/ -โ”‚ โ”‚ โ””โ”€โ”€ ๐Ÿงช Tests for the core Rust crate. -โ”‚ โ””โ”€โ”€ docs/ -โ”‚ โ””โ”€โ”€ ๐Ÿ“– Markdown and HTML files for the documentation website. -โ”œโ”€โ”€ python/ -โ”‚ โ””โ”€โ”€ ๐Ÿ Python bindings -โ”œโ”€โ”€ js/ -โ”‚ โ””โ”€โ”€ ๐ŸŸจ JavaScript bindings -โ”œโ”€โ”€ r/ -โ”‚ โ””โ”€โ”€ ๐Ÿ“ˆ R bindings -โ”œโ”€โ”€ scripts/ -โ”‚ โ””โ”€โ”€ ๐Ÿ› ๏ธ Development scripts (build, test, gen docs). -โ””โ”€โ”€ .github/ - โ””โ”€โ”€ workflows/ - โ””โ”€โ”€ โš™๏ธ Automated CI/CD workflows. -``` - -### +3. In the rust lib, currently the **functions to write** prefix map returns a HashMap, write extended map returns a JSON as string, and write JSON-LD returns `serde::json` type. The JS and python equivalent directly return a string. In the original python lib it was writing to a file. 
diff --git a/lib/docs/docs/contributing.md b/lib/docs/docs/contributing.md index 40f10be..0126edd 100644 --- a/lib/docs/docs/contributing.md +++ b/lib/docs/docs/contributing.md @@ -1,4 +1,4 @@ -# ๐Ÿ› ๏ธ Development +# ๐Ÿ› ๏ธ Contributing [![Build](https://github.com/biopragmatics/curies.rs/actions/workflows/build.yml/badge.svg)](https://github.com/biopragmatics/curies.rs/actions/workflows/build.yml) [![Lint and Test](https://github.com/biopragmatics/curies.rs/actions/workflows/test.yml/badge.svg)](https://github.com/biopragmatics/curies.rs/actions/workflows/test.yml) [![codecov](https://codecov.io/gh/biopragmatics/curies.rs/graph/badge.svg?token=BF15PSO6GN)](https://codecov.io/gh/biopragmatics/curies.rs) [![dependency status](https://deps.rs/repo/github/biopragmatics/curies.rs/status.svg)](https://deps.rs/repo/github/biopragmatics/curies.rs) @@ -28,8 +28,7 @@ cd curies.rs Install development dependencies: ```bash -rustup update -cargo install wasm-pack cargo-tarpaulin cargo-deny cargo-outdated +./scripts/install-dev.sh ``` > If you are using VSCode we strongly recommend to install the [`rust-lang.rust-analyzer`](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) extension. 
diff --git a/lib/docs/docs/javascript.md b/lib/docs/docs/javascript.md index f1d5026..db513bf 100644 --- a/lib/docs/docs/javascript.md +++ b/lib/docs/docs/javascript.md @@ -59,6 +59,21 @@ async function main() { // Expand and compress list of CURIEs and URIs const curies = converter.compressList(["http://purl.obolibrary.org/obo/DOID_1234"]); const uris = converter.expandList(["doid:1234"]); + + // Standardize prefix, CURIEs, and URIs using the preferred alternative + console.log(converter.standardizePrefix("gomf")) + console.log(converter.standardizeCurie("gomf:0032571")) + console.log(converter.standardizeUri("http://amigo.geneontology.org/amigo/term/GO:0032571")) + + // Get the list of prefixes or URI prefixes; the include_synonyms argument defaults to false + const prefixes_without_syn = converter.getPrefixes() + const uri_prefixes_with_syn = converter.getUriPrefixes(true) + + // Output the converter prefix map as a string in different serialization formats + const epm = converter.writeExtendedPrefixMap() + const pm = converter.writePrefixMap() + const jsonld = converter.writeJsonld() + const shacl = converter.writeShacl() } main(); ``` diff --git a/lib/docs/docs/python.md b/lib/docs/docs/python.md index a219072..922ae2e 100644 --- a/lib/docs/docs/python.md +++ b/lib/docs/docs/python.md @@ -28,12 +28,21 @@ uri = converter.expand("DOID:1234") # Compress/expand a list curies = converter.compress_list(["http://purl.obolibrary.org/obo/DOID_1234"]) -uris = converter.expand_list(["DOID:1234"]) +uris = converter.expand_list(["DOID:1234", "doid:1235"]) + +# Standardize prefix, CURIEs, and URIs using the preferred alternative +assert converter.standardize_prefix("gomf") == "go" +assert converter.standardize_curie("gomf:0032571") == "go:0032571" +assert converter.standardize_uri("http://amigo.geneontology.org/amigo/term/GO:0032571") == "http://purl.obolibrary.org/obo/GO_0032571" + +# Get the list of prefixes or URI prefixes; the include_synonyms argument defaults to False
+prefixes_without_syn = converter.get_prefixes() +uri_prefixes_with_syn = converter.get_uri_prefixes(True) ``` -## ๐ŸŒ€ Converter initialization +## ๐ŸŒ€ Load a converter -There are many ways to initialize a CURIE/URI converter. +There are many ways to load a CURIE/URI converter. ### ๐Ÿ“ฆ Import a predefined converter @@ -71,24 +80,14 @@ from curies_rs import get_monarch_converter converter = get_monarch_converter() ``` -### ๐Ÿ“‚ Load converter from prefix map +### ๐Ÿ“‚ Load from file Converter can be loaded from a prefix map, an extended prefix map (which enables to provide more information for each prefix), or a JSON-LD context. -For each function you can either provide the string to the prefix map JSON, or the URL to it. +!!! tip "Support URL" -#### Load from prefix map + For each `Converter.from_` function you can either provide the file content, or the URL to the file as string. -```python -from curies_rs import Converter - -prefix_map = """{ - "GO": "http://purl.obolibrary.org/obo/GO_", - "DOID": "http://purl.obolibrary.org/obo/DOID_", - "OBO": "http://purl.obolibrary.org/obo/" -}""" -converter = Converter.from_prefix_map(prefix_map) -``` #### Load from extended prefix map @@ -127,6 +126,21 @@ extended_pm = """[ converter = Converter.from_extended_prefix_map(extended_pm) ``` +#### Load from prefix map + +A simple dictionary without synonyms information: + +```python +from curies_rs import Converter + +prefix_map = """{ + "GO": "http://purl.obolibrary.org/obo/GO_", + "DOID": "http://purl.obolibrary.org/obo/DOID_", + "OBO": "http://purl.obolibrary.org/obo/" +}""" +converter = Converter.from_prefix_map(prefix_map) +``` + #### Load from JSON-LD context ```python @@ -150,6 +164,23 @@ from curies_rs import Converter converter = Converter.from_jsonld("https://purl.obolibrary.org/meta/obo_context.jsonld") ``` +#### Load from SHACL prefixes definition + +```python +from curies_rs import Converter + +shacl = """@prefix sh: . +@prefix xsd: . 
+[ + sh:declare + [ sh:prefix "dc" ; sh:namespace "http://purl.org/dc/elements/1.1/"^^xsd:anyURI ], + [ sh:prefix "dcterms" ; sh:namespace "http://purl.org/dc/terms/"^^xsd:anyURI ], + [ sh:prefix "foaf" ; sh:namespace "http://xmlns.com/foaf/0.1/"^^xsd:anyURI ], + [ sh:prefix "xsd" ; sh:namespace "http://www.w3.org/2001/XMLSchema#"^^xsd:anyURI ] +] .""" +conv = Converter.from_shacl(shacl) +``` + ### ๐Ÿ› ๏ธ Build the converter programmatically Create an empty `Converter`, and populate it with `Record`: @@ -158,12 +189,11 @@ Create an empty `Converter`, and populate it with `Record`: from curies_rs import Converter, Record rec1 = Record("doid", "http://purl.obolibrary.org/obo/DOID_", ["DOID"], ["https://identifiers.org/doid/"]) -rec2 = Record("obo", "http://purl.obolibrary.org/obo/") print(rec1.dict()) converter = Converter() converter.add_record(rec1) -converter.add_record(rec2) +converter.add_prefix("obo", "http://purl.obolibrary.org/obo/") ``` ### โ›“๏ธ Chain converters @@ -180,3 +210,18 @@ converter = ( ) print(len(converter)) ``` + +## โœ’๏ธ Serialize a converter + +Output the converter prefix map as a string in different serialization format: + +```python +from curies_rs import get_bioregistry_converter + +converter = get_bioregistry_converter() + +epm = converter.write_extended_prefix_map() +pm = converter.write_prefix_map() +jsonld = converter.write_jsonld() +shacl = converter.write_shacl() +``` diff --git a/lib/docs/includes/abbreviations.md b/lib/docs/includes/abbreviations.md index f592f98..8e2779c 100644 --- a/lib/docs/includes/abbreviations.md +++ b/lib/docs/includes/abbreviations.md @@ -30,6 +30,7 @@ *[OWL]: Web Ontology Language *[XML]: Extensible Markup Language *[SPARQL]: SPARQL Protocol and RDF Query Language +*[SHACL]: Shapes Constraint Language *[Faiss]: Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. 
It also contains supporting code for evaluation and parameter tuning. *[GGML]: GGML is a C library for machine learning (ML) - the "GG" refers to the initials of its originator (Georgi Gerganov). In addition to defining low-level machine learning primitives (like a tensor type), GGML defines a binary format for distributing large language models (LLMs). *[GGUF]: GPT-Generated Unified Format, successor to GGML, is a quantization method that allows users to use the CPU to run a LLM, but also offload some of its layers to the GPU for a speed up. diff --git a/lib/docs/mkdocs.yml b/lib/docs/mkdocs.yml index 1a510f9..843e27b 100644 --- a/lib/docs/mkdocs.yml +++ b/lib/docs/mkdocs.yml @@ -58,7 +58,7 @@ nav: - Use from Rust: rust.md - Data structures: struct.md - Architecture details: architecture.md - - Development: contributing.md + - Contributing: contributing.md - Python: - Use from Python: python.md - JavaScript: diff --git a/lib/src/api.rs b/lib/src/api.rs index 6b9e3d0..3aabd60 100644 --- a/lib/src/api.rs +++ b/lib/src/api.rs @@ -1,11 +1,26 @@ //! 
API for `Converter` and `Record` use crate::error::CuriesError; -use crate::fetch::{ExtendedPrefixMapSource, PrefixMapSource}; +use crate::fetch::{ExtendedPrefixMapSource, PrefixMapSource, ShaclSource}; use ptrie::Trie; use regex::Regex; use serde::{Deserialize, Serialize, Serializer}; use serde_json::{json, Value}; +use sophia::api::dataset::Dataset as _; +use sophia::api::graph::MutableGraph as _; +use sophia::api::ns::{xsd, Namespace}; +use sophia::api::prefix::Prefix; +use sophia::api::quad::Quad as _; +use sophia::api::serializer::{Stringifier as _, TripleSerializer as _}; +use sophia::api::source::QuadSource as _; +use sophia::api::term::matcher::Any; +use sophia::api::term::BnodeId; +use sophia::api::term::Term; +use sophia::inmem::dataset::LightDataset; +use sophia::inmem::graph::LightGraph; +use sophia::iri::Iri; +use sophia::turtle::parser::trig; +use sophia::turtle::serializer::turtle::{TurtleConfig, TurtleSerializer}; use std::collections::{HashMap, HashSet}; use std::fmt; use std::sync::Arc; @@ -136,7 +151,7 @@ impl Converter { /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); /// let converter = rt.block_on(async { /// Converter::from_prefix_map(prefix_map).await - /// }).expect("Failed to create the GO converter"); + /// }).expect("Failed to create the converter"); /// /// let curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234").unwrap(); /// assert_eq!(curie, "DOID:1234"); @@ -162,8 +177,8 @@ impl Converter { /// Ok(()) /// } /// ``` - pub async fn from_jsonld(data: T) -> Result { - let prefix_map = data.fetch().await?; + pub async fn from_jsonld(jsonld: T) -> Result { + let prefix_map = jsonld.fetch().await?; let mut converter = Converter::default(); let context = match prefix_map.get("@context") { Some(Value::Object(map)) => map, @@ -189,7 +204,7 @@ impl Converter { /// /// # Arguments /// - /// * `data` - The extended prefix map data, as URL, string, file, or `Vec` + /// * `prefix_map` - The extended 
prefix map data, as URL, string, file, or `Vec` /// /// # Examples /// @@ -209,6 +224,57 @@ impl Converter { Ok(converter) } + /// Create a `Converter` from a SHACL shape prefixes definition + /// + /// # Arguments + /// + /// * `shacl` - The SHACL prefixes definition (Turtle/TriG), as URL, string, or file path + /// + /// # Examples + /// + /// ``` + /// use curies::{Converter, Record}; + /// use std::collections::HashMap; + /// use tokio::{runtime}; + /// + /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); + /// let converter = rt.block_on(async { + /// Converter::from_shacl("https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/semweb.context.ttl").await + /// }).expect("Failed to create the converter"); + /// + /// let uri = converter.expand("foaf:name").unwrap(); + /// assert_eq!(uri, "http://xmlns.com/foaf/0.1/name"); + /// ``` + pub async fn from_shacl(shacl: T) -> Result { + let rdf_str = shacl.fetch().await?; + let mut converter = Converter::default(); + // Parse the RDF string + let graph: LightDataset = trig::parse_str(&rdf_str) + .collect_quads() + .map_err(|e| CuriesError::InvalidFormat(format!("Error parsing TriG: {e}")))?; + let shacl_ns = Namespace::new("http://www.w3.org/ns/shacl#")?; + // Iterate over triples that match the SHACL prefix and namespace pattern + for q_prefix in graph.quads_matching(Any, [shacl_ns.get("prefix")?], Any, Any) { + for q_namespace in + graph.quads_matching([q_prefix?.s()], [shacl_ns.get("namespace")?], Any, Any) + { + converter.add_prefix( + q_prefix? + .o() + .lexical_form() + .ok_or(CuriesError::InvalidFormat("Term".to_string()))? + .as_ref(), + q_namespace? + .o() + .lexical_form() + .ok_or(CuriesError::InvalidFormat("Term".to_string()))? + .as_ref(), + )?; + } + } + Ok(converter) + } + /// Add a `Record` to the `Converter`.
/// When adding a new record we create a reference to the `Record` (Arc) /// And we use this reference in the prefix and URI hashmaps @@ -279,7 +345,7 @@ impl Converter { Ok(serde_json::to_string(&self)?) } - /// Write the prefix map as a HashMap where keys are prefixes and values are URI prefixes. + /// Write the prefix map as a `HashMap` where keys are prefixes and values are URI prefixes. pub fn write_prefix_map(&self) -> HashMap { self.records .iter() @@ -287,7 +353,52 @@ impl Converter { .collect() } - /// Write the JSON-LD representation of the prefix map. + /// Write the `Converter` prefix map as SHACL prefixes definition in the Turtle format. + pub fn write_shacl(&self) -> Result { + let mut graph = LightGraph::new(); + let shacl_ns = Namespace::new("http://www.w3.org/ns/shacl#")?; + for (i, arc_record) in self.records.iter().enumerate() { + let record = Arc::clone(arc_record); + let subject = BnodeId::new_unchecked(format!("{}", i)); + graph.insert(&subject, shacl_ns.get("prefix")?, record.prefix.as_str())?; + graph.insert( + &subject, + shacl_ns.get("namespace")?, + record.uri_prefix.as_str() * xsd::anyURI, + )?; + } + let ttl_prefixes = [ + ( + Prefix::new_unchecked("xsd".to_string()), + Iri::new_unchecked("http://www.w3.org/2001/XMLSchema#".to_string()), + ), + ( + Prefix::new_unchecked("sh".to_string()), + Iri::new_unchecked("http://www.w3.org/ns/shacl#".to_string()), + ), + ]; + let ttl_config = TurtleConfig::new() + .with_pretty(true) + .with_prefix_map(&ttl_prefixes[..]); + let mut ttl_stringifier = TurtleSerializer::new_stringifier_with_config(ttl_config); + Ok(ttl_stringifier.serialize_graph(&graph)?.to_string()) + } + + /// Write the JSON-LD representation of the prefix map as serde JSON (can be cast to string easily) + /// + /// # Examples + /// + /// ``` + /// use curies::Converter; + /// + /// let mut converter = Converter::default(); + /// converter.add_prefix("doid", "http://purl.obolibrary.org/obo/DOID_").unwrap(); + /// + /// 
assert!(converter.write_jsonld()["@context"] + /// .to_string() + /// .starts_with('{')); + /// println!("{:?}", converter.write_jsonld()); + /// ``` pub fn write_jsonld(&self) -> serde_json::Value { let mut context = json!({}); for record in &self.records { @@ -497,6 +608,84 @@ impl Converter { .collect() } + /// Get the standard prefix for a given prefix + /// + /// # Examples + /// + /// ```rust + /// use curies::sources::get_bioregistry_converter; + /// use tokio::runtime; + /// + /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); + /// let converter = rt.block_on(async { + /// get_bioregistry_converter().await + /// }).expect("Failed to create the converter"); + /// assert_eq!(converter.standardize_prefix("gomf").unwrap(), "go"); + /// ``` + pub fn standardize_prefix(&self, prefix: &str) -> Result { + Ok(self.find_by_prefix(prefix)?.prefix.to_string()) + } + + /// Standardize a CURIE + /// + /// # Examples + /// + /// ```rust + /// use curies::sources::get_bioregistry_converter; + /// use tokio::runtime; + /// + /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); + /// let converter = rt.block_on(async { + /// get_bioregistry_converter().await + /// }).expect("Failed to create the converter"); + /// assert_eq!(converter.standardize_curie("gomf:0032571").unwrap(), "go:0032571"); + /// ``` + pub fn standardize_curie(&self, curie: &str) -> Result { + let parts: Vec<&str> = curie.split(':').collect(); + if parts.len() == 2 { + Ok(format!( + "{}:{}", + self.standardize_prefix(parts[0])?, + parts[1] + )) + } else { + Ok(curie.to_string()) + } + } + + /// Standardize a URI + /// + /// # Examples + /// + /// ```rust + /// use curies::sources::get_bioregistry_converter; + /// use tokio::runtime; + /// + /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); + /// let converter = rt.block_on(async { + /// get_bioregistry_converter().await + /// }).expect("Failed to create the converter"); + 
/// assert_eq!( + /// converter.standardize_uri("http://amigo.geneontology.org/amigo/term/GO:0032571").unwrap(), + /// "http://purl.obolibrary.org/obo/GO_0032571", + /// ); + /// ``` + pub fn standardize_uri(&self, uri: &str) -> Result { + let rec = self.find_by_uri(uri)?; + if uri.starts_with(&rec.uri_prefix) { + Ok(uri.to_string()) + } else { + let (_new_prefix, id) = rec + .uri_prefix_synonyms + .iter() + .filter(|synonym| uri.starts_with(&**synonym)) + .max_by_key(|synonym| synonym.len()) // Get longest first + .and_then(|synonym| uri.strip_prefix(synonym).map(|id| (synonym, id))) + .ok_or_else(|| CuriesError::NotFound(uri.to_string()))?; + Ok(format!("{}{}", rec.uri_prefix, id)) + } + } + /// Returns the number of `Records` in the `Converter` pub fn len(&self) -> usize { self.records.len() diff --git a/lib/src/error.rs b/lib/src/error.rs index 4c2463c..8e23052 100644 --- a/lib/src/error.rs +++ b/lib/src/error.rs @@ -5,6 +5,9 @@ use std::fmt; use std::str::Utf8Error; use serde::Deserialize; +use sophia::api::source::StreamError; +use sophia::inmem::index::TermIndexFullError; +use sophia::iri::InvalidIri; /// Enum of errors returned by this library #[derive(Debug, Deserialize)] @@ -30,6 +33,7 @@ impl fmt::Display for CuriesError { } CuriesError::InvalidCurie(ref msg) => write!(f, "Invalid CURIE: {}", msg), CuriesError::InvalidFormat(ref msg) => write!(f, "Invalid format: {}", msg), + // CuriesError::InvalidTerm() => write!(f, "Invalid RDF term"), CuriesError::Utf8(ref msg) => write!(f, "Error decoding UTF-8: {}", msg), CuriesError::SerdeJson(ref msg) => write!(f, "Error parsing JSON: {}", msg), CuriesError::Reqwest(ref msg) => write!(f, "Error sending request: {}", msg), @@ -59,3 +63,18 @@ impl From for CuriesError { CuriesError::StdIo(err.to_string()) } } +impl From for CuriesError { + fn from(err: InvalidIri) -> Self { + CuriesError::InvalidFormat(err.to_string()) + } +} +impl From for CuriesError { + fn from(err: TermIndexFullError) -> Self { + 
CuriesError::InvalidFormat(err.to_string()) + } +} +impl From> for CuriesError { + fn from(err: StreamError) -> Self { + CuriesError::InvalidFormat(format!("RDF Trig serialization error: {err}")) + } +} diff --git a/lib/src/fetch.rs b/lib/src/fetch.rs index eb03e63..cf0db2a 100644 --- a/lib/src/fetch.rs +++ b/lib/src/fetch.rs @@ -17,7 +17,7 @@ pub trait PrefixMapSource: Send + Sync { #[async_trait(?Send)] impl PrefixMapSource for &str { async fn fetch(self) -> Result, CuriesError> { - Ok(serde_json::from_str(&fetch_url(self).await?)?) + Ok(serde_json::from_str(&fetch_if_url(self).await?)?) } } #[async_trait(?Send)] @@ -56,7 +56,7 @@ impl ExtendedPrefixMapSource for Vec { #[async_trait(?Send)] impl ExtendedPrefixMapSource for &str { async fn fetch(self) -> Result, CuriesError> { - Ok(serde_json::from_str(&fetch_url(self).await?)?) + Ok(serde_json::from_str(&fetch_if_url(self).await?)?) } } #[async_trait(?Send)] @@ -66,8 +66,26 @@ impl ExtendedPrefixMapSource for &Path { } } +/// Trait to provide the SHACL prefix map as URL, string, or Path to file +#[async_trait(?Send)] +pub trait ShaclSource: Send + Sync { + async fn fetch(self) -> Result; +} +#[async_trait(?Send)] +impl ShaclSource for &str { + async fn fetch(self) -> Result { + fetch_if_url(self).await + } +} +#[async_trait(?Send)] +impl ShaclSource for &Path { + async fn fetch(self) -> Result { + fetch_file(self).await + } +} + /// Given a string, fetch data as string if it is a URL, otherwise return the string -async fn fetch_url(url: &str) -> Result { +async fn fetch_if_url(url: &str) -> Result { if url.starts_with("https://") || url.starts_with("http://") || url.starts_with("ftp://") { // Get URL content with HTTP request let client = reqwest::Client::new(); diff --git a/lib/src/sources.rs b/lib/src/sources.rs index 2ae154a..771fb81 100644 --- a/lib/src/sources.rs +++ b/lib/src/sources.rs @@ -17,7 +17,7 @@ use crate::{error::CuriesError, Converter}; /// let rt = runtime::Runtime::new().expect("Failed to 
create Tokio runtime"); /// let converter = rt.block_on(async { /// get_obo_converter().await -/// }).expect("Failed to create the OBO converter"); +/// }).expect("Failed to create the converter"); /// /// let uri = converter.expand("DOID:1234").unwrap(); /// assert_eq!(uri, "http://purl.obolibrary.org/obo/DOID_1234"); @@ -57,7 +57,7 @@ pub async fn get_obo_converter() -> Result { /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); /// let converter = rt.block_on(async { /// get_monarch_converter().await -/// }).expect("Failed to create the GO converter"); +/// }).expect("Failed to create the converter"); /// /// let uri = converter.expand("CHEBI:24867").unwrap(); /// assert_eq!(uri, "http://purl.obolibrary.org/obo/CHEBI_24867"); @@ -92,7 +92,7 @@ pub async fn get_monarch_converter() -> Result { /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); /// let converter = rt.block_on(async { /// get_go_converter().await -/// }).expect("Failed to create the GO converter"); +/// }).expect("Failed to create the converter"); /// /// let uri = converter.expand("NCBIGene:100010").unwrap(); /// assert_eq!(uri, "http://identifiers.org/ncbigene/100010"); @@ -121,7 +121,7 @@ pub async fn get_go_converter() -> Result { /// let rt = runtime::Runtime::new().expect("Failed to create Tokio runtime"); /// let converter = rt.block_on(async { /// get_bioregistry_converter().await -/// }).expect("Failed to create the GO converter"); +/// }).expect("Failed to create the converter"); /// /// let uri = converter.expand("NCBIGene:100010").unwrap(); /// assert_eq!(uri, "https://www.ncbi.nlm.nih.gov/gene/100010"); diff --git a/lib/tests/curies_test.rs b/lib/tests/curies_test.rs index 5166d9e..aded2fc 100644 --- a/lib/tests/curies_test.rs +++ b/lib/tests/curies_test.rs @@ -42,9 +42,11 @@ fn new_empty_converter() -> Result<(), Box> { assert!(converter.write_prefix_map().len() == 3); assert!(converter.write_jsonld()["@context"] .to_string() - 
.starts_with("{")); - println!("{:?}", converter.write_jsonld()); + .starts_with('{')); + assert!(converter.write_shacl()?.starts_with("PREFIX")); // println!("{:?}", converter.write_extended_prefix_map()); + // println!("{:?}", converter.write_jsonld()); + // println!("{:?}", converter.write_shacl()); // Find Record by prefix or URI assert_eq!(converter.find_by_prefix("doid")?.prefix, "doid"); diff --git a/python/Cargo.toml b/python/Cargo.toml index 7c80d3f..1e43068 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -19,6 +19,6 @@ crate-type = ["cdylib"] curies.workspace = true serde.workspace = true pyo3 = { version = "0.21", features = ["extension-module"] } -# pyo3-asyncio = { version = "0.20", features = ["tokio-runtime"] } pythonize = "0.21" tokio = { version = "1.34", features = ["rt-multi-thread"] } +# pyo3-asyncio = { version = "0.20", features = ["tokio-runtime"] } diff --git a/python/requirements.txt b/python/requirements.txt index cfa066c..56bbcc9 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1,4 @@ -maturin[patchelf] +maturin pre-commit pytest mktestdocs diff --git a/python/src/api.rs b/python/src/api.rs index 4fce577..276c514 100644 --- a/python/src/api.rs +++ b/python/src/api.rs @@ -87,6 +87,31 @@ impl ConverterPy { }) } + // #[staticmethod] + // #[pyo3(text_signature = "(data)")] + // fn from_extended_prefix_map(py: Python, data: &PyAny) -> PyResult { + // let rt = Runtime::new().map_err(|e| PyErr::new::(format!("Failed to create Tokio runtime: {e}")))?; + // rt.block_on(async move { + // let converter_result = if let Ok(s) = data.downcast::() { + // // Handle the case where the input is a string + // Converter::from_extended_prefix_map(s.to_str()?) 
+ // .await + // .map_err(|e| PyErr::new::(format!("Error processing string input: {e}"))) + // } else if let Ok(dict) = data.downcast::() { + // // Handle the case where the input is a dictionary + // // let json_str = dict_to_json(py, dict)?; + // let hashmap = dict; + // Converter::from_extended_prefix_map(&hashmap) + // .await + // .map_err(|e| PyErr::new::(format!("Error processing dictionary input: {e}"))) + // } else { + // // Return an error if neither type matches + // Err(PyErr::new::("Expected a string or a dictionary")) + // }; + // converter_result.map(|converter| Self { converter }) + // }) + // } + /// Load a `Converter` from a prefix map JSON string or URL #[staticmethod] #[pyo3(text_signature = "(data)")] @@ -117,6 +142,21 @@ impl ConverterPy { }) } + /// Load a `Converter` from a SHACL prefix definition string or URL + #[staticmethod] + #[pyo3(text_signature = "(data)")] + fn from_shacl(data: &str) -> PyResult { + let rt = Runtime::new().map_err(|e| { + PyErr::new::(format!("Failed to create Tokio runtime: {e}")) + })?; + rt.block_on(async move { + Converter::from_shacl(data) + .await + .map(|converter| Self { converter }) + .map_err(|e| PyErr::new::(e.to_string())) + }) + } + /// Add a record to the `Converter` #[pyo3(text_signature = "($self, record)")] fn add_record(&mut self, record: RecordPy) -> PyResult<()> { @@ -125,6 +165,14 @@ impl ConverterPy { .map_err(|e| PyErr::new::(e.to_string())) } + /// Add a prefix/namespace to the `Converter` + #[pyo3(text_signature = "($self, prefix, namespace)")] + fn add_prefix(&mut self, prefix: String, namespace: String) -> PyResult<()> { + self.converter + .add_prefix(&prefix, &namespace) + .map_err(|e| PyErr::new::(e.to_string())) + } + /// Compress a URI #[pyo3(text_signature = "($self, uri)")] fn compress(&self, uri: String) -> PyResult { @@ -155,6 +203,42 @@ impl ConverterPy { .compress_list(uris.iter().map(|s| s.as_str()).collect()) } + /// Standardize prefix + #[pyo3(text_signature = "($self, 
prefix)")] + fn standardize_prefix(&self, prefix: String) -> PyResult { + self.converter + .standardize_prefix(&prefix) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Standardize a CURIE + #[pyo3(text_signature = "($self, curie)")] + fn standardize_curie(&self, curie: String) -> PyResult { + self.converter + .standardize_curie(&curie) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Standardize a URI + #[pyo3(text_signature = "($self, uri)")] + fn standardize_uri(&self, uri: String) -> PyResult { + self.converter + .standardize_uri(&uri) + .map_err(|e| PyErr::new::(e.to_string())) + } + + #[pyo3(text_signature = "($self, include_synonyms)")] + fn get_prefixes(&self, include_synonyms: Option) -> Vec { + self.converter + .get_prefixes(include_synonyms.unwrap_or(false)) + } + + #[pyo3(text_signature = "($self, include_synonyms)")] + fn get_uri_prefixes(&self, include_synonyms: Option) -> Vec { + self.converter + .get_uri_prefixes(include_synonyms.unwrap_or(false)) + } + /// Chain with another `Converter` #[pyo3(text_signature = "($self, converter)")] fn chain(&self, converter: &ConverterPy) -> PyResult { @@ -163,6 +247,35 @@ impl ConverterPy { .map_err(|e| PyErr::new::(e.to_string())) } + /// Write the `Converter` as a simple prefix map JSON + #[pyo3(text_signature = "($self)")] + fn write_prefix_map(&self) -> String { + format!("{:?}", self.converter.write_prefix_map()) + } + + /// Write the `Converter` as a extended prefix map JSON + #[pyo3(text_signature = "($self)")] + fn write_extended_prefix_map(&self) -> PyResult { + Ok((self + .converter + .write_extended_prefix_map() + .map_err(|e| PyErr::new::(e.to_string()))?) 
+ .to_string()) + } + + /// Write the `Converter` prefix map as JSON-LD context + #[pyo3(text_signature = "($self)")] + fn write_jsonld(&self) -> String { + format!("{}", self.converter.write_jsonld()) + } + + #[pyo3(text_signature = "($self)")] + fn write_shacl(&self) -> PyResult { + self.converter + .write_shacl() + .map_err(|e| PyErr::new::(e.to_string())) + } + // NOTE: could there be a way to pass a list of converters? // #[staticmethod] // #[pyo3(text_signature = "(converters)")] @@ -192,8 +305,8 @@ pub fn get_obo_converter() -> PyResult { } #[pyfunction] -pub fn get_bioregistry_converter(py: Python<'_>) -> PyResult { - // TODO: https://pyo3.rs/v0.21.1/ecosystem/async-await +pub fn get_bioregistry_converter() -> PyResult { + // TODO: https://pyo3.rs/v0.21.1/ecosystem/async-await py: Python<'_> let rt = Runtime::new().map_err(|e| { PyErr::new::(format!("Failed to create Tokio runtime: {e}")) })?; diff --git a/python/src/lib.rs b/python/src/lib.rs index 419ef15..ad2a4e3 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -6,7 +6,7 @@ use pyo3::wrap_pyfunction; /// Python bindings #[pymodule] -fn curies_rs(_py: Python<'_>, m: &PyModule) -> PyResult<()> { +fn curies_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__package__", "curies-rs")?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?; @@ -16,5 +16,6 @@ fn curies_rs(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(get_obo_converter))?; m.add_wrapped(wrap_pyfunction!(get_bioregistry_converter))?; m.add_wrapped(wrap_pyfunction!(get_monarch_converter))?; - m.add_wrapped(wrap_pyfunction!(get_go_converter)) + m.add_wrapped(wrap_pyfunction!(get_go_converter))?; + Ok(()) } diff --git a/python/tests/test_api.py b/python/tests/test_api.py index 44dab1a..7dbc2c3 100644 --- a/python/tests/test_api.py +++ b/python/tests/test_api.py @@ -1,117 +1,179 @@ -import unittest - -from curies_rs import Record, Converter, 
get_obo_converter, get_bioregistry_converter, get_monarch_converter, get_go_converter - - -class TestAPI(unittest.TestCase): - """Test the API.""" - - def test_converter(self): - """Test the converter: create, add record, compress, expand, chain converters.""" - rec1 = Record("doid", "http://purl.obolibrary.org/obo/DOID_") - - converter = Converter() - converter.add_record(rec1) - - self.assertEqual(converter.compress("http://purl.obolibrary.org/obo/DOID_1234"), "doid:1234") - self.assertEqual(converter.expand("doid:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - - self.assertEqual(converter.expand_list(["doid:1234"]), ["http://purl.obolibrary.org/obo/DOID_1234"]) - self.assertEqual(converter.compress_list(["http://purl.obolibrary.org/obo/DOID_1234"]), ["doid:1234"]) - - # Test chain - rec2 = Record("obo", "http://purl.obolibrary.org/obo/", [], []) - converter2 = Converter() - converter2.add_record(rec2) - - merged = converter.chain(converter2) - self.assertEqual(merged.expand("doid:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - self.assertEqual(merged.expand("obo:1234"), "http://purl.obolibrary.org/obo/1234") - - - def test_from_prefix_map(self): - """Test creating the converter from prefix map.""" - prefix_map = """{ +from curies_rs import ( + Converter, + Record, + get_bioregistry_converter, + get_go_converter, + get_monarch_converter, + get_obo_converter, +) + + +def test_converter(): + """Test the converter: create, add record, compress, expand, chain converters.""" + rec1 = Record("doid", "http://purl.obolibrary.org/obo/DOID_") + + converter = Converter() + converter.add_record(rec1) + assert converter.compress("http://purl.obolibrary.org/obo/DOID_1234") == "doid:1234" + assert converter.expand("doid:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert converter.expand_list(["doid:1234"]) == ["http://purl.obolibrary.org/obo/DOID_1234"] + assert converter.compress_list(["http://purl.obolibrary.org/obo/DOID_1234"]) == ["doid:1234"] + + # 
Test chain + converter2 = Converter() + converter2.add_prefix("obo", "http://purl.obolibrary.org/obo/") + + merged = converter.chain(converter2) + assert merged.expand("doid:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert merged.expand("obo:1234") == "http://purl.obolibrary.org/obo/1234" + assert len(merged.get_prefixes()) == 2 + assert len(merged.get_uri_prefixes()) == 2 + + assert merged.write_extended_prefix_map().startswith("[{") + assert merged.write_shacl().startswith("PREFIX") + assert len(merged.write_prefix_map()) > 10 # TODO: these checks could be improved + assert len(merged.write_jsonld()) > 10 + # print(merged.write_extended_prefix_map()) + # print(merged.write_prefix_map()) + # print(merged.write_jsonld()) + + +def test_from_prefix_map(): + """Test creating the converter from prefix map.""" + prefix_map = """{ + "GO": "http://purl.obolibrary.org/obo/GO_", + "DOID": "http://purl.obolibrary.org/obo/DOID_", + "OBO": "http://purl.obolibrary.org/obo/" + }""" + conv = Converter.from_prefix_map(prefix_map) + assert conv.expand("DOID:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert conv.compress("http://purl.obolibrary.org/obo/DOID_1234") == "DOID:1234" + + +def test_from_extended_prefix_map(): + """Test creating the converter from extended prefix map.""" + extended_pm = """[ + { + "prefix": "DOID", + "prefix_synonyms": [ + "doid" + ], + "uri_prefix": "http://purl.obolibrary.org/obo/DOID_", + "uri_prefix_synonyms": [ + "http://bioregistry.io/DOID:" + ], + "pattern": "^\\\\d+$" + }, + { + "prefix": "GO", + "prefix_synonyms": [ + "go" + ], + "uri_prefix": "http://purl.obolibrary.org/obo/GO_", + "pattern": "^\\\\d{7}$" + }, + { + "prefix": "OBO", + "prefix_synonyms": [ + "obo" + ], + "uri_prefix": "http://purl.obolibrary.org/obo/" + }]""" + conv = Converter.from_extended_prefix_map(extended_pm) + assert conv.expand("doid:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert 
conv.compress("http://purl.obolibrary.org/obo/DOID_1234") == "DOID:1234" + + +# def test_from_extended_prefix_map_dict(): +# """Test creating the converter from extended prefix map.""" +# extended_pm = [ +# { +# "prefix": "DOID", +# "prefix_synonyms": [ +# "doid" +# ], +# "uri_prefix": "http://purl.obolibrary.org/obo/DOID_", +# "uri_prefix_synonyms": [ +# "http://bioregistry.io/DOID:" +# ], +# "pattern": "^\\\\d+$" +# }, +# { +# "prefix": "GO", +# "prefix_synonyms": [ +# "go" +# ], +# "uri_prefix": "http://purl.obolibrary.org/obo/GO_", +# "pattern": "^\\\\d{7}$" +# }, +# { +# "prefix": "OBO", +# "prefix_synonyms": [ +# "obo" +# ], +# "uri_prefix": "http://purl.obolibrary.org/obo/" +# }] +# conv = Converter.from_extended_prefix_map(extended_pm) +# assert conv.expand("doid:1234"), "http://purl.obolibrary.org/obo/DOID_1234" +# assert conv.compress("http://purl.obolibrary.org/obo/DOID_1234"), "DOID:1234" + + +def test_from_jsonld(): + """Test creating the converter from JSON-LD context.""" + jsonld = """{ + "@context": { "GO": "http://purl.obolibrary.org/obo/GO_", "DOID": "http://purl.obolibrary.org/obo/DOID_", "OBO": "http://purl.obolibrary.org/obo/" - }""" - conv = Converter.from_prefix_map(prefix_map) - self.assertEqual(conv.expand("DOID:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - self.assertEqual(conv.compress("http://purl.obolibrary.org/obo/DOID_1234"), "DOID:1234") - - - def test_from_extended_prefix_map(self): - """Test creating the converter from extended prefix map.""" - extended_pm = """[ - { - "prefix": "DOID", - "prefix_synonyms": [ - "doid" - ], - "uri_prefix": "http://purl.obolibrary.org/obo/DOID_", - "uri_prefix_synonyms": [ - "http://bioregistry.io/DOID:" - ], - "pattern": "^\\\\d+$" - }, - { - "prefix": "GO", - "prefix_synonyms": [ - "go" - ], - "uri_prefix": "http://purl.obolibrary.org/obo/GO_", - "pattern": "^\\\\d{7}$" - }, - { - "prefix": "OBO", - "prefix_synonyms": [ - "obo" - ], - "uri_prefix": "http://purl.obolibrary.org/obo/" - }]""" 
- conv = Converter.from_extended_prefix_map(extended_pm) - self.assertEqual(conv.expand("doid:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - self.assertEqual(conv.compress("http://purl.obolibrary.org/obo/DOID_1234"), "DOID:1234") - - - def test_from_jsonld(self): - """Test creating the converter from JSON-LD context.""" - jsonld = """{ - "@context": { - "GO": "http://purl.obolibrary.org/obo/GO_", - "DOID": "http://purl.obolibrary.org/obo/DOID_", - "OBO": "http://purl.obolibrary.org/obo/" - } - }""" - conv = Converter.from_jsonld(jsonld) - self.assertEqual(conv.expand("DOID:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - self.assertEqual(conv.compress("http://purl.obolibrary.org/obo/DOID_1234"), "DOID:1234") - - - def test_predefined_converters(self): - """Test the predefined converters.""" - obo = get_obo_converter() - self.assertEqual(obo.expand("DOID:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - self.assertEqual(obo.compress("http://purl.obolibrary.org/obo/DOID_1234"), "DOID:1234") - - bioregistry = get_bioregistry_converter() - self.assertEqual(bioregistry.expand("doid:1234"), "http://purl.obolibrary.org/obo/DOID_1234") - self.assertEqual(bioregistry.compress("http://purl.obolibrary.org/obo/DOID_1234"), "doid:1234") - - go = get_go_converter() - self.assertEqual(go.expand("NCBIGene:100010"), "http://identifiers.org/ncbigene/100010") - self.assertEqual(go.compress("http://identifiers.org/ncbigene/100010"), "NCBIGene:100010") - - monarch = get_monarch_converter() - self.assertEqual(monarch.expand("CHEBI:24867"), "http://purl.obolibrary.org/obo/CHEBI_24867") - self.assertEqual(monarch.compress("http://purl.obolibrary.org/obo/CHEBI_24867"), "CHEBI:24867") - - def test_chain(self): - converter = ( - get_obo_converter() - .chain(get_go_converter()) - .chain(get_monarch_converter()) - ) - self.assertEqual(converter.expand("CHEBI:24867"), "http://purl.obolibrary.org/obo/CHEBI_24867") - print(len(converter)) + } + }""" + conv = 
Converter.from_jsonld(jsonld) + assert conv.expand("DOID:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert conv.compress("http://purl.obolibrary.org/obo/DOID_1234") == "DOID:1234" + +def test_from_shacl(): + """Test creating the converter from SHACL prefix definition.""" + shacl = """@prefix sh: . +@prefix xsd: . +[ + sh:declare + [ sh:prefix "dc" ; sh:namespace "http://purl.org/dc/elements/1.1/"^^xsd:anyURI ], + [ sh:prefix "dcterms" ; sh:namespace "http://purl.org/dc/terms/"^^xsd:anyURI ], + [ sh:prefix "foaf" ; sh:namespace "http://xmlns.com/foaf/0.1/"^^xsd:anyURI ], + [ sh:prefix "xsd" ; sh:namespace "http://www.w3.org/2001/XMLSchema#"^^xsd:anyURI ] +] .""" + conv = Converter.from_shacl(shacl) + assert conv.expand("foaf:name") == "http://xmlns.com/foaf/0.1/name" + + +def test_predefined_converters(): + """Test the predefined converters.""" + obo = get_obo_converter() + assert obo.expand("DOID:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert obo.compress("http://purl.obolibrary.org/obo/DOID_1234") == "DOID:1234" + + go = get_go_converter() + assert go.expand("NCBIGene:100010") == "http://identifiers.org/ncbigene/100010" + assert go.compress("http://identifiers.org/ncbigene/100010") == "NCBIGene:100010" + + monarch = get_monarch_converter() + assert monarch.expand("CHEBI:24867") == "http://purl.obolibrary.org/obo/CHEBI_24867" + assert monarch.compress("http://purl.obolibrary.org/obo/CHEBI_24867") == "CHEBI:24867" + + bioregistry = get_bioregistry_converter() + assert bioregistry.expand("doid:1234") == "http://purl.obolibrary.org/obo/DOID_1234" + assert bioregistry.compress("http://purl.obolibrary.org/obo/DOID_1234") == "doid:1234" + + assert bioregistry.standardize_prefix("gomf") == "go" + assert bioregistry.standardize_curie("gomf:0032571") == "go:0032571" + assert bioregistry.standardize_uri("http://amigo.geneontology.org/amigo/term/GO:0032571") == "http://purl.obolibrary.org/obo/GO_0032571" + + +def test_chain(): + converter = ( + 
get_obo_converter() + .chain(get_go_converter()) + .chain(get_monarch_converter()) + ) + assert converter.expand("CHEBI:24867") == "http://purl.obolibrary.org/obo/CHEBI_24867" + # print(len(converter)) diff --git a/scripts/install-dev.sh b/scripts/install-dev.sh index 817716c..f008495 100755 --- a/scripts/install-dev.sh +++ b/scripts/install-dev.sh @@ -8,9 +8,17 @@ source .venv/bin/activate pip install -r python/requirements.txt pip install -r lib/docs/requirements.txt +if [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then + echo "Installing Linux specific dependency" + maturin[patchelf] +fi + +# On macOS you might need to set up the default CRAN mirror +# echo 'options(repos=c(CRAN="https://cran.r-project.org"))' >> ~/.Rprofile + rustup update rustup toolchain install nightly # For tarpaulin -cargo install wasm-pack cargo-tarpaulin cargo-make +cargo install wasm-pack cargo-tarpaulin cargo-deny cargo-outdated pre-commit install diff --git a/scripts/test-python.sh b/scripts/test-python.sh index cae1a11..310aa39 100755 --- a/scripts/test-python.sh +++ b/scripts/test-python.sh @@ -11,11 +11,11 @@ for arg in "$@"; do done if [ ! -d ".venv" ]; then - echo ".venv virtual environment does not exist. Creating it" + echo "🌀 .venv virtual environment does not exist. 
Creating it" python -m venv .venv fi -echo "Activating virtual environment" +echo "⚙️ Activating virtual environment" source .venv/bin/activate pip install -q -r python/requirements.txt diff --git a/scripts/test-r.sh b/scripts/test-r.sh index efd2e52..05e25a4 100755 --- a/scripts/test-r.sh +++ b/scripts/test-r.sh @@ -13,13 +13,8 @@ for arg in "$@"; do done if [ "$INSTALL_DEPS" = true ]; then - Rscript -e 'install.packages("usethis"); install.packages("devtools"); install.packages("testthat"); install.packages("rextendr"); rextendr::document("./r"); library(testthat); test_dir("r/tests");' - # NOTE: the packages installed in separate Rscript commands are not available in the next command, so we need to install them all in one command - # Rscript -e 'install.packages("usethis")' - # Rscript -e 'install.packages("devtools")' - # Rscript -e 'install.packages("testthat")' - # Rscript -e 'install.packages("rextendr")' - # Rscript -e 'remotes::install_github("extendr/rextendr")' -else - Rscript -e 'rextendr::document("./r"); library(testthat); test_dir("r/tests");' + Rscript -e 'required_packages <- c("usethis", "devtools", "testthat", "rextendr"); install.packages(required_packages, repos="http://cran.r-project.org")' + # Rscript --save -e 'required_packages <- c("usethis", "devtools", "testthat", "rextendr"); install.packages(required_packages, repos="http://cran.r-project.org", dependencies=TRUE)' fi + +Rscript -e 'rextendr::document("./r"); library(testthat); test_dir("r/tests");'