Skip to content

Commit

Permalink
feat: Add functions to standardize prefix, CURIE, and URI. Add `Conve…
Browse files Browse the repository at this point in the history
…rter::from_shacl()` loader using the sophia library for RDF manipulation. All functions added to the rust library are now also available in python and JS
  • Loading branch information
vemonet committed Apr 15, 2024
1 parent d5a1694 commit ae67b97
Show file tree
Hide file tree
Showing 25 changed files with 796 additions and 214 deletions.
12 changes: 12 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[target.x86_64-apple-darwin]
rustflags = [
"-C", "link-arg=-undefined",
"-C", "link-arg=dynamic_lookup",
]

[target.aarch64-apple-darwin]
rustflags = [
"-C", "link-arg=-undefined",
"-C", "link-arg=dynamic_lookup",
]
# Required for the `sophia` crate to build on macOS
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ js/LICENSE

# R
*.rds
.RData

# Docs
lib/docs/dist
Expand Down
3 changes: 2 additions & 1 deletion deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ ignore = [

# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
[licenses]
unlicensed = "deny"
# unlicensed = "deny"
unlicensed = "warn" # ring license is not detected
# See https://spdx.org/licenses/ for list of possible licenses
allow = [
#"MIT",
Expand Down
85 changes: 81 additions & 4 deletions js/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,17 @@ impl ConverterJs {
})
}

/// Build a `Converter` from an extended prefix map, given either as a JSON string or as a URL.
///
/// Returns a JS `Promise` that resolves to a `ConverterJs` on success and
/// rejects with the error message (as a string) on failure.
#[wasm_bindgen(static_method_of = ConverterJs, js_name = fromExtendedPrefixMap)]
pub fn from_extended_prefix_map(prefix_map: String) -> Promise {
    future_to_promise(async move {
        Converter::from_extended_prefix_map(&*prefix_map)
            .await
            // Wrap the core converter so it can be handed back to JS
            .map(|converter| JsValue::from(ConverterJs { converter }))
            // JS only receives the textual form of the error
            .map_err(|err| JsValue::from_str(&err.to_string()))
    })
}

/// Load `Converter` from JSON-LD string or URL
#[wasm_bindgen(static_method_of = ConverterJs, js_name = fromJsonld)]
pub fn from_jsonld(jsonld: String) -> Promise {
Expand All @@ -94,11 +105,11 @@ impl ConverterJs {
})
}

/// Load `Converter` from extended prefix map JSON string or URL
#[wasm_bindgen(static_method_of = ConverterJs, js_name = fromExtendedPrefixMap)]
pub fn from_extended_prefix_map(prefix_map: String) -> Promise {
/// Load `Converter` from SHACL prefixes definition string or URL
#[wasm_bindgen(static_method_of = ConverterJs, js_name = fromShacl)]
pub fn from_shacl(shacl: String) -> Promise {
future_to_promise(async move {
match Converter::from_extended_prefix_map(&*prefix_map).await {
match Converter::from_shacl(&*shacl).await {
Ok(converter) => Ok(JsValue::from(ConverterJs { converter })),
Err(e) => Err(JsValue::from_str(&e.to_string())),
}
Expand Down Expand Up @@ -171,6 +182,72 @@ impl ConverterJs {
Ok(JsValue::from(js_array))
}

/// Standardize a prefix, i.e. return the preferred prefix for the given one.
///
/// For example, with the Bioregistry converter `gomf` is standardized to `go`.
/// If the underlying converter returns an error, its message is thrown as a JS string.
#[wasm_bindgen(js_name = standardizePrefix)]
pub fn standardize_prefix(&self, prefix: String) -> Result<String, JsValue> {
    match self.converter.standardize_prefix(&prefix) {
        Ok(standardized) => Ok(standardized),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}

/// Standardize a CURIE so that it uses the preferred prefix.
///
/// For example, with the Bioregistry converter `gomf:0032571` is standardized to `go:0032571`.
/// If the underlying converter returns an error, its message is thrown as a JS string.
#[wasm_bindgen(js_name = standardizeCurie)]
pub fn standardize_curie(&self, curie: String) -> Result<String, JsValue> {
    match self.converter.standardize_curie(&curie) {
        Ok(standardized) => Ok(standardized),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}

/// Standardize a URI so that it uses the preferred URI prefix.
///
/// For example, with the Bioregistry converter
/// `http://amigo.geneontology.org/amigo/term/GO:0032571` is standardized to
/// `http://purl.obolibrary.org/obo/GO_0032571`.
/// If the underlying converter returns an error, its message is thrown as a JS string.
#[wasm_bindgen(js_name = standardizeUri)]
pub fn standardize_uri(&self, uri: String) -> Result<String, JsValue> {
    match self.converter.standardize_uri(&uri) {
        Ok(standardized) => Ok(standardized),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}

/// Get the list of prefixes known to the `Converter`.
///
/// `include_synonyms` is optional on the JS side and is treated as `false` when omitted.
#[wasm_bindgen(js_name = getPrefixes)]
pub fn get_prefixes(&self, include_synonyms: Option<bool>) -> Vec<String> {
    let with_synonyms = include_synonyms.unwrap_or(false);
    self.converter.get_prefixes(with_synonyms)
}

/// Get the list of URI prefixes known to the `Converter`.
///
/// `include_synonyms` is optional on the JS side and is treated as `false` when omitted.
#[wasm_bindgen(js_name = getUriPrefixes)]
pub fn get_uri_prefixes(&self, include_synonyms: Option<bool>) -> Vec<String> {
    let with_synonyms = include_synonyms.unwrap_or(false);
    self.converter.get_uri_prefixes(with_synonyms)
}

/// Write the `Converter` as a simple prefix map, returned to JS as a string.
///
/// NOTE(review): the string is produced with Rust `Debug` formatting (`{:?}`) of the
/// value returned by the core `write_prefix_map()`. For plain prefixes and URIs this
/// looks like JSON, but `Debug` uses Rust escape syntax, so strict JSON validity is
/// not guaranteed. Confirm whether a real JSON serializer should be used here.
#[wasm_bindgen(js_name = writePrefixMap)]
pub fn write_prefix_map(&self) -> String {
    let prefix_map = self.converter.write_prefix_map();
    format!("{:?}", prefix_map)
}

/// Write the `Converter` as an extended prefix map JSON string.
///
/// If the underlying converter fails to produce the extended prefix map,
/// the error message is thrown as a JS string.
#[wasm_bindgen(js_name = writeExtendedPrefixMap)]
pub fn write_extended_prefix_map(&self) -> Result<String, JsValue> {
    match self.converter.write_extended_prefix_map() {
        Ok(epm) => Ok(epm.to_string()),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}

/// Write the `Converter` prefix map as a JSON-LD context, returned to JS as a string.
///
/// The value returned by the core `write_jsonld()` is stringified through its
/// `Display` implementation.
#[wasm_bindgen(js_name = writeJsonld)]
pub fn write_jsonld(&self) -> String {
    // `to_string()` goes through the same `Display` impl as `format!("{}", ..)`
    // without the extra formatting macro.
    self.converter.write_jsonld().to_string()
}

/// Write the `Converter` prefix map as a SHACL prefixes definition string.
///
/// If the underlying converter fails to serialize, the error message is thrown as a JS string.
#[wasm_bindgen(js_name = writeShacl)]
pub fn write_shacl(&self) -> Result<String, JsValue> {
    match self.converter.write_shacl() {
        Ok(shacl) => Ok(shacl),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}

#[wasm_bindgen(js_name = toString)]
pub fn to_string(&self) -> String {
self.converter.to_string()
Expand Down
24 changes: 24 additions & 0 deletions js/tests/curies.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ describe('Tests for the curies npm package', () => {
"http://purl.obolibrary.org/obo/DOID_1234",
"http://identifiers.org/DOID:1234"
])).toEqual(["OBO:1234", "DOID:1234", undefined]);
expect(converter.getPrefixes().length).toBe(2)
expect(converter.getUriPrefixes().length).toBe(2)

console.log(converter.writeExtendedPrefixMap())
expect(converter.writeExtendedPrefixMap().startsWith("[{")).toBe(true);
expect(converter.writeShacl().startsWith("PREFIX")).toBe(true);
expect(converter.writePrefixMap().length).toBeGreaterThan(10);
expect(converter.writeJsonld().length).toBeGreaterThan(10);
});

test('from prefix map', async () => {
Expand Down Expand Up @@ -56,6 +64,19 @@ describe('Tests for the curies npm package', () => {
expect(converter.expand("DOID:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234");
});

// Load a converter from SHACL prefix declarations passed inline as a Turtle string,
// then check that a CURIE using one of the declared prefixes (`foaf`) expands to its namespace.
test('from SHACL', async () => {
const converter = await Converter.fromShacl(`@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
[
sh:declare
[ sh:prefix "dc" ; sh:namespace "http://purl.org/dc/elements/1.1/"^^xsd:anyURI ],
[ sh:prefix "dcterms" ; sh:namespace "http://purl.org/dc/terms/"^^xsd:anyURI ],
[ sh:prefix "foaf" ; sh:namespace "http://xmlns.com/foaf/0.1/"^^xsd:anyURI ],
[ sh:prefix "xsd" ; sh:namespace "http://www.w3.org/2001/XMLSchema#"^^xsd:anyURI ]
] .`);
expect(converter.expand("foaf:name")).toBe("http://xmlns.com/foaf/0.1/name");
});

test('from extended prefix map', async () => {
const converter = await Converter.fromExtendedPrefixMap(`[
{
Expand Down Expand Up @@ -99,6 +120,9 @@ describe('Tests for the curies npm package', () => {
const converter = await getBioregistryConverter();
expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("doid:1234");
expect(converter.expand("doid:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234");
expect(converter.standardizePrefix("gomf")).toBe("go");
expect(converter.standardizeCurie("gomf:0032571")).toBe("go:0032571");
expect(converter.standardizeUri("http://amigo.geneontology.org/amigo/term/GO:0032571")).toBe("http://purl.obolibrary.org/obo/GO_0032571");
});

test('get GO converter', async () => {
Expand Down
1 change: 1 addition & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ serde_json = "1.0"
async-trait = "0.1"
regex = "1.10"
reqwest = { version = "0.12", default-features = false }
sophia = { version = "0.8.0" }

[dev-dependencies]
tokio = { version = "1.34", features = ["rt-multi-thread", "macros"] }
Expand Down
77 changes: 38 additions & 39 deletions lib/docs/docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,62 +2,61 @@

This page presents the project architecture and some technical details.

### ✨ Features
## 🗃️ Folder structure

List of features availability per language binding, based on features defined in [curies.readthedocs.io](https://curies.readthedocs.io):
```
curies.rs/
├── lib/
│ ├── src/
│ │ └── 🦀 Source code for the core Rust crate
│ ├── tests/
│ │ └── 🧪 Tests for the core Rust crate
│ └── docs/
│ └── 📖 Markdown and HTML files for the documentation website
├── python/
│ └── 🐍 Python bindings
├── js/
│ └── 🟨 JavaScript bindings
├── r/
│ └── 📈 R bindings
├── scripts/
│ └── 🛠️ Development scripts (build, test, gen docs)
└── .github/
└── workflows/
└── ⚙️ Automated CI/CD workflows for testing and building releases
```

## ✨ Features

List of features available per language binding, based on features defined in [curies.readthedocs.io](https://curies.readthedocs.io)

| Feature | Rust (core) | Python | JS | R |
| ------------------------------------------------ | ----------- | ------ | ---- | ---- |
| compress |||||
| expand |||||
| compress_list |||| |
| expand_list |||| |
| standardize (prefix, curie, uri) | | | | |
| standardize (prefix, curie, uri) | | | | |
| chain converters |||| |
| Record object and converter.add_record() |||| |
| converter.add_prefix(prefix, ns) || || |
| converter.get_prefixes() and .get_uri_prefixes() || | | |
| converter.add_prefix(prefix, ns) || || |
| converter.get_prefixes() and .get_uri_prefixes() || | | |
| Load from prefix map |||| |
| Load from extended prefix map |||| |
| Load from JSON-LD |||| |
| Load from SHACL shape | | | | |
| Load from JSON-LD context |||| |
| Load from SHACL prefix definition | | | | |
| Load OBO converter |||| |
| Load GO converter |||| |
| Load Bioregistry converter |||||
| Load Monarch converter |||| |
| Write converter to prefix map || | | |
| Write converter to extended prefix map || | | |
| Write converter to JSON-LD || | | |
| Write converter to prefix map |||| |
| Write converter to extended prefix map |||| |
| Write converter to JSON-LD |||| |
| Write converter to SHACL |||| |
| Prefixes discovery | | | | |

⚠️ Important differences between rust core and bindings:
## ⚠️​ Differences between rust core and language bindings

1. **Load prefix map**, extended prefix map and JSON-LD can take `HashMap` as input in rust. But for JS and python, we currently need to pass it as `String` (we need to figure out how to pass arbitrary objects). You can pass either a URL or a JSON object as string, the lib will automatically retrieve the content of the URL if it is one. The original python lib was taking directly JSON objects for all loaders, apart from SHACL which takes a URL (which was not convenient when wanting to provide a local SHACL file)
1. The **functions to Load** prefix map, extended prefix map and JSON-LD can take `HashMap` as input in rust. But for JS and python, we currently need to pass it as `String` (we need to figure out how to pass arbitrary objects). You can pass either a URL or a JSON object as string, the lib will automatically retrieve the content of the URL if it is one. The original python lib was taking directly JSON objects for all loaders, apart from SHACL which takes a URL (which was not convenient when wanting to provide a local SHACL file)
2. In rust **chain()** is a static function taking a list of converters, `chained = Converter::chain([conv1, conv2])`. In JS and python we cannot easily pass a list of complex objects like converters, so chain is a normal function that takes 1 converter to chain: `chained = conv1.chain(conv2)`
3. Currently **write** prefix map returns a HashMap, write extended map returns a JSON as string, and write JSON-LD returns `serde::json` type. In the original python lib it was writing to a file.

### 🗃️ Folder structure

```
curies.rs/
├── lib/
│ ├── src/
│ │ └── 🦀 Source code for the core Rust crate.
│ ├── tests/
│ │ └── 🧪 Tests for the core Rust crate.
│ └── docs/
│ └── 📖 Markdown and HTML files for the documentation website.
├── python/
│ └── 🐍 Python bindings
├── js/
│ └── 🟨 JavaScript bindings
├── r/
│ └── 📈 R bindings
├── scripts/
│ └── 🛠️ Development scripts (build, test, gen docs).
└── .github/
└── workflows/
└── ⚙️ Automated CI/CD workflows.
```

###
3. In the rust lib, the **functions to write** currently return different types: writing the prefix map returns a `HashMap`, writing the extended prefix map returns JSON as a string, and writing JSON-LD returns a `serde::json` type. The JS and python equivalents directly return a string. The original python lib wrote to a file instead.
5 changes: 2 additions & 3 deletions lib/docs/docs/contributing.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 🛠️ Development
# 🛠️ Contributing

[![Build](https://github.com/biopragmatics/curies.rs/actions/workflows/build.yml/badge.svg)](https://github.com/biopragmatics/curies.rs/actions/workflows/build.yml) [![Lint and Test](https://github.com/biopragmatics/curies.rs/actions/workflows/test.yml/badge.svg)](https://github.com/biopragmatics/curies.rs/actions/workflows/test.yml) [![codecov](https://codecov.io/gh/biopragmatics/curies.rs/graph/badge.svg?token=BF15PSO6GN)](https://codecov.io/gh/biopragmatics/curies.rs) [![dependency status](https://deps.rs/repo/github/biopragmatics/curies.rs/status.svg)](https://deps.rs/repo/github/biopragmatics/curies.rs)

Expand Down Expand Up @@ -28,8 +28,7 @@ cd curies.rs
Install development dependencies:

```bash
rustup update
cargo install wasm-pack cargo-tarpaulin cargo-deny cargo-outdated
./scripts/install-dev.sh
```

> If you are using VSCode we strongly recommend installing the [`rust-lang.rust-analyzer`](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer) extension.
Expand Down
15 changes: 15 additions & 0 deletions lib/docs/docs/javascript.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,21 @@ async function main() {
// Expand and compress list of CURIEs and URIs
const curies = converter.compressList(["http://purl.obolibrary.org/obo/DOID_1234"]);
const uris = converter.expandList(["doid:1234"]);

// Standardize prefix, CURIEs, and URIs using the preferred alternative
console.log(converter.standardizePrefix("gomf"))
console.log(converter.standardizeCurie("gomf:0032571"))
console.log(converter.standardizeUri("http://amigo.geneontology.org/amigo/term/GO:0032571"))

// Get the list of prefixes or URI prefixes; the `include_synonyms` argument defaults to false
const prefixes_without_syn = converter.getPrefixes()
const uri_prefixes_with_syn = converter.getUriPrefixes(true)

// Output the converter prefix map as a string in different serialization format
const epm = converter.writeExtendedPrefixMap()
const pm = converter.writePrefixMap()
const jsonld = converter.writeJsonld()
const shacl = converter.writeShacl()
}
main();
```
Expand Down
Loading

0 comments on commit ae67b97

Please sign in to comment.