Skip to content

Commit

Permalink
Add from_prefix_map and from_jsonld and get_obo_converter() to create…
Browse files Browse the repository at this point in the history
… new converters. Add DataSource trait to provide the data as string, hashmap or URL (requesting the URL requires the functions using DataSource to be async)
  • Loading branch information
vemonet committed Dec 15, 2023
1 parent 1ea7763 commit 87f8cb7
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 30 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: rustup update
- run: rustup component add clippy rustfmt
- run: rustup update && rustup component add clippy rustfmt
- run: cargo fmt -- --check
- run: cargo clippy --all --all-targets --all-features

Expand Down
4 changes: 2 additions & 2 deletions js/src/curies.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::collections::HashSet;

use curies::{error::DuplicateRecordError, Converter, Record};
use js_sys::{Promise, JSON};
use curies::{Converter, Record};
// use js_sys::{Promise, JSON};
use serde::{Deserialize, Serialize};
use wasm_bindgen::prelude::*;

Expand Down
7 changes: 7 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ categories.workspace = true
[dependencies]
trie-rs = "0.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = { version = "0.11", features = ["blocking", "json"] }
async-trait = "0.1"


[dev-dependencies]
tokio = { version = "1.34", features = ["rt-multi-thread", "macros"] }

[package.metadata.docs.rs]
all-features = true
47 changes: 35 additions & 12 deletions lib/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,29 @@ use std::error::Error;
use std::fmt;
use std::str::Utf8Error;

#[derive(Debug)]
pub struct DuplicateRecordError(pub String);
// #[derive(Debug)]
// pub struct DuplicateRecordError(pub String);

impl Error for DuplicateRecordError {}
// impl Error for DuplicateRecordError {}

impl fmt::Display for DuplicateRecordError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Curies Duplicate Record: {}", self.0)
}
}
// impl fmt::Display for DuplicateRecordError {
// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// write!(f, "Curies Duplicate Record: {}", self.0)
// }
// }

// NOTE: In case we need a generic error that contains other errors

#[derive(Debug)]
pub enum CuriesError {
NotFound(String),
InvalidCurie(String),
DuplicateRecordError(String),
InvalidFormat(String),
DuplicateRecord(String),
Utf8(String),
SerdeJson(String),
Reqwest(String),
StdIo(String),
}

impl Error for CuriesError {}
Expand All @@ -29,11 +33,15 @@ impl fmt::Display for CuriesError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
CuriesError::NotFound(ref prefix) => write!(f, "Prefix not found: {}", prefix),
CuriesError::DuplicateRecordError(ref prefix) => {
CuriesError::DuplicateRecord(ref prefix) => {
write!(f, "Duplicate record found for prefix: {}", prefix)
}
CuriesError::InvalidCurie(ref prefix) => write!(f, "Invalid CURIE: {}", prefix),
CuriesError::Utf8(ref prefix) => write!(f, "Error decoding UTF-8: {}", prefix),
CuriesError::InvalidCurie(ref msg) => write!(f, "Invalid CURIE: {}", msg),
CuriesError::InvalidFormat(ref msg) => write!(f, "Invalid format: {}", msg),
CuriesError::Utf8(ref msg) => write!(f, "Error decoding UTF-8: {}", msg),
CuriesError::SerdeJson(ref msg) => write!(f, "Error parsing JSON: {}", msg),
CuriesError::Reqwest(ref msg) => write!(f, "Error sending request: {}", msg),
CuriesError::StdIo(ref msg) => write!(f, "Error reading file: {}", msg),
}
}
}
Expand All @@ -44,3 +52,18 @@ impl From<Utf8Error> for CuriesError {
CuriesError::Utf8(err.to_string())
}
}
impl From<serde_json::Error> for CuriesError {
fn from(err: serde_json::Error) -> Self {
CuriesError::SerdeJson(err.to_string())
}
}
impl From<reqwest::Error> for CuriesError {
fn from(err: reqwest::Error) -> Self {
CuriesError::Reqwest(err.to_string())
}
}
impl From<std::io::Error> for CuriesError {
fn from(err: std::io::Error) -> Self {
CuriesError::StdIo(err.to_string())
}
}
121 changes: 116 additions & 5 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
use async_trait::async_trait;
use error::CuriesError;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use trie_rs::{Trie, TrieBuilder};

use crate::error::DuplicateRecordError;
pub mod error;
pub mod sources;

/// A CURIE `Record`, containing its prefixes and URI prefixes
#[derive(Debug, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -38,15 +43,67 @@ impl Converter {
}
}

/// When adding a new CURIE we create a reference to the `Record` (Arc)
/// Build a `Converter` from a `HashMap` of prefix -> URI prefix.
///
/// Every entry becomes a `Record` without synonyms. The first record that
/// `add_record` rejects aborts the construction and its error is returned.
pub fn from_prefix_map(prefix_map: HashMap<String, String>) -> Result<Self, CuriesError> {
    let mut converter = Converter::default();
    prefix_map
        .into_iter()
        .try_for_each(|(prefix, uri_prefix)| {
            let record = Record {
                prefix,
                uri_prefix,
                prefix_synonyms: HashSet::new(),
                uri_prefix_synonyms: HashSet::new(),
            };
            converter.add_record(record)
        })?;
    Ok(converter)
}

/// Build a `Converter` from the `@context` of a JSON-LD document.
///
/// The document is obtained through `DataSource::fetch`. Its `@context`
/// entry must be a JSON object, otherwise `CuriesError::InvalidFormat` is
/// returned. Within the context:
/// - keys starting with `@` are skipped;
/// - a string value is used as the URI prefix for its key;
/// - an object value is used only when it has `"@prefix": true` and a
///   string `@id`, which then provides the URI prefix;
/// - every other entry is ignored.
pub async fn from_jsonld<T: DataSource>(data: T) -> Result<Self, CuriesError> {
    let document = data.fetch().await?;
    let context = document
        .get("@context")
        .and_then(Value::as_object)
        .ok_or_else(|| CuriesError::InvalidFormat("JSON-LD".to_string()))?;

    let mut converter = Converter::default();
    for (term, definition) in context.iter().filter(|(term, _)| !term.starts_with('@')) {
        // Resolve the URI prefix for this term, or skip the entry entirely.
        let uri_prefix = match definition {
            Value::String(uri) => uri,
            Value::Object(expanded) if expanded.get("@prefix") == Some(&Value::Bool(true)) => {
                match expanded.get("@id") {
                    Some(Value::String(uri)) => uri,
                    _ => continue,
                }
            }
            _ => continue,
        };
        converter.add_record(Record {
            prefix: term.clone(),
            uri_prefix: uri_prefix.clone(),
            prefix_synonyms: HashSet::new(),
            uri_prefix_synonyms: HashSet::new(),
        })?;
    }
    Ok(converter)
}

/// Add a `Record` to the `Converter`
/// When adding a new record we create a reference to the `Record` (Arc)
/// And we use this reference in the prefix and URI hashmaps
pub fn add_record(&mut self, record: Record) -> Result<(), DuplicateRecordError> {
pub fn add_record(&mut self, record: Record) -> Result<(), CuriesError> {
let rec = Arc::new(record);
if self.prefix_map.contains_key(&rec.prefix) {
return Err(DuplicateRecordError(rec.prefix.clone()));
return Err(CuriesError::DuplicateRecord(rec.prefix.clone()));
}
if self.uri_map.contains_key(&rec.uri_prefix) {
return Err(DuplicateRecordError(rec.uri_prefix.clone()));
return Err(CuriesError::DuplicateRecord(rec.uri_prefix.clone()));
}
// TODO: check if synonyms are unique?

Expand Down Expand Up @@ -128,6 +185,60 @@ impl Default for Converter {
}
}


/// Trait to provide the data as URL, HashMap, string, or Path to file
///
/// Implementors turn themselves into a JSON document represented as a map
/// from top-level keys to `serde_json::Value`s.
#[async_trait]
pub trait DataSource {
/// Consume the source and return its content as a map of JSON values,
/// or a `CuriesError` if the content could not be obtained.
async fn fetch(self) -> Result<HashMap<String, Value>, CuriesError>;
}
/// A map of JSON values is already in the target shape.
#[async_trait]
impl DataSource for HashMap<String, Value> {
/// Return the map unchanged; this implementation never fails.
async fn fetch(self) -> Result<HashMap<String, Value>, CuriesError> {
Ok(self)
}
}
/// A plain string-to-string map is lifted into a map of JSON string values.
#[async_trait]
impl DataSource for HashMap<String, String> {
    /// Wrap every value in `Value::String`, keeping the keys as they are.
    async fn fetch(self) -> Result<HashMap<String, Value>, CuriesError> {
        let mut document: HashMap<String, Value> = HashMap::new();
        for (key, text) in self {
            document.insert(key, Value::String(text));
        }
        Ok(document)
    }
}
/// A string is treated either as a URL to download or as raw JSON text.
#[async_trait]
impl DataSource for &str {
    /// Fetch the JSON document designated by this string.
    ///
    /// Strings starting with `https://`, `http://` or `ftp://` are requested
    /// with `reqwest::get`; any other string is parsed directly as JSON.
    ///
    /// # Errors
    /// - `CuriesError::Reqwest` when the request fails, the response body
    ///   cannot be decoded, or the response status is not a success (the
    ///   message then contains the status and the response body).
    /// - `CuriesError::SerdeJson` when the inline string is not valid JSON.
    async fn fetch(self) -> Result<HashMap<String, Value>, CuriesError> {
        // NOTE(review): `ftp://` is routed to reqwest like the HTTP schemes;
        // confirm that the HTTP client actually supports it.
        let is_url = ["https://", "http://", "ftp://"]
            .iter()
            .any(|scheme| self.starts_with(*scheme));
        if !is_url {
            // Directly parsing the provided string as JSON
            return Ok(serde_json::from_str(self)?);
        }

        // Making an HTTP request
        let res = reqwest::get(self).await?;
        // Capture the status first: reading the body consumes the response.
        let status = res.status();
        if status.is_success() {
            Ok(res.json().await?)
        } else {
            let body = res.text().await?;
            Err(CuriesError::Reqwest(format!("{}: {}", status, body)))
        }
    }
}
/// A filesystem path is read and its content parsed as JSON.
#[async_trait]
impl DataSource for &Path {
    /// Read the file at this path and parse it as a JSON document.
    ///
    /// # Errors
    /// - `CuriesError::NotFound` (carrying the path) when the path does not exist.
    /// - `CuriesError::StdIo` when the file cannot be opened or read.
    /// - `CuriesError::SerdeJson` when the content is not valid JSON.
    async fn fetch(self) -> Result<HashMap<String, Value>, CuriesError> {
        if !self.exists() {
            // `display()` renders the path itself; the previous
            // `{:?}` of `to_str()` produced `Some("...")` in the message.
            return Err(CuriesError::NotFound(self.display().to_string()));
        }
        // Reading from a file path
        // NOTE(review): this uses synchronous std I/O inside an async fn.
        let mut file = File::open(self)?;
        let mut contents = String::new();
        file.read_to_string(&mut contents)?;
        Ok(serde_json::from_str(&contents)?)
    }
}

// Python API: https://github.com/cthoyt/curies/blob/main/src/curies/api.py#L1099
// A HashSet is more efficient than a Vec for lookups: https://stackoverflow.com/questions/3185226/huge-performance-difference-between-vector-and-hashset
// But a HashSet is unordered, while a Vec preserves insertion order
Expand Down
6 changes: 6 additions & 0 deletions lib/src/sources.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
use crate::{Converter, error::CuriesError};


/// Create a `Converter` for the OBO prefixes.
///
/// Passes the OBO JSON-LD context location
/// (`http://purl.obolibrary.org/meta/obo_context.jsonld`) to
/// `Converter::from_jsonld` and returns its result, including any
/// `CuriesError` it reports.
pub async fn get_obo_converter() -> Result<Converter, CuriesError> {
Converter::from_jsonld("http://purl.obolibrary.org/meta/obo_context.jsonld").await
}
48 changes: 39 additions & 9 deletions lib/tests/curies_test.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use curies::{Converter, Record};
use std::collections::HashSet;
use curies::{Converter, Record, sources::get_obo_converter};
use std::collections::{HashSet, HashMap};

#[test]
fn main_tests() -> Result<(), Box<dyn std::error::Error>> {
fn new_empty_converter() -> Result<(), Box<dyn std::error::Error>> {
let mut converter = Converter::new();

let record1 = Record {
Expand All @@ -25,13 +25,11 @@ fn main_tests() -> Result<(), Box<dyn std::error::Error>> {
assert_eq!(curie.prefix, "doid");
println!("Found CURIE by prefix: {}", curie.prefix);

let curie = converter
.find_by_uri_prefix("http://purl.obolibrary.org/obo/DOID_")?;
let curie = converter.find_by_uri_prefix("http://purl.obolibrary.org/obo/DOID_")?;
assert_eq!(curie.prefix, "doid");
println!("Found CURIE by URI prefix: {}", curie.prefix);

let curie = converter
.find_by_uri("http://purl.obolibrary.org/obo/DOID_1234")?;
let curie = converter.find_by_uri("http://purl.obolibrary.org/obo/DOID_1234")?;
assert_eq!(curie.prefix, "doid");
println!("Found CURIE by URI: {}", curie.prefix);

Expand All @@ -40,9 +38,41 @@ fn main_tests() -> Result<(), Box<dyn std::error::Error>> {
println!("Expanded CURIE: {}", uri);
assert_eq!(uri, "http://purl.obolibrary.org/obo/DOID_1234");

let curie = converter
.compress("http://purl.obolibrary.org/obo/DOID_1234")?;
let curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234")?;
println!("Compressed URI: {}", curie);
assert_eq!(curie, "doid:1234");
Ok(())
}


/// A converter built from a prefix map must expand and compress DOID CURIEs.
#[test]
fn from_prefix_map_converter() -> Result<(), Box<dyn std::error::Error>> {
    let entries = [
        ("DOID", "http://purl.obolibrary.org/obo/DOID_"),
        ("OBO", "http://purl.obolibrary.org/obo/"),
    ];
    let prefix_map: HashMap<String, String> = entries
        .iter()
        .map(|(prefix, uri_prefix)| (prefix.to_string(), uri_prefix.to_string()))
        .collect();
    let converter = Converter::from_prefix_map(prefix_map)?;

    // CURIE -> URI
    let expanded = converter.expand("DOID:1234")?;
    println!("Expanded CURIE: {}", expanded);
    assert_eq!(expanded, "http://purl.obolibrary.org/obo/DOID_1234");

    // URI -> CURIE
    let compressed = converter.compress("http://purl.obolibrary.org/obo/DOID_1234")?;
    println!("Compressed URI: {}", compressed);
    assert_eq!(compressed, "DOID:1234");
    Ok(())
}

/// A converter built by `get_obo_converter` must expand and compress DOID CURIEs.
#[tokio::test]
async fn from_jsonld_converter() -> Result<(), Box<dyn std::error::Error>> {
// NOTE(review): `get_obo_converter` is given a remote URL, so this test presumably needs network access — confirm for CI.
let converter = get_obo_converter().await?;

// CURIE -> URI
let uri = converter.expand("DOID:1234")?;
println!("Expanded CURIE: {}", uri);
assert_eq!(uri, "http://purl.obolibrary.org/obo/DOID_1234");

// URI -> CURIE
let curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234")?;
println!("Compressed URI: {}", curie);
assert_eq!(curie, "DOID:1234");
Ok(())
}

0 comments on commit 87f8cb7

Please sign in to comment.