From 32688aa2463d9a750bfd952a2cfcd5caa98e9d62 Mon Sep 17 00:00:00 2001
From: Vincent Prins
Date: Wed, 12 Jun 2024 14:38:36 +0200
Subject: [PATCH] Added huggingface yamls

---
Review note (text in this section is not part of the commit message):
each file below describes one INT Huggingface tagger: its id, tagset, era
range, produced layers, and links to the model, software and dataset.
In hug-tdn-1600-1900.yaml the model name and href previously pointed at the
1400-1600 model (copy-paste from the preceding file); they now reference the
1600-1900 model, matching that file's id and dataset.
TODO confirm that models/galahad/tagger/pos_model_tdn_1600-1900 exists at
tag 1.0.0 of galahad-huggingface-models.
The blob hash on that file's index line was not regenerated.

 server/data/taggers/hug-tdn-1400-1600.yaml    | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-1600-1900.yaml    | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-all-enhanced.yaml | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-all.yaml          | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-bab.yaml          | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-clvn.yaml         | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-cour.yaml         | 22 +++++++++++++++++++
 server/data/taggers/hug-tdn-dbnldq.yaml       | 22 +++++++++++++++++++
 8 files changed, 176 insertions(+)
 create mode 100644 server/data/taggers/hug-tdn-1400-1600.yaml
 create mode 100644 server/data/taggers/hug-tdn-1600-1900.yaml
 create mode 100644 server/data/taggers/hug-tdn-all-enhanced.yaml
 create mode 100644 server/data/taggers/hug-tdn-all.yaml
 create mode 100644 server/data/taggers/hug-tdn-bab.yaml
 create mode 100644 server/data/taggers/hug-tdn-clvn.yaml
 create mode 100644 server/data/taggers/hug-tdn-cour.yaml
 create mode 100644 server/data/taggers/hug-tdn-dbnldq.yaml

diff --git a/server/data/taggers/hug-tdn-1400-1600.yaml b/server/data/taggers/hug-tdn-1400-1600.yaml
new file mode 100644
index 0000000..ef3d03e
--- /dev/null
+++ b/server/data/taggers/hug-tdn-1400-1600.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-1400-1600
+description: "INT-Hug trained on DBNL excerpts, GTB dictionary quotations & CLVN (1400-1600)"
+tagset: "TDN-Core"
+eraFrom: "1400"
+eraTo: "1600"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-1400-1600
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_1400-1600
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: 1400-1600
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.1/combinations/1400-1600.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8110
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-1600-1900.yaml b/server/data/taggers/hug-tdn-1600-1900.yaml
new file mode 100644
index 0000000..12a3f29
--- /dev/null
+++ b/server/data/taggers/hug-tdn-1600-1900.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-1600-1900
+description: "INT-Hug trained on DBNL excerpts, GTB dictionary quotations, newspapers & letters as loot (1600-1900)"
+tagset: "TDN-Core"
+eraFrom: "1600"
+eraTo: "1900"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-1600-1900
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_1600-1900
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: 1600-1900
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.1/combinations/1600-1900.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8111
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-all-enhanced.yaml b/server/data/taggers/hug-tdn-all-enhanced.yaml
new file mode 100644
index 0000000..9fc3e05
--- /dev/null
+++ b/server/data/taggers/hug-tdn-all-enhanced.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-all-enhanced
+description: "INT-Hug trained on all TDN-Core material with enhancements for separable verbs"
+tagset: "TDN-Core"
+eraFrom: "1400"
+eraTo: "1900"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-all-enhanced
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_all_enhanced
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: ALL
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.1/combinations/ALL.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8117
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-all.yaml b/server/data/taggers/hug-tdn-all.yaml
new file mode 100644
index 0000000..a6912c3
--- /dev/null
+++ b/server/data/taggers/hug-tdn-all.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-all
+description: "INT-Hug trained on all TDN-Core material"
+tagset: "TDN-Core"
+eraFrom: "1400"
+eraTo: "1900"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-all
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_ALL
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: ALL
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.1/combinations/ALL.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8112
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-bab.yaml b/server/data/taggers/hug-tdn-bab.yaml
new file mode 100644
index 0000000..45e89e2
--- /dev/null
+++ b/server/data/taggers/hug-tdn-bab.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-bab
+description: "INT-Hug trained on letters as loot (Brieven als Buit)"
+tagset: "TDN-Core"
+eraFrom: "1600"
+eraTo: "1800"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-bab
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_BAB
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: letters-as-loot
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.0/combinations/BAB.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8113
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-clvn.yaml b/server/data/taggers/hug-tdn-clvn.yaml
new file mode 100644
index 0000000..f4f6bd0
--- /dev/null
+++ b/server/data/taggers/hug-tdn-clvn.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-clvn
+description: "INT-Hug trained on CLVN"
+tagset: "TDN-Core"
+eraFrom: "1500"
+eraTo: "1600"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-clvn
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_CLVN
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: CLVN
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.0/combinations/CLVN.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8114
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-cour.yaml b/server/data/taggers/hug-tdn-cour.yaml
new file mode 100644
index 0000000..d78b19c
--- /dev/null
+++ b/server/data/taggers/hug-tdn-cour.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-cour
+description: "INT-Hug trained on newspapers"
+tagset: "TDN-Core"
+eraFrom: "1600"
+eraTo: "1700"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-cour
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_COUR
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: couranten
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.0/combinations/COUR.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8115
+version: "1.0.0"
diff --git a/server/data/taggers/hug-tdn-dbnldq.yaml b/server/data/taggers/hug-tdn-dbnldq.yaml
new file mode 100644
index 0000000..51a9ae7
--- /dev/null
+++ b/server/data/taggers/hug-tdn-dbnldq.yaml
@@ -0,0 +1,22 @@
+id: hug-tdn-dbnldq
+description: "INT-Hug trained on DBNL excerpts & GTB dictionary quotations"
+tagset: "TDN-Core"
+eraFrom: "1400"
+eraTo: "1900"
+produces:
+  - LEM
+  - POS
+  - TOK
+model:
+  name: hug-tdn-dbnldq
+  href: https://github.com/INL/galahad-huggingface-models/tree/1.0.0/models/galahad/tagger/pos_model_tdn_DBNLDQ
+software:
+  name: int-huggingface
+  href: https://github.com/INL/int-huggingface-tagger/tree/1.0.0
+dataset:
+  name: DBNLDQ
+  href: https://github.com/INL/galahad-corpus-data/tree/1.0.1/combinations/DBNLDQ.combination.json
+trainedBy: "INT"
+date: "2024-06-12"
+devport: 8116
+version: "1.0.0"