From 5864b4a0b0b76805fbdd6a55458ddf97ceaad125 Mon Sep 17 00:00:00 2001 From: Montana Date: Thu, 8 Dec 2022 16:37:55 -0800 Subject: [PATCH 01/12] update 4 terms --- src/schema/nmdc.yaml | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 3fa79346f2..cf96608822 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -476,6 +476,53 @@ classes: required: false part_of: required: true + fire: + name: fire + title: history/fire + aliases: + - mixs:0001086 + description: Historical and/or physical evidence of fire + guidance: + - Provide the date the fire occurred. If extended burning occurred provide the date range. + examples: + - value: 1871-10-10 + - value: 1871-10-01 to 1871-10-31 + todos: + - is "to" acceptable? Is there a better way to request that be written? + annotations: + expected_value: + tag: expected_value + value: date + occurrence: + tag: occurrence + value: '1' + is_a: attribute + multivalued: false + range: text + flooding: + name: flooding + title: history/flooding + aliases: + - mixs:0000319 + annotations: + expected_value: + tag: expected_value + value: date + occurrence: + tag: occurrence + value: '1' + description: Historical and/or physical evidence of flooding + guidance: + - Provide the date the flood occurred. If extended flooding occurred provide the date range. + examples: + - value: 1927-04-15 + - value: 1927-04 to 1927-05 + todos: + - is "to" acceptable? Is there a better way to request that be written? + - What about if the "day" isn't known? Is this ok? 
+ is_a: attribute + multivalued: false + range: text id_prefixes: - GOLD From f620be8f8d91285cdd6bf842712ca085aa565392 Mon Sep 17 00:00:00 2001 From: Montana Date: Fri, 16 Dec 2022 12:56:19 -0800 Subject: [PATCH 02/12] corrected range, removed repeats --- src/schema/nmdc.yaml | 26 ++++---------------------- src/schema/portal/emsl.yaml | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index cf96608822..6423ad7bf5 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -477,11 +477,6 @@ classes: part_of: required: true fire: - name: fire - title: history/fire - aliases: - - mixs:0001086 - description: Historical and/or physical evidence of fire guidance: - Provide the date the fire occurred. If extended burning occurred provide the date range. examples: @@ -492,26 +487,15 @@ classes: annotations: expected_value: tag: expected_value - value: date - occurrence: - tag: occurrence - value: '1' + value: date string is_a: attribute multivalued: false - range: text + range: string flooding: - name: flooding - title: history/flooding - aliases: - - mixs:0000319 annotations: expected_value: tag: expected_value - value: date - occurrence: - tag: occurrence - value: '1' - description: Historical and/or physical evidence of flooding + value: date string guidance: - Provide the date the flood occurred. If extended flooding occurred provide the date range. examples: @@ -520,9 +504,7 @@ classes: todos: - is "to" acceptable? Is there a better way to request that be written? - What about if the "day" isn't known? Is this ok? 
- is_a: attribute - multivalued: false - range: text + range: string id_prefixes: - GOLD diff --git a/src/schema/portal/emsl.yaml b/src/schema/portal/emsl.yaml index 2593e834f7..13bf3b4a11 100644 --- a/src/schema/portal/emsl.yaml +++ b/src/schema/portal/emsl.yaml @@ -64,7 +64,7 @@ slots: sample_shipped: name: sample_shipped description: The total amount or size (volume (ml), mass (g) or area (m2) ) of - sample sent to EMSL + sample sent to EMSL. title: sample shipped amount from_schema: https://example.com/nmdc_dh rank: 3 @@ -72,6 +72,12 @@ slots: slot_group: EMSL required: false recommended: true + examples: + - value: 15 g + - value: 100 uL + - value: 5 mL + comments: + - This field is only required when completing metadata for samples being submitted to EMSL for analyses. sample_type: name: sample_type description: Type of sample being submitted @@ -88,11 +94,15 @@ slots: recommended: true technical_reps: name: technical_reps - description: If sending multiple technical replicates of the same sample, indicate - how many replicates are being sent + description: If sending technical replicates of the same sample, indicate the replicate number. title: number technical replicate + examples: + -value: 2 from_schema: https://example.com/nmdc_dh rank: 5 string_serialization: '{integer}' slot_group: EMSL recommended: true + comments: + - This field is only required when completing metadata for samples being submitted to EMSL for analyses. 
+ From a8371953278a9133a09d29b4a030d65ec6afe92c Mon Sep 17 00:00:00 2001 From: Montana Date: Mon, 19 Dec 2022 14:23:54 -0800 Subject: [PATCH 03/12] make test error ValueError: Unknown argument: -value = 2 slot=fire --- src/schema/nmdc.yaml | 58 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index ddce3267d4..e8f5e5c3d2 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -524,35 +524,35 @@ classes: required: false part_of: required: true - fire: - guidance: - - Provide the date the fire occurred. If extended burning occurred provide the date range. - examples: - - value: 1871-10-10 - - value: 1871-10-01 to 1871-10-31 - todos: - - is "to" acceptable? Is there a better way to request that be written? - annotations: - expected_value: - tag: expected_value - value: date string - is_a: attribute - multivalued: false - range: string - flooding: - annotations: - expected_value: - tag: expected_value - value: date string - guidance: - - Provide the date the flood occurred. If extended flooding occurred provide the date range. - examples: - - value: 1927-04-15 - - value: 1927-04 to 1927-05 - todos: - - is "to" acceptable? Is there a better way to request that be written? - - What about if the "day" isn't known? Is this ok? - range: string +# fire: +# comments: +# - Provide the date the fire occurred. If extended burning occurred provide the date range. +# examples: +# - value: 1871-10-10 +# - value: 1871-10-01 to 1871-10-31 +# todos: +# - is "to" acceptable? Is there a better way to request that be written? +# annotations: +# expected_value: +# tag: expected_value +# value: date string +# is_a: attribute +# multivalued: false +# range: string +# flooding: +# annotations: +# expected_value: +# tag: expected_value +# value: date string +# comments: +# - Provide the date the flood occurred. If extended flooding occurred provide the date range. 
+# examples: +# - value: 1927-04-15 +# - value: 1927-04 to 1927-05 +# todos: +# - is "to" acceptable? Is there a better way to request that be written? +# - What about if the "day" isn't known? Is this ok? +# range: string extreme_event: examples: From d810d059fce66cd9ac870aa4c023f7b731cb846d Mon Sep 17 00:00:00 2001 From: Montana Date: Mon, 19 Dec 2022 18:03:50 -0800 Subject: [PATCH 04/12] updated slots in mixs_inspired --- src/schema/nmdc.yaml | 59 ++++++++++++++-------------- src/schema/portal/mixs_inspired.yaml | 47 ++++++++++++++++++++-- 2 files changed, 72 insertions(+), 34 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index e8f5e5c3d2..0d2c24b2cf 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -524,36 +524,35 @@ classes: required: false part_of: required: true -# fire: -# comments: -# - Provide the date the fire occurred. If extended burning occurred provide the date range. -# examples: -# - value: 1871-10-10 -# - value: 1871-10-01 to 1871-10-31 -# todos: -# - is "to" acceptable? Is there a better way to request that be written? -# annotations: -# expected_value: -# tag: expected_value -# value: date string -# is_a: attribute -# multivalued: false -# range: string -# flooding: -# annotations: -# expected_value: -# tag: expected_value -# value: date string -# comments: -# - Provide the date the flood occurred. If extended flooding occurred provide the date range. -# examples: -# - value: 1927-04-15 -# - value: 1927-04 to 1927-05 -# todos: -# - is "to" acceptable? Is there a better way to request that be written? -# - What about if the "day" isn't known? Is this ok? -# range: string - + fire: + comments: + - Provide the date the fire occurred. If extended burning occurred provide the date range. + examples: + - value: 1871-10-10 + - value: 1871-10-01 to 1871-10-31 + todos: + - is "to" acceptable? Is there a better way to request that be written? 
+ annotations: + expected_value: + tag: expected_value + value: date string + is_a: attribute + multivalued: false + range: string + flooding: + annotations: + expected_value: + tag: expected_value + value: date string + comments: + - Provide the date the flood occurred. If extended flooding occurred provide the date range. + examples: + - value: 1927-04-15 + - value: 1927-04 to 1927-05 + todos: + - is "to" acceptable? Is there a better way to request that be written? + - What about if the "day" isn't known? Is this ok? + range: string extreme_event: examples: - value: 1980-05-18, volcanic eruption diff --git a/src/schema/portal/mixs_inspired.yaml b/src/schema/portal/mixs_inspired.yaml index 9848ec7f8f..ec9d211ba0 100644 --- a/src/schema/portal/mixs_inspired.yaml +++ b/src/schema/portal/mixs_inspired.yaml @@ -83,6 +83,8 @@ slots: string_serialization: '{text}' slot_group: MIxS Inspired recommended: true + comments: + - This is open text to describe your sample. Information put here will be used to consider additional slots/terms that should be provided for collection. filter_method: name: filter_method description: Type of filter used or how the sample was filtered @@ -94,9 +96,21 @@ slots: string_serialization: '{text}' slot_group: MIxS Inspired recommended: true + comments: + - describe the filter or provide a catalog number and manufacturer + examples: + - value: C18 + - value: Basix PES, 13-100-106 FisherSci isotope_exposure: name: isotope_exposure description: List isotope exposure or addition applied to your sample. + guidance: + - This is requred when your experimental design includes the use of isotopically labeled compounds + examples: + -value: 13C glucose + -value: H218O + todos: + - Can we make the H218O correctly super and subscripted? 
title: isotope exposure/addition from_schema: https://example.com/nmdc_dh see_also: @@ -108,6 +122,11 @@ slots: micro_biomass_c_meth: name: micro_biomass_c_meth description: Reference or method used in determining microbial biomass + guidance: + - Only required when providing a microbial biomass measurement value + examples: + -value: https://doi.org/10.1016/0038-0717(87)90052-6 + -value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 title: microbial biomass carbon method comments: - required if "microbial_biomass_c" is provided @@ -121,6 +140,11 @@ slots: micro_biomass_n_meth: name: micro_biomass_n_meth description: Reference or method used in determining microbial biomass nitrogen + guidance: + - Only required when providing a microbial biomass measurement value + examples: + -value: https://doi.org/10.1016/0038-0717(87)90052-6 + -value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 title: microbial biomass nitrogen method comments: - required if "microbial_biomass_n" is provided @@ -133,8 +157,9 @@ slots: microbial_biomass_c: name: microbial_biomass_c description: The part of the organic matter in the soil that constitutes living - microorganisms smaller than 5-10 micrometer. If you keep this, you would need - to have correction factors used for conversion to the final units + microorganisms smaller than 5-10 micrometer. 
+ comments: + - If you provide this, correction factors used for conversion to the final units and method are required title: microbial biomass carbon from_schema: https://example.com/nmdc_dh see_also: @@ -142,11 +167,16 @@ slots: rank: 10 string_serialization: '{float} {unit}' slot_group: MIxS Inspired + examples: + -value: 0.05 ug C/g dry soil microbial_biomass_n: name: microbial_biomass_n description: The part of the organic matter in the soil that constitutes living - microorganisms smaller than 5-10 micrometer. If you keep this, you would need - to have correction factors used for conversion to the final units + microorganisms smaller than 5-10 micrometer. + comments: + - If you provide this, correction factors used for conversion to the final units and method are required + examples: + -value: 0.05 ug N/g dry soil title: microbial biomass nitrogen from_schema: https://example.com/nmdc_dh see_also: @@ -166,6 +196,8 @@ slots: rank: 8 string_serialization: '{text};{float} {unit}' slot_group: MIxS Inspired + examples: + - insect 0.23 ug; plant 1g non_microb_biomass_method: name: non_microb_biomass_method description: Reference or method used in determining biomass @@ -178,12 +210,16 @@ slots: rank: 9 string_serialization: '{PMID}|{DOI}|{URL}' slot_group: MIxS Inspired + example: + - https://doi.org/10.1038/s41467-021-26181-3 org_nitro_method: name: org_nitro_method description: Method used for obtaining organic nitrogen title: organic nitrogen method comments: - required if "org_nitro" is provided + examples: + -value: https://doi.org/10.1016/0038-0717(85)90144-0 from_schema: https://example.com/nmdc_dh see_also: - MIxS:org_nitro|tot_nitro_cont_meth @@ -202,6 +238,9 @@ slots: string_serialization: '{text}' slot_group: MIxS Inspired recommended: true + guidance: + - This is an open text field to provide any treatments that cannot be captured in the provided slots. + - Values entered here will be used to determine potential new slots. 
start_date_inc: name: start_date_inc description: Date the incubation was started. Only relevant for incubation samples. From 28e749a13f6ec9756a47269153ed67b5b4c37920 Mon Sep 17 00:00:00 2001 From: Montana Date: Tue, 20 Dec 2022 15:56:25 -0800 Subject: [PATCH 05/12] updated slots in mixs_inspired --- src/schema/portal/emsl.yaml | 2 +- src/schema/portal/mixs_inspired.yaml | 126 +++++++++++++-------------- 2 files changed, 62 insertions(+), 66 deletions(-) diff --git a/src/schema/portal/emsl.yaml b/src/schema/portal/emsl.yaml index 13bf3b4a11..1c53c91d1f 100644 --- a/src/schema/portal/emsl.yaml +++ b/src/schema/portal/emsl.yaml @@ -97,7 +97,7 @@ slots: description: If sending technical replicates of the same sample, indicate the replicate number. title: number technical replicate examples: - -value: 2 + - value: 2 from_schema: https://example.com/nmdc_dh rank: 5 string_serialization: '{integer}' diff --git a/src/schema/portal/mixs_inspired.yaml b/src/schema/portal/mixs_inspired.yaml index ec9d211ba0..c7005eec58 100644 --- a/src/schema/portal/mixs_inspired.yaml +++ b/src/schema/portal/mixs_inspired.yaml @@ -5,13 +5,13 @@ description: This file defines terms that appear in the 'MIxS Inspired' section title: 'NMDC Schema supplement for Submission Portal: MIxS Inspired' id: https://microbiomedata/schema/mixs_inspired imports: -- linkml:types + - linkml:types license: 'license: https://creativecommons.org/publicdomain/zero/1.0/' prefixes: -- prefix_prefix: linkml - prefix_reference: https://w3id.org/linkml/ -- prefix_prefix: nmdc - prefix_reference: https://w3id.org/nmdc/ + - prefix_prefix: linkml + prefix_reference: https://w3id.org/linkml/ + - prefix_prefix: nmdc + prefix_reference: https://w3id.org/nmdc/ default_prefix: nmdc default_range: string slots: @@ -21,15 +21,15 @@ slots: for incubation samples. title: incubation collection date notes: - - MIxS collection_date accepts (truncated) ISO8601. 
DH taking arbitrary precision date only + - MIxS collection_date accepts (truncated) ISO8601. DH taking arbitrary precision date only comments: - - Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, 2021-04 and 2021 - are all acceptable. + - Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, 2021-04 and 2021 + are all acceptable. examples: - - value: 2021-04-15, 2021-04 and 2021 are all acceptable. + - value: 2021-04-15, 2021-04 and 2021 are all acceptable. from_schema: https://example.com/nmdc_dh see_also: - - MIxS:collection_date + - MIxS:collection_date rank: 2 string_serialization: '{date, arbitrary precision}' slot_group: MIxS Inspired @@ -39,15 +39,15 @@ slots: description: The time of sampling, either as an instance (single point) or interval. title: collection time, GMT notes: - - MIxS collection_date accepts (truncated) ISO8601. DH taking seconds optional - time only + - MIxS collection_date accepts (truncated) ISO8601. DH taking seconds optional + time only comments: - - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' + - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' examples: - - value: 13:33 or 13:33:55 + - value: 13:33 or 13:33:55 from_schema: https://example.com/nmdc_dh see_also: - - MIxS:collection_date + - MIxS:collection_date rank: 1 string_serialization: '{time, seconds optional}' slot_group: MIxS Inspired @@ -58,15 +58,15 @@ slots: for incubation samples. title: incubation collection time, GMT notes: - - MIxS collection_date accepts (truncated) ISO8601. DH taking seconds optional - time only + - MIxS collection_date accepts (truncated) ISO8601. DH taking seconds optional + time only comments: - - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' + - 'Time should be entered as HH:MM(:SS) in GMT. 
See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' examples: - - value: 13:33 or 13:33:55 + - value: 13:33 or 13:33:55 from_schema: https://example.com/nmdc_dh see_also: - - MIxS:collection_date + - MIxS:collection_date rank: 3 string_serialization: '{time, seconds optional}' slot_group: MIxS Inspired @@ -78,7 +78,7 @@ slots: title: experimental factor- other from_schema: https://example.com/nmdc_dh see_also: - - MIxS:experimental_factor|additional_info + - MIxS:experimental_factor|additional_info rank: 7 string_serialization: '{text}' slot_group: MIxS Inspired @@ -91,7 +91,7 @@ slots: title: filter method from_schema: https://example.com/nmdc_dh see_also: - - MIxS:filter_type + - MIxS:filter_type rank: 6 string_serialization: '{text}' slot_group: MIxS Inspired @@ -104,17 +104,17 @@ slots: isotope_exposure: name: isotope_exposure description: List isotope exposure or addition applied to your sample. - guidance: - - This is requred when your experimental design includes the use of isotopically labeled compounds + comments: + - This is required when your experimental design includes the use of isotopically labeled compounds examples: - -value: 13C glucose - -value: H218O + - value: 13C glucose + - value: H218O todos: - Can we make the H218O correctly super and subscripted? 
title: isotope exposure/addition from_schema: https://example.com/nmdc_dh see_also: - - MIxS:chem_administration + - MIxS:chem_administration rank: 16 string_serialization: '{termLabel} {[termID]}; {timestamp}' slot_group: MIxS Inspired @@ -122,17 +122,15 @@ slots: micro_biomass_c_meth: name: micro_biomass_c_meth description: Reference or method used in determining microbial biomass - guidance: - - Only required when providing a microbial biomass measurement value examples: - -value: https://doi.org/10.1016/0038-0717(87)90052-6 - -value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 + - value: https://doi.org/10.1016/0038-0717(87)90052-6 + - value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 title: microbial biomass carbon method comments: - - required if "microbial_biomass_c" is provided + - required if "microbial_biomass_c" is provided from_schema: https://example.com/nmdc_dh see_also: - - MIxS:micro_biomass_meth + - MIxS:micro_biomass_meth rank: 11 string_serialization: '{PMID}|{DOI}|{URL}' slot_group: MIxS Inspired @@ -140,17 +138,15 @@ slots: micro_biomass_n_meth: name: micro_biomass_n_meth description: Reference or method used in determining microbial biomass nitrogen - guidance: - - Only required when providing a microbial biomass measurement value examples: - -value: https://doi.org/10.1016/0038-0717(87)90052-6 - -value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 + - value: https://doi.org/10.1016/0038-0717(87)90052-6 + - value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 title: microbial biomass nitrogen method comments: - - required if "microbial_biomass_n" is provided + - required if "microbial_biomass_n" is provided from_schema: https://example.com/nmdc_dh 
see_also: - - MIxS:micro_biomass_meth + - MIxS:micro_biomass_meth rank: 13 string_serialization: '{PMID}|{DOI}|{URL}' slot_group: MIxS Inspired @@ -163,12 +159,12 @@ slots: title: microbial biomass carbon from_schema: https://example.com/nmdc_dh see_also: - - MIxS:microbial_biomass + - MIxS:microbial_biomass rank: 10 string_serialization: '{float} {unit}' slot_group: MIxS Inspired examples: - -value: 0.05 ug C/g dry soil + - value: 0.05 ug C/g dry soil microbial_biomass_n: name: microbial_biomass_n description: The part of the organic matter in the soil that constitutes living @@ -176,11 +172,11 @@ slots: comments: - If you provide this, correction factors used for conversion to the final units and method are required examples: - -value: 0.05 ug N/g dry soil + - value: 0.05 ug N/g dry soil title: microbial biomass nitrogen from_schema: https://example.com/nmdc_dh see_also: - - MIxS:microbial_biomass + - MIxS:microbial_biomass rank: 12 string_serialization: '{float} {unit}' slot_group: MIxS Inspired @@ -192,37 +188,37 @@ slots: title: non-microbial biomass from_schema: https://example.com/nmdc_dh see_also: - - MIxS:biomass|microbial_biomass + - MIxS:biomass|microbial_biomass rank: 8 string_serialization: '{text};{float} {unit}' slot_group: MIxS Inspired examples: - - insect 0.23 ug; plant 1g + - value: insect 0.23 ug; plant 1g non_microb_biomass_method: name: non_microb_biomass_method description: Reference or method used in determining biomass title: non-microbial biomass method comments: - - required if "non-microbial biomass" is provided + - required if "non-microbial biomass" is provided from_schema: https://example.com/nmdc_dh see_also: - - MIxS:microbial_biomass + - MIxS:microbial_biomass rank: 9 string_serialization: '{PMID}|{DOI}|{URL}' slot_group: MIxS Inspired - example: - - https://doi.org/10.1038/s41467-021-26181-3 + examples: + - value: https://doi.org/10.1038/s41467-021-26181-3 org_nitro_method: name: org_nitro_method description: Method used for 
obtaining organic nitrogen title: organic nitrogen method comments: - - required if "org_nitro" is provided + - required if "org_nitro" is provided examples: - -value: https://doi.org/10.1016/0038-0717(85)90144-0 + - value: https://doi.org/10.1016/0038-0717(85)90144-0 from_schema: https://example.com/nmdc_dh see_also: - - MIxS:org_nitro|tot_nitro_cont_meth + - MIxS:org_nitro|tot_nitro_cont_meth rank: 14 string_serialization: '{PMID}|{DOI}|{URL}' slot_group: MIxS Inspired @@ -233,28 +229,28 @@ slots: title: other treatments from_schema: https://example.com/nmdc_dh see_also: - - MIxS:additional_info + - MIxS:additional_info rank: 15 string_serialization: '{text}' slot_group: MIxS Inspired recommended: true - guidance: + comments: - This is an open text field to provide any treatments that cannot be captured in the provided slots. - - Values entered here will be used to determine potential new slots. +# - Values entered here will be used to determine potential new slots. start_date_inc: name: start_date_inc description: Date the incubation was started. Only relevant for incubation samples. title: incubation start date notes: - - MIxS collection_date accepts (truncated) ISO8601. DH taking arbitrary precision date only + - MIxS collection_date accepts (truncated) ISO8601. DH taking arbitrary precision date only comments: - - Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, 2021-04 and 2021 - are all acceptable. + - Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, 2021-04 and 2021 + are all acceptable. examples: - - value: 2021-04-15, 2021-04 and 2021 are all acceptable. + - value: 2021-04-15, 2021-04 and 2021 are all acceptable. from_schema: https://example.com/nmdc_dh see_also: - - MIxS:collection_date + - MIxS:collection_date rank: 4 string_serialization: '{date, arbitrary precision}' slot_group: MIxS Inspired @@ -264,15 +260,15 @@ slots: description: Time the incubation was started. Only relevant for incubation samples. 
title: incubation start time, GMT notes: - - MIxS collection_date accepts (truncated) ISO8601. DH taking seconds optional - time only + - MIxS collection_date accepts (truncated) ISO8601. DH taking seconds optional + time only comments: - - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' + - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' examples: - - value: 13:33 or 13:33:55 + - value: 13:33 or 13:33:55 from_schema: https://example.com/nmdc_dh see_also: - - MIxS:collection_date + - MIxS:collection_date rank: 5 string_serialization: '{time, seconds optional}' slot_group: MIxS Inspired From c097bfdbf5feae5b1895deb3079f4d2b27e02edf Mon Sep 17 00:00:00 2001 From: Montana Date: Tue, 20 Dec 2022 16:06:36 -0800 Subject: [PATCH 06/12] removed redundancy in fire: slot usage --- src/schema/nmdc.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 0d2c24b2cf..78c4f98027 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -536,8 +536,6 @@ classes: expected_value: tag: expected_value value: date string - is_a: attribute - multivalued: false range: string flooding: annotations: From 7b0d930b172bf1fc473302f1a7bc80cb996ea4ae Mon Sep 17 00:00:00 2001 From: Montana Date: Fri, 23 Dec 2022 13:36:15 -0800 Subject: [PATCH 07/12] added examples and slot updates --- src/schema/nmdc.yaml | 198 ++++++++++++++++++++++++++- src/schema/portal/mixs_inspired.yaml | 10 +- 2 files changed, 204 insertions(+), 4 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 78c4f98027..e2f4d4aa27 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -496,7 +496,6 @@ classes: syntax: "{id_nmdc_prefix}:{id_typecode_biosample}-{id_shoulder}-{id_blade}{id_version}{id_locus}" interpolated: true partial_match: false - gold_biosample_identifiers: description: Unique 
identifier for a biosample submitted to GOLD that matches the NMDC submitted biosample comments: This is the ID provided by GOLD that starts with 'GB' @@ -508,7 +507,6 @@ classes: alternative_identifiers: description: Unique identifier for a biosample submitted to additional resources. Matches the entity that has been submitted to NMDC required: false - lat_lon: required: false description: This is currently a required field but it's not clear if this @@ -537,6 +535,8 @@ classes: tag: expected_value value: date string range: string + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot flooding: annotations: expected_value: @@ -551,6 +551,8 @@ classes: - is "to" acceptable? Is there a better way to request that be written? - What about if the "day" isn't known? Is this ok? range: string + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot extreme_event: examples: - value: 1980-05-18, volcanic eruption @@ -559,6 +561,198 @@ classes: tag: expected_value value: date, string range: string + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + slope_aspect: + examples: + - value: 35 + comments: + - Aspect is the orientation of slope, measured clockwise in degrees from 0 to 360, where 0 is north-facing, 90 is east-facing, 180 is south-facing, and 270 is west-facing. + description: The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees). + - This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration. + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + slope_gradient: + examples: + - value: 10% + - value: 10 % + - value: 0.10 + todos: + - Slope is a percent. 
How does the validation work? Check to correct examples + + al_sat: + title: aluminum saturation/ extreme unusual properties + description: The relative abundance of aluminum in the sample + comments: + - Aluminum saturation is the percentage of the CEC occupies by aluminum. Like all cations, aluminum held by the cation exchange complex is in equilibrium with aluminum in the soil solution. + examples: + - value: 27% + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + - Example & validation. Can we make rules that 27% & 27 % & 0.27 will validate? + - I think it's weird the way GSC writes the title. I recommend this change. Thoughts? I would argue this isn't an extreme unusual property. It's just a biogeochemical measurement. + al_sat_meth: + title: aluminum saturation method/ extreme unusual properties + description: Reference or method used in determining Aluminum saturation + comments: Required when aluminum saturation is provided. + examples: + - value: https://doi.org/10.1371/journal.pone.0176357 + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + - I think it's weird the way GSC writes the title. I recommend this change. Thoughts? + annual_precpt: + examples: + - value: 8.94 inch + todos: + - This is no longer matching the listed IRI from GSC, added example. When NMDC has its own slots, map this to the MIxS slot + cur_vegetation: + description: Vegetation classification from one or more standard classification + systems, or agricultural crop + comments: + - Values provided here can be specific species of vegetation or vegetation regions + - See for vegetation regions- https://education.nationalgeographic.org/resource/vegetation-region + examples: + - value: deciduous forest + - values: forest + - values: Bauhinia variegata + todos: + - This is no longer matching the listed IRI from GSC. 
When NMDC has its own slots, map this to the MIxS slot + - Recommend changing this from text value to some king of ontology? + cur_vegetation_meth: + comments: + - Required when current vegetation is provided. + examples: + - value: https://doi.org/10.1111/j.1654-109X.2011.01154.x + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + - I'm not sure this is a DOI, PMID, or URI. Should pool the community and find out how they accomplish this if provided. + heavy_metals: + title: heavy metals/ extreme unusual properties + description: Heavy metals present in the sample and their concentrations. + comments: + - For multiple heavy metals and concentrations, separate by ; + notes: + - Changed to multi-valued. In MIxS, you add another column to denote multiple heavy metals. We don't have that ability in the submission portal. + examples: + - value: mercury 0.09 micrograms per gram + - value: mercury 0.09 ug/g; chromium 0.03 ug/g + multivalued: true + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + - Example & validation. Can we make rules that 27% & 27 % & 0.27 will validate? + - I think it's weird the way GSC writes the title. I recommend this change. Thoughts? I would argue this isn't an extreme unusual property. It's just a biogeochemical measurement. + heavy_metals_meth: + title: heavy metals method/ extreme unusual properties + multivalued: true + comments: + - Required when heavy metals are provided + - If different methods are used for multiple metals, indicate the metal and method. Separate metals by ; + examples: + - values: https://doi.org/10.3390/ijms9040434 + - values: mercury https://doi.org/10.1007/BF01056090; chromium https://doi.org/10.1007/s00216-006-0322-8 + todos: + - This is no longer matching the listed IRI from GSC. 
When NMDC has its own slots, map this to the MIxS slot + season_precpt: + title: average seasonal precipitation + guidance: + - Seasons are defined as spring (March, April, May), summer (June, July, August), autumn (September, October, November) and winter (December, January, February). + examples: + - value: 0.4 inch + - value: 10.16 mm + notes: + - mean and average are the same thing, but it seems like bad practice to not be consistent. Changed mean to average + todos: + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + - check validation & examples. always mm? so value only? Or value + unit + water_cont_soil_meth: + comments: + - Required if providing water content + examples: + - value: J. Nat. Prod. Plant Resour., 2012, 2 (4):500-503 + - values: https://dec.alaska.gov/applications/spar/webcalc/definitions.htm + todos: + - Why is it soil water content method in the name but not the title? Is this slot used in other samples? + - Soil water content can be measure MANY ways and often, multiple ways are used in one experiment (gravimetric water content and water holding capacity and water filled pore space, to name a few). + - Should this be multi valued? How to we manage and validate this? + water_content: + annotations: + expected_value: + tag: expected_value + value: string + preferred_unit: + tag: preferred_unit + #value: gram per gram or cubic centimeter per cubic centimeter + #multivalued: true + range: string + examples: + - value: 75% + - value: 75 % + - value: 0.75 g water/g dry soil + - value: 75% water holding capacity + - value: 1.1 g fresh weight/ dry weight + - value: 10% water filled pore space + todos: + - value in preferred unit is too limiting. need to change this + - check and correct validation so examples are accepted + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + - how to manage multiple water content methods? 
+ ph_meth: + examples: + - value: https://www.southernlabware.com/pc9500-benchtop-ph-conductivity-meter-kit-ph-accuracy-2000mv-ph-range-2-000-to-20-000.html?gclid=Cj0KCQiAwJWdBhCYARIsAJc4idCO5vtvbVMf545fcvdROFqa6zjzNSoywNx6K4k9Coo9cCc2pybtvGsaAiR0EALw_wcB + - value: https://doi.org/10.2136/sssabookser5.3.c16 + comments: + - This can include a link to the instrument used or a citation for the method. + tot_carb: + examples: + - values: 1 ug/L + todos: + - is this inorganic and organic? both? could use some clarification. + - ug/L doesn't seem like the right units. Should check this slots usage in databases and re-evaluate. I couldn't find any references that provided this data in this format + + + tot_nitro_cont_meth: + examples: + - value: https://doi.org/10.2134/agronmonogr9.2.c32 + - value: https://acsess.onlinelibrary.wiley.com/doi/full/10.2136/sssaj2009.0389?casa_token=bm0pYIUdNMgAAAAA%3AOWVRR0STHaOe-afTcTdxn5m1hM8n2ltM0wY-b1iYpYdD9dhwppk5j3LvC2IO5yhOIvyLVeQz4NZRCZo + tot_nitro_content: + examples: + - value: 5 mg N/ L + tot_org_c_meth: + examples: + - value: https://doi.org/10.1080/07352680902776556 + tot_org_carb: + examples: + - value: 5 mg N/ L + todos: + - check description. How are they different? + salinity_meth: + -examples: + - value: https://doi.org/10.1007/978-1-61779-986-0_28 + sieving: + guidance: + - Describe how samples were composited or sieved. + - Use 'sample link' to indicate which samples were combined. + examples: + - value: combined 2 cores | 4mm sieved + - value: 4 mm sieved and homogenized + - value: 50 g | 5 cores | 2 mm sieved + todos: + - check validation and examples + climate_environment: + todos: + - description says "can include multiple climates" but multivalued is set to false + - add examples, i need to see some examples to add correctly formatted example. + gaseous_environment: + todos: + - would like to see usage examples for this slot. 
Requiring micromole/L seems too limiting and doesn't match expected_value value + - did I do this right? keep the example that's provided and add another? soas to not override + examples: + - value: CO2; 500ppm above ambient; constant + - value: nitric oxide;0.5 micromole per liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M + watering_regm: + examples: + - value: 1 liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M + - value: 75% water holding capacity; constant id_prefixes: - GOLD diff --git a/src/schema/portal/mixs_inspired.yaml b/src/schema/portal/mixs_inspired.yaml index c7005eec58..d32c1e5f50 100644 --- a/src/schema/portal/mixs_inspired.yaml +++ b/src/schema/portal/mixs_inspired.yaml @@ -84,7 +84,11 @@ slots: slot_group: MIxS Inspired recommended: true comments: - - This is open text to describe your sample. Information put here will be used to consider additional slots/terms that should be provided for collection. + - This slot accepts open-ended text about your sample. + - We recommend using key:value pairs. + - Provided pairs will be considered for inclusion as future slots/terms in this data collection template. + examples: + - value: "experimental treatment: value" filter_method: name: filter_method description: Type of filter used or how the sample was filtered @@ -121,7 +125,7 @@ slots: recommended: true micro_biomass_c_meth: name: micro_biomass_c_meth - description: Reference or method used in determining microbial biomass + description: Reference or method used in determining microbial biomass carbon examples: - value: https://doi.org/10.1016/0038-0717(87)90052-6 - value: https://doi.org/10.1016/0038-0717(87)90052-6 | https://www.sciencedirect.com/science/article/abs/pii/0038071787900526 @@ -135,6 +139,8 @@ slots: string_serialization: '{PMID}|{DOI}|{URL}' slot_group: MIxS Inspired recommended: true + todos: + - How should we separate values? | or ;? 
lets be consistent micro_biomass_n_meth: name: micro_biomass_n_meth description: Reference or method used in determining microbial biomass nitrogen From 5eeaef19aa997fec2a78f754bd63b067f1a0dde1 Mon Sep 17 00:00:00 2001 From: Montana Date: Fri, 23 Dec 2022 13:56:17 -0800 Subject: [PATCH 08/12] commented out water_content, need help --- src/schema/nmdc.yaml | 161 +++++++++++++++++++++---------------------- 1 file changed, 78 insertions(+), 83 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index e2f4d4aa27..649e2ff52d 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -530,13 +530,12 @@ classes: - value: 1871-10-01 to 1871-10-31 todos: - is "to" acceptable? Is there a better way to request that be written? + - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot annotations: expected_value: tag: expected_value value: date string range: string - todos: - - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot flooding: annotations: expected_value: @@ -550,9 +549,8 @@ classes: todos: - is "to" acceptable? Is there a better way to request that be written? - What about if the "day" isn't known? Is this ok? - range: string - todos: - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot + range: string extreme_event: examples: - value: 1980-05-18, volcanic eruption @@ -579,7 +577,6 @@ classes: - value: 0.10 todos: - Slope is a percent. How does the validation work? 
Check to correct examples - al_sat: title: aluminum saturation/ extreme unusual properties description: The relative abundance of aluminum in the sample @@ -613,8 +610,8 @@ classes: - See for vegetation regions- https://education.nationalgeographic.org/resource/vegetation-region examples: - value: deciduous forest - - values: forest - - values: Bauhinia variegata + - value: forest + - value: Bauhinia variegata todos: - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot - Recommend changing this from text value to some king of ontology? @@ -648,13 +645,13 @@ classes: - Required when heavy metals are provided - If different methods are used for multiple metals, indicate the metal and method. Separate metals by ; examples: - - values: https://doi.org/10.3390/ijms9040434 - - values: mercury https://doi.org/10.1007/BF01056090; chromium https://doi.org/10.1007/s00216-006-0322-8 + - value: https://doi.org/10.3390/ijms9040434 + - value: mercury https://doi.org/10.1007/BF01056090; chromium https://doi.org/10.1007/s00216-006-0322-8 todos: - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot season_precpt: title: average seasonal precipitation - guidance: + comments: - Seasons are defined as spring (March, April, May), summer (June, July, August), autumn (September, October, November) and winter (December, January, February). examples: - value: 0.4 inch @@ -669,90 +666,88 @@ classes: - Required if providing water content examples: - value: J. Nat. Prod. Plant Resour., 2012, 2 (4):500-503 - - values: https://dec.alaska.gov/applications/spar/webcalc/definitions.htm + - value: https://dec.alaska.gov/applications/spar/webcalc/definitions.htm todos: - Why is it soil water content method in the name but not the title? Is this slot used in other samples? 
- Soil water content can be measure MANY ways and often, multiple ways are used in one experiment (gravimetric water content and water holding capacity and water filled pore space, to name a few). - Should this be multi valued? How to we manage and validate this? water_content: - annotations: - expected_value: - tag: expected_value - value: string - preferred_unit: - tag: preferred_unit - #value: gram per gram or cubic centimeter per cubic centimeter - #multivalued: true +# annotations: +# expected_value: +# tag: expected_value +# value: string +# preferred_unit: +# tag: preferred_unit +# #value: gram per gram or cubic centimeter per cubic centimeter +# #multivalued: true range: string - examples: - - value: 75% - - value: 75 % - - value: 0.75 g water/g dry soil - - value: 75% water holding capacity - - value: 1.1 g fresh weight/ dry weight - - value: 10% water filled pore space +# examples: +# - value: 75% +# - value: 75 % +# - value: 0.75 g water/g dry soil +# - value: 75% water holding capacity +# - value: 1.1 g fresh weight/ dry weight +# - value: 10% water filled pore space todos: - value in preferred unit is too limiting. need to change this - check and correct validation so examples are accepted - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot - how to manage multiple water content methods? - ph_meth: - examples: - - value: https://www.southernlabware.com/pc9500-benchtop-ph-conductivity-meter-kit-ph-accuracy-2000mv-ph-range-2-000-to-20-000.html?gclid=Cj0KCQiAwJWdBhCYARIsAJc4idCO5vtvbVMf545fcvdROFqa6zjzNSoywNx6K4k9Coo9cCc2pybtvGsaAiR0EALw_wcB - - value: https://doi.org/10.2136/sssabookser5.3.c16 - comments: - - This can include a link to the instrument used or a citation for the method. - tot_carb: - examples: - - values: 1 ug/L - todos: - - is this inorganic and organic? both? could use some clarification. - - ug/L doesn't seem like the right units. 
Should check this slots usage in databases and re-evaluate. I couldn't find any references that provided this data in this format - - - tot_nitro_cont_meth: - examples: - - value: https://doi.org/10.2134/agronmonogr9.2.c32 - - value: https://acsess.onlinelibrary.wiley.com/doi/full/10.2136/sssaj2009.0389?casa_token=bm0pYIUdNMgAAAAA%3AOWVRR0STHaOe-afTcTdxn5m1hM8n2ltM0wY-b1iYpYdD9dhwppk5j3LvC2IO5yhOIvyLVeQz4NZRCZo - tot_nitro_content: - examples: - - value: 5 mg N/ L - tot_org_c_meth: - examples: - - value: https://doi.org/10.1080/07352680902776556 - tot_org_carb: - examples: - - value: 5 mg N/ L - todos: - - check description. How are they different? - salinity_meth: - -examples: - - value: https://doi.org/10.1007/978-1-61779-986-0_28 - sieving: - guidance: - - Describe how samples were composited or sieved. - - Use 'sample link' to indicate which samples were combined. - examples: - - value: combined 2 cores | 4mm sieved - - value: 4 mm sieved and homogenized - - value: 50 g | 5 cores | 2 mm sieved - todos: - - check validation and examples - climate_environment: - todos: - - description says "can include multiple climates" but multivalued is set to false - - add examples, i need to see some examples to add correctly formatted example. - gaseous_environment: - todos: - - would like to see usage examples for this slot. Requiring micromole/L seems too limiting and doesn't match expected_value value - - did I do this right? keep the example that's provided and add another? 
soas to not override - examples: - - value: CO2; 500ppm above ambient; constant - - value: nitric oxide;0.5 micromole per liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M - watering_regm: - examples: - - value: 1 liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M - - value: 75% water holding capacity; constant + #ph_meth: +# examples: +# - value: https://www.southernlabware.com/pc9500-benchtop-ph-conductivity-meter-kit-ph-accuracy-2000mv-ph-range-2-000-to-20-000.html?gclid=Cj0KCQiAwJWdBhCYARIsAJc4idCO5vtvbVMf545fcvdROFqa6zjzNSoywNx6K4k9Coo9cCc2pybtvGsaAiR0EALw_wcB +# - value: https://doi.org/10.2136/sssabookser5.3.c16 +# comments: +# - This can include a link to the instrument used or a citation for the method. +# tot_carb: +# examples: +# - value: 1 ug/L +# todos: +# - is this inorganic and organic? both? could use some clarification. +# - ug/L doesn't seem like the right units. Should check this slots usage in databases and re-evaluate. I couldn't find any references that provided this data in this format +# tot_nitro_cont_meth: +# examples: +# - value: https://doi.org/10.2134/agronmonogr9.2.c32 +# - value: https://acsess.onlinelibrary.wiley.com/doi/full/10.2136/sssaj2009.0389?casa_token=bm0pYIUdNMgAAAAA%3AOWVRR0STHaOe-afTcTdxn5m1hM8n2ltM0wY-b1iYpYdD9dhwppk5j3LvC2IO5yhOIvyLVeQz4NZRCZo +# tot_nitro_content: +# examples: +# - value: 5 mg N/ L +# tot_org_c_meth: +# examples: +# - value: https://doi.org/10.1080/07352680902776556 +# tot_org_carb: +# examples: +# - value: 5 mg N/ L +# todos: +# - check description. How are they different? +# salinity_meth: +# -examples: +# - value: https://doi.org/10.1007/978-1-61779-986-0_28 +# sieving: +# comments: +# - Describe how samples were composited or sieved. +# - Use 'sample link' to indicate which samples were combined. 
+# examples: +# - value: combined 2 cores | 4mm sieved +# - value: 4 mm sieved and homogenized +# - value: 50 g | 5 cores | 2 mm sieved +# todos: +# - check validation and examples +# climate_environment: +# todos: +# - description says "can include multiple climates" but multivalued is set to false +# - add examples, i need to see some examples to add correctly formatted example. +# gaseous_environment: +# todos: +# - would like to see usage examples for this slot. Requiring micromole/L seems too limiting and doesn't match expected_value value +# - did I do this right? keep the example that's provided and add another? soas to not override +# examples: +# - value: CO2; 500ppm above ambient; constant +# - value: nitric oxide;0.5 micromole per liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M +# watering_regm: +# examples: +# - value: 1 liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M +# - value: 75% water holding capacity; constant id_prefixes: - GOLD From c32bf05328baf4059be54f983d8275ae598b81cb Mon Sep 17 00:00:00 2001 From: Montana Date: Fri, 23 Dec 2022 14:00:28 -0800 Subject: [PATCH 09/12] make all corrections. finished examples. --- src/schema/nmdc.yaml | 110 +++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 649e2ff52d..212fdc7285 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -693,61 +693,61 @@ classes: - check and correct validation so examples are accepted - This is no longer matching the listed IRI from GSC. When NMDC has its own slots, map this to the MIxS slot - how to manage multiple water content methods? 
- #ph_meth: -# examples: -# - value: https://www.southernlabware.com/pc9500-benchtop-ph-conductivity-meter-kit-ph-accuracy-2000mv-ph-range-2-000-to-20-000.html?gclid=Cj0KCQiAwJWdBhCYARIsAJc4idCO5vtvbVMf545fcvdROFqa6zjzNSoywNx6K4k9Coo9cCc2pybtvGsaAiR0EALw_wcB -# - value: https://doi.org/10.2136/sssabookser5.3.c16 -# comments: -# - This can include a link to the instrument used or a citation for the method. -# tot_carb: -# examples: -# - value: 1 ug/L -# todos: -# - is this inorganic and organic? both? could use some clarification. -# - ug/L doesn't seem like the right units. Should check this slots usage in databases and re-evaluate. I couldn't find any references that provided this data in this format -# tot_nitro_cont_meth: -# examples: -# - value: https://doi.org/10.2134/agronmonogr9.2.c32 -# - value: https://acsess.onlinelibrary.wiley.com/doi/full/10.2136/sssaj2009.0389?casa_token=bm0pYIUdNMgAAAAA%3AOWVRR0STHaOe-afTcTdxn5m1hM8n2ltM0wY-b1iYpYdD9dhwppk5j3LvC2IO5yhOIvyLVeQz4NZRCZo -# tot_nitro_content: -# examples: -# - value: 5 mg N/ L -# tot_org_c_meth: -# examples: -# - value: https://doi.org/10.1080/07352680902776556 -# tot_org_carb: -# examples: -# - value: 5 mg N/ L -# todos: -# - check description. How are they different? -# salinity_meth: -# -examples: -# - value: https://doi.org/10.1007/978-1-61779-986-0_28 -# sieving: -# comments: -# - Describe how samples were composited or sieved. -# - Use 'sample link' to indicate which samples were combined. -# examples: -# - value: combined 2 cores | 4mm sieved -# - value: 4 mm sieved and homogenized -# - value: 50 g | 5 cores | 2 mm sieved -# todos: -# - check validation and examples -# climate_environment: -# todos: -# - description says "can include multiple climates" but multivalued is set to false -# - add examples, i need to see some examples to add correctly formatted example. -# gaseous_environment: -# todos: -# - would like to see usage examples for this slot. 
Requiring micromole/L seems too limiting and doesn't match expected_value value -# - did I do this right? keep the example that's provided and add another? soas to not override -# examples: -# - value: CO2; 500ppm above ambient; constant -# - value: nitric oxide;0.5 micromole per liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M -# watering_regm: -# examples: -# - value: 1 liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M -# - value: 75% water holding capacity; constant + ph_meth: + examples: + - value: https://www.southernlabware.com/pc9500-benchtop-ph-conductivity-meter-kit-ph-accuracy-2000mv-ph-range-2-000-to-20-000.html?gclid=Cj0KCQiAwJWdBhCYARIsAJc4idCO5vtvbVMf545fcvdROFqa6zjzNSoywNx6K4k9Coo9cCc2pybtvGsaAiR0EALw_wcB + - value: https://doi.org/10.2136/sssabookser5.3.c16 + comments: + - This can include a link to the instrument used or a citation for the method. + tot_carb: + examples: + - value: 1 ug/L + todos: + - is this inorganic and organic? both? could use some clarification. + - ug/L doesn't seem like the right units. Should check this slots usage in databases and re-evaluate. I couldn't find any references that provided this data in this format + tot_nitro_cont_meth: + examples: + - value: https://doi.org/10.2134/agronmonogr9.2.c32 + - value: https://acsess.onlinelibrary.wiley.com/doi/full/10.2136/sssaj2009.0389?casa_token=bm0pYIUdNMgAAAAA%3AOWVRR0STHaOe-afTcTdxn5m1hM8n2ltM0wY-b1iYpYdD9dhwppk5j3LvC2IO5yhOIvyLVeQz4NZRCZo + tot_nitro_content: + examples: + - value: 5 mg N/ L + tot_org_c_meth: + examples: + - value: https://doi.org/10.1080/07352680902776556 + tot_org_carb: + examples: + - value: 5 mg N/ L + todos: + - check description. How are they different? + salinity_meth: + examples: + - value: https://doi.org/10.1007/978-1-61779-986-0_28 + sieving: + comments: + - Describe how samples were composited or sieved. + - Use 'sample link' to indicate which samples were combined. 
+ examples: + - value: combined 2 cores | 4mm sieved + - value: 4 mm sieved and homogenized + - value: 50 g | 5 cores | 2 mm sieved + todos: + - check validation and examples + climate_environment: + todos: + - description says "can include multiple climates" but multivalued is set to false + - add examples, i need to see some examples to add correctly formatted example. + gaseous_environment: + todos: + - would like to see usage examples for this slot. Requiring micromole/L seems too limiting and doesn't match expected_value value + - did I do this right? keep the example that's provided and add another? soas to not override + examples: + - value: CO2; 500ppm above ambient; constant + - value: nitric oxide;0.5 micromole per liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M + watering_regm: + examples: + - value: 1 liter;R2/2018-05-11T14:30/2018-05-11T19:30/P1H30M + - value: 75% water holding capacity; constant id_prefixes: - GOLD From aecbabc1ff47d1ccba46293e75baa9f01b80a124 Mon Sep 17 00:00:00 2001 From: "Mark A. 
Miller" Date: Tue, 24 Jan 2023 15:16:51 -0500 Subject: [PATCH 10/12] #382 validation pattern for fire --- Makefile | 6 ++-- src/schema/nmdc.yaml | 1 + .../minimal_biosample_invalid_fire.json | 29 +++++++++++++++++ .../minimal_biosample_with_fire_test.json | 29 +++++++++++++++++ test/data/minimal_valid_biosample_test.json | 32 +++++++++++++++++++ .../minimal_valid_biosample_with_fire.json | 29 +++++++++++++++++ 6 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 test/data/invalid_data/minimal_biosample_invalid_fire.json create mode 100644 test/data/invalid_data/minimal_biosample_with_fire_test.json create mode 100644 test/data/minimal_valid_biosample_test.json create mode 100644 test/data/minimal_valid_biosample_with_fire.json diff --git a/Makefile b/Makefile index 9874e57bb6..47eec168d8 100644 --- a/Makefile +++ b/Makefile @@ -225,7 +225,8 @@ SCHEMA_TEST_EXAMPLES := \ nmdc_example_database \ samp_prep_db \ study_credit_test \ - study_test + study_test \ + minimal_valid_biosample_with_fire SCHEMA_TEST_EXAMPLES_INVALID := \ biosample_invalid_range \ @@ -233,7 +234,8 @@ SCHEMA_TEST_EXAMPLES_INVALID := \ biosample_missing_required_field \ biosample_single_multi_value_mixup \ biosample_undeclared_slot \ - study_credit_enum_mangle + study_credit_enum_mangle \ + minimal_biosample_invalid_fire # functional_annotation_set_invalid has invalid ID pattern but regex tests aren't applied yet? 
MAM 2021-06-24 diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 212fdc7285..669d0d94af 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -536,6 +536,7 @@ classes: tag: expected_value value: date string range: string + pattern: ^[12]\d{3}(?:(?:-(?:0[1-9]|1[0-2]))(?:-(?:0[1-9]|[12]\d|3[01]))?)?(\s+to\s+[12]\d{3}(?:(?:-(?:0[1-9]|1[0-2]))(?:-(?:0[1-9]|[12]\d|3[01]))?)?)?$ flooding: annotations: expected_value: diff --git a/test/data/invalid_data/minimal_biosample_invalid_fire.json b/test/data/invalid_data/minimal_biosample_invalid_fire.json new file mode 100644 index 0000000000..c61c3a94d0 --- /dev/null +++ b/test/data/invalid_data/minimal_biosample_invalid_fire.json @@ -0,0 +1,29 @@ +{ + "biosample_set": [ + { + "id": "nmdc:bsm-99-dtTMNb", + "fire": "like a volcano", + "part_of": [ + "gold:Gs0110115" + ], + "env_broad_scale": { + "has_raw_value": "ENVO:00002030", + "term": { + "id": "ENVO:00002030" + } + }, + "env_local_scale": { + "has_raw_value": "ENVO:00002169", + "term": { + "id": "ENVO:00002169" + } + }, + "env_medium": { + "has_raw_value": "ENVO:00005792", + "term": { + "id": "ENVO:00005792" + } + } + } + ] +} \ No newline at end of file diff --git a/test/data/invalid_data/minimal_biosample_with_fire_test.json b/test/data/invalid_data/minimal_biosample_with_fire_test.json new file mode 100644 index 0000000000..391dd4a49e --- /dev/null +++ b/test/data/invalid_data/minimal_biosample_with_fire_test.json @@ -0,0 +1,29 @@ +{ + "biosample_set": [ + { + "id": "nmdc:bsm-99-dtTMNb", + "fire": "1871-10-01 to 1871-10-31", + "part_of": [ + "gold:Gs0110115" + ], + "env_broad_scale": { + "has_raw_value": "ENVO:00002030", + "term": { + "id": "ENVO:00002030" + } + }, + "env_local_scale": { + "has_raw_value": "ENVO:00002169", + "term": { + "id": "ENVO:00002169" + } + }, + "env_medium": { + "has_raw_value": "ENVO:00005792", + "term": { + "id": "ENVO:00005792" + } + } + } + ] +} \ No newline at end of file diff --git 
a/test/data/minimal_valid_biosample_test.json b/test/data/minimal_valid_biosample_test.json new file mode 100644 index 0000000000..4ce6772b90 --- /dev/null +++ b/test/data/minimal_valid_biosample_test.json @@ -0,0 +1,32 @@ +// 20230124150223 +// https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/test/data/minimal_valid_biosample_test.json + +{ + "biosample_set": [ + { + "id": "nmdc:bsm-99-dtTMNb", + "fire": "1871-10-01 to 1871-10-31", + "part_of": [ + "gold:Gs0110115" + ], + "env_broad_scale": { + "has_raw_value": "ENVO:00002030", + "term": { + "id": "ENVO:00002030" + } + }, + "env_local_scale": { + "has_raw_value": "ENVO:00002169", + "term": { + "id": "ENVO:00002169" + } + }, + "env_medium": { + "has_raw_value": "ENVO:00005792", + "term": { + "id": "ENVO:00005792" + } + } + } + ] +} \ No newline at end of file diff --git a/test/data/minimal_valid_biosample_with_fire.json b/test/data/minimal_valid_biosample_with_fire.json new file mode 100644 index 0000000000..391dd4a49e --- /dev/null +++ b/test/data/minimal_valid_biosample_with_fire.json @@ -0,0 +1,29 @@ +{ + "biosample_set": [ + { + "id": "nmdc:bsm-99-dtTMNb", + "fire": "1871-10-01 to 1871-10-31", + "part_of": [ + "gold:Gs0110115" + ], + "env_broad_scale": { + "has_raw_value": "ENVO:00002030", + "term": { + "id": "ENVO:00002030" + } + }, + "env_local_scale": { + "has_raw_value": "ENVO:00002169", + "term": { + "id": "ENVO:00002169" + } + }, + "env_medium": { + "has_raw_value": "ENVO:00005792", + "term": { + "id": "ENVO:00005792" + } + } + } + ] +} \ No newline at end of file From cb59bca9eb17fe8cd6dbae05f0912a5a9a77e6e4 Mon Sep 17 00:00:00 2001 From: "Mark A. 
Miller" Date: Tue, 24 Jan 2023 15:29:36 -0500 Subject: [PATCH 11/12] addresses many #382 subtasks --- jsonschema/nmdc.schema.json | 2 +- src/schema/portal/emsl.yaml | 2 +- .../minimal_biosample_invalid_fire.json | 2 +- .../minimal_biosample_with_fire_test.json | 29 ------------------- test/data/minimal_valid_biosample_test.json | 5 +--- .../minimal_valid_biosample_with_fire.json | 2 +- 6 files changed, 5 insertions(+), 37 deletions(-) delete mode 100644 test/data/invalid_data/minimal_biosample_with_fire_test.json diff --git a/jsonschema/nmdc.schema.json b/jsonschema/nmdc.schema.json index 274bf4551c..f141be2ca8 100644 --- a/jsonschema/nmdc.schema.json +++ b/jsonschema/nmdc.schema.json @@ -1 +1 @@ -{"$defs": {"Activity": {"additionalProperties": false, "description": "a provence-generating activity", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"$ref": "#/$defs/Agent"}, "was_informed_by": {"type": "string"}}, "required": ["id"], "title": "Activity", "type": "object"}, "Agent": {"additionalProperties": false, "description": "a provence-generating agent", "properties": {"acted_on_behalf_of": {"$ref": "#/$defs/Agent"}, "was_informed_by": {"type": "string"}}, "title": "Agent", "type": "object"}, "AnalysisTypeEnum": {"description": "", "enum": ["metabolomics", "metagenomics", "metaproteomics", "metatranscriptomics", "natural organic matter"], "title": "AnalysisTypeEnum", "type": "string"}, "AnalyticalSample": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "AnalyticalSample", "type": "object"}, "ArchStrucEnum": {"description": "", "enum": ["building", "shed", "home"], "title": "ArchStrucEnum", "type": "string"}, "AttributeValue": {"additionalProperties": false, "description": "The value for any value of a attribute for a sample. This object can hold both the un-normalized atomic value and the structured value", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "AttributeValue", "type": "object"}, "BiolStatEnum": {"description": "", "enum": ["wild", "natural", "semi-natural", "inbred line", "breeder's line", "hybrid", "clonal selection", "mutant"], "title": "BiolStatEnum", "type": "string"}, "Biosample": {"additionalProperties": false, "description": "Biological source material which can be characterized by an experiment.", "properties": {"add_date": {"description": "The date on which the information was added to the database.", "type": "string"}, "agrochem_addition": {"$ref": "#/$defs/QuantityValue", "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications"}, "air_temp_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens"}, "al_sat": {"$ref": "#/$defs/QuantityValue", "description": "Aluminum saturation (esp. 
For tropical soils)"}, "al_sat_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining Al saturation"}, "alkalinity": {"$ref": "#/$defs/QuantityValue", "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate"}, "alkalinity_method": {"$ref": "#/$defs/TextValue", "description": "Method used for alkalinity measurement"}, "alkyl_diethers": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of alkyl diethers"}, "alt": {"$ref": "#/$defs/QuantityValue", "description": "Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air"}, "alternative_identifiers": {"description": "Unique identifier for a biosample submitted to additional resources. 
Matches the entity that has been submitted to NMDC", "items": {"type": "string"}, "type": "array"}, "aminopept_act": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of aminopeptidase activity"}, "ammonium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of ammonium in the sample"}, "ammonium_nitrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of ammonium nitrogen in the sample"}, "analysis_type": {"description": "Select all the data types associated or available for this biosample", "items": {"$ref": "#/$defs/AnalysisTypeEnum"}, "type": "array"}, "annual_precpt": {"$ref": "#/$defs/QuantityValue", "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps."}, "annual_temp": {"$ref": "#/$defs/QuantityValue", "description": "Mean annual temperature"}, "bacteria_carb_prod": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of bacterial carbon production"}, "biosample_categories": {"items": {"$ref": "#/$defs/BiosampleCategoryEnum"}, "type": "array"}, "biotic_regm": {"$ref": "#/$defs/TextValue", "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi."}, "biotic_relationship": {"$ref": "#/$defs/TextValue", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object"}, "bishomohopanol": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of bishomohopanol"}, "bromide": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of bromide"}, "calcium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of calcium in the sample"}, "carb_nitro_ratio": {"$ref": "#/$defs/QuantityValue", "description": "Ratio of amount or concentrations of carbon to nitrogen"}, "chem_administration": {"$ref": "#/$defs/ControlledTermValue", "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi"}, "chloride": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of chloride in the sample"}, "chlorophyll": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of chlorophyll"}, "climate_environment": {"$ref": "#/$defs/TextValue", "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates"}, "collected_from": {"description": "The Site from which a Biosample was collected", "type": "string"}, "collection_date": {"$ref": "#/$defs/TimestampValue", "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant"}, "collection_date_inc": {"description": "Date the incubation was harvested/collected/ended. 
Only relevant for incubation samples.", "type": "string"}, "collection_time": {"description": "The time of sampling, either as an instance (single point) or interval.", "type": "string"}, "collection_time_inc": {"description": "Time the incubation was harvested/collected/ended. Only relevant for incubation samples.", "type": "string"}, "community": {"type": "string"}, "crop_rotation": {"$ref": "#/$defs/TextValue", "description": "Whether or not crop is rotated, and if yes, rotation schedule"}, "cur_land_use": {"$ref": "#/$defs/TextValue", "description": "Present state of sample site"}, "cur_vegetation": {"$ref": "#/$defs/TextValue", "description": "Vegetation classification from one or more standard classification systems, or agricultural crop"}, "cur_vegetation_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in vegetation classification"}, "density": {"$ref": "#/$defs/QuantityValue", "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)"}, "depth": {"$ref": "#/$defs/QuantityValue", "description": "The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples."}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "diss_carb_dioxide": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample"}, "diss_hydrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved hydrogen"}, "diss_inorg_carb": {"$ref": "#/$defs/QuantityValue", "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter"}, "diss_inorg_phosp": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved inorganic phosphorus in the sample"}, "diss_org_carb": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid"}, "diss_org_nitro": {"$ref": "#/$defs/QuantityValue", "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2"}, "diss_oxygen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved oxygen"}, "dna_absorb1": {"description": "260/280 measurement of DNA sample purity", "type": "string"}, "dna_absorb2": {"description": "260/230 measurement of DNA sample purity", "type": "string"}, "dna_collect_site": {"description": "Provide information on the site your DNA sample was collected from", "type": "string"}, "dna_concentration": {"maximum": 2000, "minimum": 0, "type": "string"}, "dna_cont_type": {"$ref": "#/$defs/DnaContTypeEnum", "description": "Tube or plate (96-well)"}, "dna_cont_well": {"type": "string"}, "dna_container_id": {"type": "string"}, "dna_dnase": {"$ref": "#/$defs/DnaDnaseEnum"}, "dna_isolate_meth": {"description": "Describe the method/protocol/kit used to extract DNA/RNA.", "type": "string"}, "dna_organisms": {"description": "List any organisms 
known or suspected to grow in co-culture, as well as estimated % of the organism in that culture.", "type": "string"}, "dna_project_contact": {"type": "string"}, "dna_samp_id": {"type": "string"}, "dna_sample_format": {"$ref": "#/$defs/DnaSampleFormatEnum", "description": "Solution in which the DNA sample has been suspended"}, "dna_sample_name": {"description": "Give the DNA sample a name that is meaningful to you. Sample names must be unique across all JGI projects and contain a-z, A-Z, 0-9, - and _ only.", "type": "string"}, "dna_seq_project": {"type": "string"}, "dna_seq_project_name": {"type": "string"}, "dna_seq_project_pi": {"type": "string"}, "dna_volume": {"maximum": 1000, "minimum": 0, "type": "string"}, "dnase_rna": {"$ref": "#/$defs/DnaseRnaEnum"}, "drainage_class": {"$ref": "#/$defs/TextValue", "description": "Drainage classification from a standard system such as the USDA system"}, "ecosystem": {"description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", "type": "string"}, "ecosystem_category": {"description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", "type": "string"}, "ecosystem_subtype": {"description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", "type": "string"}, "ecosystem_type": {"description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. 
Ecosystem type is in position 3/5 in a GOLD path.", "type": "string"}, "elev": {"$ref": "#/$defs/QuantityValue", "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit."}, "emsl_biosample_identifiers": {"description": "A list of identifiers for the biosample from the EMSL database. This is used to link the biosample, as modeled by NMDC, to the biosample in the planned EMSL NEXUS database.", "items": {"type": "string"}, "type": "array"}, "env_broad_scale": {"$ref": "#/$defs/ControlledIdentifiedTermValue", "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO\u2019s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS"}, "env_local_scale": {"$ref": "#/$defs/ControlledIdentifiedTermValue", "description": "Report the entity or entities which are in the sample or specimen\u2019s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS."}, "env_medium": {"$ref": "#/$defs/ControlledIdentifiedTermValue", "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)."}, "env_package": {"$ref": "#/$defs/TextValue", "description": "MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported", "pattern": "[air|built environment|host\\-associated|human\\-associated|human\\-skin|human\\-oral|human\\-gut|human\\-vaginal|hydrocarbon resources\\-cores|hydrocarbon resources\\-fluids\\/swabs|microbial mat\\/biofilm|misc environment|plant\\-associated|sediment|soil|wastewater\\/sludge|water]"}, "experimental_factor": {"$ref": "#/$defs/ControlledTermValue", "description": "Experimental factors are essentially the variable aspects of an experiment design which can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). 
For a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO; for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI"}, "experimental_factor_other": {"description": "Other details about your sample that you feel can't be accurately represented in the available columns.", "type": "string"}, "extreme_event": {"$ref": "#/$defs/TimestampValue", "description": "Unusual physical events that may have affected microbial populations"}, "fao_class": {"$ref": "#/$defs/TextValue", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups"}, "filter_method": {"description": "Type of filter used or how the sample was filtered", "type": "string"}, "fire": {"$ref": "#/$defs/TimestampValue", "description": "Historical and/or physical evidence of fire"}, "flooding": {"$ref": "#/$defs/TimestampValue", "description": "Historical and/or physical evidence of flooding"}, "gaseous_environment": {"$ref": "#/$defs/QuantityValue", "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens"}, "geo_loc_name": {"$ref": "#/$defs/TextValue", "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)"}, "glucosidase_act": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of glucosidase activity"}, "gold_biosample_identifiers": {"description": "Unique identifier for a biosample submitted to GOLD that matches the NMDC submitted biosample", "items": {"type": "string"}, "pattern": "^GOLD:Gb[0-9]+$", "type": "array"}, "growth_facil": {"$ref": "#/$defs/ControlledTermValue", "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research"}, "habitat": {"type": "string"}, "heavy_metals": {"$ref": "#/$defs/QuantityValue", "description": "Heavy metals present in the sequenced sample and their concentrations. For multiple heavy metals and concentrations, add multiple copies of this field."}, "heavy_metals_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining heavy metals"}, "host_name": {"type": "string"}, "humidity_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens"}, "id": {"description": "An NMDC assigned unique identifier for a biosample submitted to NMDC.", "pattern": "^nmdc:", "type": "string"}, "igsn_biosample_identifiers": {"description": "A list of identifiers for the biosample from the IGSN database.", "items": {"type": "string"}, "type": "array"}, "img_identifiers": 
{"description": "A list of identifiers that relate the biosample to records in the IMG database.", "items": {"type": "string"}, "type": "array"}, "insdc_biosample_identifiers": {"description": "identifiers for corresponding sample in INSDC", "items": {"type": "string"}, "pattern": "^biosample:SAM[NED]([A-Z])?[0-9]+$", "type": "array"}, "isotope_exposure": {"description": "List isotope exposure or addition applied to your sample.", "type": "string"}, "lat_lon": {"$ref": "#/$defs/GeolocationValue", "description": "This is currently a required field but it's not clear if this should be required for human hosts"}, "lbc_thirty": {"$ref": "#/$defs/QuantityValue", "description": "lime buffer capacity, determined after 30 minute incubation"}, "lbceq": {"$ref": "#/$defs/QuantityValue", "description": "lime buffer capacity, determined at equilibrium after 5 day incubation"}, "light_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality."}, "link_addit_analys": {"$ref": "#/$defs/TextValue", "description": "Link to additional analysis results performed on the sample"}, "link_class_info": {"$ref": "#/$defs/TextValue", "description": "Link to digitized soil maps or other soil classification information"}, "link_climate_info": {"$ref": "#/$defs/TextValue", "description": "Link to climate resource"}, "local_class": {"$ref": "#/$defs/TextValue", "description": "Soil classification based on local soil classification system"}, "local_class_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining the local soil classification"}, "location": {"type": "string"}, "magnesium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of magnesium in the sample"}, "manganese": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of manganese in the sample"}, "mean_frict_vel": {"$ref": "#/$defs/QuantityValue", "description": 
"Measurement of mean friction velocity"}, "mean_peak_frict_vel": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of mean peak friction velocity"}, "micro_biomass_c_meth": {"description": "Reference or method used in determining microbial biomass", "type": "string"}, "micro_biomass_n_meth": {"description": "Reference or method used in determining microbial biomass nitrogen", "type": "string"}, "microbial_biomass_c": {"description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "type": "string"}, "microbial_biomass_n": {"description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "type": "string"}, "misc_param": {"$ref": "#/$defs/QuantityValue", "description": "Any other measurement performed or parameter collected, that is not listed here"}, "mod_date": {"description": "The last date on which the database information was modified.", "type": "string"}, "n_alkanes": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of n-alkanes; can include multiple n-alkanes"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "ncbi_taxonomy_name": {"type": "string"}, "nitrate": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrate in the sample"}, "nitrate_nitrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrate nitrogen in the sample"}, "nitrite": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrite in the sample"}, "nitrite_nitrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrite nitrogen in the sample"}, "non_microb_biomass": {"description": "Amount of biomass; should include the name for 
the part of biomass measured, e.g. insect, plant, total. Can include multiple measurements separated by ;", "type": "string"}, "non_microb_biomass_method": {"description": "Reference or method used in determining biomass", "type": "string"}, "org_matter": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of organic matter"}, "org_nitro": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of organic nitrogen"}, "org_nitro_method": {"description": "Method used for obtaining organic nitrogen", "type": "string"}, "organism_count": {"$ref": "#/$defs/QuantityValue", "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)"}, "other_treatment": {"description": "Other treatments applied to your samples that are not applicable to the provided fields", "type": "string"}, "oxy_stat_samp": {"$ref": "#/$defs/TextValue", "description": "Oxygenation status of sample"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "part_org_carb": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of particulate organic carbon"}, "perturbation": {"$ref": "#/$defs/TextValue", "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types"}, "petroleum_hydrocarb": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of petroleum hydrocarbon"}, "ph": {"$ref": "#/$defs/QuantityValue", "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid"}, "ph_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining ph"}, "phaeopigments": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of phaeopigments; can include multiple phaeopigments"}, "phosphate": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of phosphate"}, "phosplipid_fatt_acid": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of phospholipid fatty acids; can include multiple values"}, "pool_dna_extracts": {"$ref": "#/$defs/TextValue", "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given"}, "potassium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of potassium in the sample"}, "pressure": {"$ref": "#/$defs/QuantityValue", "description": "Pressure to which the sample is subject, in atmospheres"}, "profile_position": {"$ref": "#/$defs/TextValue", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas"}, "project_id": {"description": "Proposal IDs or names associated with dataset", "type": "string"}, "proport_woa_temperature": {"type": "string"}, "proposal_dna": {"type": "string"}, "proposal_rna": {"type": "string"}, "redox_potential": {"$ref": "#/$defs/QuantityValue", "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential"}, "rel_to_oxygen": {"$ref": "#/$defs/TextValue", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments"}, "replicate_number": {"description": "If sending biological replicates, indicate the rep number here.", "type": "string"}, "rna_absorb1": {"description": "260/280 measurement of RNA sample purity", "type": "string"}, "rna_absorb2": {"description": "260/230 measurement of RNA sample purity", "type": "string"}, "rna_collect_site": {"description": "Provide information on the site your RNA sample was collected from", "type": "string"}, "rna_concentration": {"maximum": 1000, "minimum": 0, "type": "string"}, "rna_cont_type": {"$ref": "#/$defs/RnaContTypeEnum", "description": "Tube or plate (96-well)"}, "rna_cont_well": {"type": "string"}, "rna_container_id": {"type": "string"}, "rna_isolate_meth": {"description": "Describe the method/protocol/kit used to extract DNA/RNA.", "type": "string"}, "rna_organisms": {"description": "List any organisms known or suspected to grow in co-culture, as well as estimated % of 
the organism in that culture.", "type": "string"}, "rna_project_contact": {"type": "string"}, "rna_samp_id": {"type": "string"}, "rna_sample_format": {"$ref": "#/$defs/RnaSampleFormatEnum", "description": "Solution in which the RNA sample has been suspended"}, "rna_sample_name": {"description": "Give the RNA sample a name that is meaningful to you. Sample names must be unique across all JGI projects and contain a-z, A-Z, 0-9, - and _ only.", "maximum": 2000, "minimum": 0, "type": "string"}, "rna_seq_project": {"type": "string"}, "rna_seq_project_name": {"type": "string"}, "rna_seq_project_pi": {"type": "string"}, "rna_volume": {"type": "string"}, "salinity": {"$ref": "#/$defs/QuantityValue", "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater."}, "salinity_category": {"description": "Categorical description of the sample's salinity. Examples: halophile, halotolerant, hypersaline, euryhaline", "type": "string"}, "salinity_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining salinity"}, "samp_collec_method": {"description": "The method employed for collecting the sample.", "type": "string"}, "samp_mat_process": {"$ref": "#/$defs/ControlledTermValue", "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed."}, "samp_name": {"description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name.", "type": "string"}, "samp_size": {"$ref": "#/$defs/QuantityValue", "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected."}, "samp_store_dur": {"$ref": "#/$defs/TextValue", "description": "Duration for which the sample was stored"}, "samp_store_loc": {"$ref": "#/$defs/TextValue", "description": "Location at which sample was stored, usually name of a specific freezer/room"}, "samp_store_temp": {"$ref": "#/$defs/QuantityValue", "description": "Temperature at which sample was stored, e.g. -80 degree Celsius"}, "samp_vol_we_dna_ext": {"$ref": "#/$defs/QuantityValue", "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (mixs:0000001)."}, "sample_collection_site": {"type": "string"}, "sample_link": {"description": "JsonObj()", "items": {"type": "string"}, "type": "array"}, "sample_shipped": {"description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample sent to EMSL", "type": "string"}, "sample_type": {"$ref": "#/$defs/SampleTypeEnum", "description": "Type of sample being submitted"}, "season_precpt": {"$ref": "#/$defs/QuantityValue", "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps."}, "season_temp": {"$ref": "#/$defs/QuantityValue", "description": "Mean seasonal temperature"}, "sieving": {"$ref": "#/$defs/QuantityValue", "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved"}, "size_frac_low": {"$ref": "#/$defs/QuantityValue", "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample"}, "size_frac_up": {"$ref": "#/$defs/QuantityValue", "description": "Refers to the mesh/pore size used to retain the sample. Materials smaller than the size threshold are excluded from the sample"}, "slope_aspect": {"$ref": "#/$defs/QuantityValue", "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration."}, "slope_gradient": {"$ref": "#/$defs/QuantityValue", "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer"}, "sodium": {"$ref": "#/$defs/QuantityValue", "description": "Sodium concentration in the sample"}, "soil_type": {"$ref": "#/$defs/TextValue", "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes."}, "soil_type_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining soil series name or other lower-level classification"}, "soluble_iron_micromol": {"type": "string"}, "source_mat_id": {"$ref": "#/$defs/TextValue", "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)."}, "specific_ecosystem": {"description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", "type": "string"}, "start_date_inc": {"description": "Date the incubation was started. 
Only relevant for incubation samples.", "type": "string"}, "start_time_inc": {"description": "Time the incubation was started. Only relevant for incubation samples.", "type": "string"}, "store_cond": {"$ref": "#/$defs/TextValue", "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)."}, "subsurface_depth": {"$ref": "#/$defs/QuantityValue"}, "sulfate": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of sulfate in the sample"}, "sulfide": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of sulfide in the sample"}, "technical_reps": {"description": "If sending multiple technical replicates of the same sample, indicate how many replicates are being sent", "type": "string"}, "temp": {"$ref": "#/$defs/QuantityValue", "description": "Temperature of the sample at the time of sampling."}, "tidal_stage": {"$ref": "#/$defs/TextValue", "description": "Stage of tide"}, "tillage": {"$ref": "#/$defs/TextValue", "description": "Note method(s) used for tilling"}, "tot_carb": {"$ref": "#/$defs/QuantityValue", "description": "Total carbon content"}, "tot_depth_water_col": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of total depth of water column"}, "tot_diss_nitro": {"$ref": "#/$defs/QuantityValue", "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen"}, "tot_nitro_cont_meth": {"description": "Reference or method used in determining the total nitrogen", "type": "string"}, "tot_nitro_content": {"$ref": "#/$defs/QuantityValue", "description": "Total nitrogen content of the sample"}, "tot_org_c_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining total organic carbon"}, "tot_org_carb": {"$ref": "#/$defs/QuantityValue", "description": "Definition for soil: total organic carbon content of the soil, definition otherwise: total organic 
carbon content"}, "tot_phosp": {"$ref": "#/$defs/QuantityValue", "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus"}, "type": {"description": "An optional string that specifies the type of object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "water_cont_soil_meth": {"description": "Reference or method used in determining the water content of soil", "type": "string"}, "water_content": {"$ref": "#/$defs/QuantityValue", "description": "Water content measurement"}, "watering_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens"}, "zinc": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of zinc in the sample"}}, "required": ["part_of", "id", "env_broad_scale", "env_local_scale", "env_medium"], "title": "Biosample", "type": "object"}, "BiosampleCategoryEnum": {"description": "Funding-based, sample location-based, or experimental method-based defined categories", "enum": ["LTER", "SIP", "SFA", "FICUS", "NEON"], "title": "BiosampleCategoryEnum", "type": "string"}, "BiosampleProcessing": {"additionalProperties": false, "description": "A process that takes one or more biosamples as inputs and generates one or more as outputs. 
Examples of outputs include samples cultivated from another sample or data objects created by instrument runs.", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "BiosampleProcessing", "type": "object"}, "BioticRelationshipEnum": {"description": "", "enum": ["free living", "parasite", "commensal", "symbiont"], "title": "BioticRelationshipEnum", "type": "string"}, "BooleanValue": {"additionalProperties": false, "description": "A value that is a boolean", "properties": {"has_boolean_value": {"description": "Links a quantity value to a boolean", "type": "boolean"}, "has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "BooleanValue", "type": "object"}, "BuildDocsEnum": {"description": "", "enum": ["building information model", "commissioning report", "complaint logs", "contract administration", "cost estimate", "janitorial schedules or logs", "maintenance plans", "schedule", "sections", "shop drawings", "submittals", "ventilation system", "windows"], "title": "BuildDocsEnum", "type": "string"}, "BuildOccupTypeEnum": {"description": "", "enum": ["office", "market", "restaurant", "residence", "school", "residential", "commercial", "low rise", "high rise", "wood framed", "health care", "airport", "sports complex"], "title": "BuildOccupTypeEnum", "type": "string"}, "BuildingSettingEnum": {"description": "", "enum": ["urban", "suburban", "exurban", "rural"], "title": "BuildingSettingEnum", "type": "string"}, "CeilCondEnum": {"description": "", "enum": ["new", "visible wear", "needs repair", "damaged", "rupture"], "title": "CeilCondEnum", "type": "string"}, "CeilFinishMatEnum": {"description": "", "enum": ["drywall", "mineral fibre", "tiles", "PVC", "plasterboard", "metal", "fiberglass", "stucco", "mineral wool/calcium silicate", "wood"], "title": "CeilFinishMatEnum", "type": "string"}, "CeilTextureEnum": {"description": "", "enum": ["crows feet", "crows-foot stomp", "double skip", "hawk and trowel", "knockdown", "popcorn", "orange peel", "rosebud stomp", "Santa-Fe texture", "skip trowel", "smooth", "stomp knockdown", "swirl"], "title": "CeilTextureEnum", "type": "string"}, "CeilTypeEnum": {"description": "", "enum": ["cathedral", "dropped", "concave", "barrel-shaped", "coffered", "cove", "stretched"], "title": "CeilTypeEnum", "type": "string"}, "ChemicalEntity": {"additionalProperties": false, "description": "An atom or molecule that can be represented with a chemical formula. Include lipids, glycans, natural products, drugs. 
There may be different terms for distinct acid-base forms, protonation states", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "ChemicalEntity", "type": "object"}, "CollectingBiosamplesFromSite": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "has_inputs": {"items": {"type": "string"}, "type": "array"}, "has_outputs": {"items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "participating_agent": {"$ref": "#/$defs/Agent"}}, "required": ["has_inputs", "has_outputs", "id"], "title": "CollectingBiosamplesFromSite", "type": "object"}, "ContainerTypeEnum": {"description": "", "enum": ["screw_top_conical"], "title": "ContainerTypeEnum", "type": "string"}, "ControlledIdentifiedTermValue": {"additionalProperties": false, "description": "A controlled term or class from an ontology, requiring the presence of a term with an id", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", "type": "string"}, "term": {"$ref": "#/$defs/OntologyClass", "description": "pointer to an ontology class"}, "was_generated_by": {"type": "string"}}, "required": ["term"], "title": "ControlledIdentifiedTermValue", "type": "object"}, "ControlledTermValue": {"additionalProperties": false, "description": "A controlled term or class from an ontology", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "term": {"$ref": "#/$defs/OntologyClass", "description": "pointer to an ontology class"}, "was_generated_by": {"type": "string"}}, "title": "ControlledTermValue", "type": "object"}, "CreditAssociation": {"additionalProperties": false, "description": "This class supports binding associated researchers to studies. There will be at least a slot for a CRediT Contributor Role (https://casrai.org/credit/) and for a person value Specifically see the associated researchers tab on the NMDC_SampleMetadata-V4_CommentsForUpdates at https://docs.google.com/spreadsheets/d/1INlBo5eoqn2efn4H2P2i8rwRBtnbDVTqXrochJEAPko/edit#gid=0", "properties": {"applied_role": {"$ref": "#/$defs/CreditEnum"}, "applied_roles": {"items": {"$ref": "#/$defs/CreditEnum"}, "type": "array"}, "applies_to_person": {"$ref": "#/$defs/PersonValue"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}}, "required": ["applies_to_person", "applied_roles"], "title": "CreditAssociation", "type": "object"}, "CreditEnum": {"description": "", "enum": ["Conceptualization", "Data curation", "Formal Analysis", "Funding acquisition", "Investigation", "Methodology", "Project administration", "Resources", "Software", "Supervision", "Validation", "Visualization", "Writing original draft", "Writing review and editing", "Principal Investigator", "Submitter"], "title": "CreditEnum", "type": "string"}, "CurLandUseEnum": {"description": "", "enum": ["cities", "farmstead", "industrial areas", "roads/railroads", "rock", "sand", "gravel", "mudflats", "salt flats", "badlands", "permanent snow or ice", "saline seeps", "mines/quarries", "oil waste areas", "small grains", "row crops", "vegetable crops", "horticultural plants (e.g. tulips)", "marshlands (grass,sedges,rushes)", "tundra (mosses,lichens)", "rangeland", "pastureland (grasslands used for livestock grazing)", "hayland", "meadows (grasses,alfalfa,fescue,bromegrass,timothy)", "shrub land (e.g. mesquite,sage-brush,creosote bush,shrub oak,eucalyptus)", "successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries)", "shrub crops (blueberries,nursery ornamentals,filberts)", "vine crops (grapes)", "conifers (e.g. pine,spruce,fir,cypress)", "hardwoods (e.g. oak,hickory,elm,aspen)", "intermixed hardwood and conifers", "tropical (e.g. mangrove,palms)", "rainforest (evergreen forest receiving greater than 406 cm annual rainfall)", "swamp (permanent or semi-permanent water body dominated by woody plants)", "crop trees (nuts,fruit,christmas trees,nursery trees)"], "title": "CurLandUseEnum", "type": "string"}, "DataObject": {"additionalProperties": false, "description": "An object that primarily consists of symbols that represent information. 
Files, records, and omics data are examples of data objects.", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "compression_type": {"description": "If provided, specifies the compression type", "type": "string"}, "data_object_type": {"$ref": "#/$defs/FileTypeEnum", "description": "The type of file represented by the data object."}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "file_size_bytes": {"description": "Size of the file in bytes", "type": "integer"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "md5_checksum": {"description": "MD5 checksum of file (pre-compressed)", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "url": {"type": "string"}, "was_generated_by": {"type": "string"}}, "required": ["id", "name", "description"], "title": "DataObject", "type": "object"}, "Database": {"additionalProperties": false, "description": "An abstract holder for any set of metadata and data. It does not need to correspond to an actual managed database top level holder class. When translated to JSON-Schema this is the 'root' object. 
It should contain pointers to other objects of interest", "properties": {"activity_set": {"description": "This property links a database object to the set of workflow activities.", "items": {"$ref": "#/$defs/WorkflowExecutionActivity"}, "type": "array"}, "biosample_set": {"description": "This property links a database object to the set of samples within it.", "items": {"$ref": "#/$defs/Biosample"}, "type": "array"}, "cbfs_set": {"items": {"$ref": "#/$defs/CollectingBiosamplesFromSite"}, "type": "array"}, "data_object_set": {"description": "This property links a database object to the set of data objects within it.", "items": {"$ref": "#/$defs/DataObject"}, "type": "array"}, "dissolving_activity_set": {"items": {"$ref": "#/$defs/DissolvingActivity"}, "type": "array"}, "frs_set": {"items": {"$ref": "#/$defs/FieldResearchSite"}, "type": "array"}, "functional_annotation_set": {"description": "This property links a database object to the set of all functional annotations", "items": {"$ref": "#/$defs/FunctionalAnnotation"}, "type": "array"}, "genome_feature_set": {"description": "This property links a database object to the set of all features", "items": {"$ref": "#/$defs/GenomeFeature"}, "type": "array"}, "mags_activity_set": {"description": "This property links a database object to the set of MAGs analysis activities.", "items": {"$ref": "#/$defs/MagsAnalysisActivity"}, "type": "array"}, "material_sample_set": {"items": {"$ref": "#/$defs/MaterialSample"}, "type": "array"}, "material_sampling_activity_set": {"items": {"$ref": "#/$defs/MaterialSamplingActivity"}, "type": "array"}, "metabolomics_analysis_activity_set": {"description": "This property links a database object to the set of metabolomics analysis activities.", "items": {"$ref": "#/$defs/MetabolomicsAnalysisActivity"}, "type": "array"}, "metagenome_annotation_activity_set": {"description": "This property links a database object to the set of metagenome annotation activities.", "items": {"$ref": 
"#/$defs/MetagenomeAnnotationActivity"}, "type": "array"}, "metagenome_assembly_set": {"description": "This property links a database object to the set of metagenome assembly activities.", "items": {"$ref": "#/$defs/MetagenomeAssembly"}, "type": "array"}, "metaproteomics_analysis_activity_set": {"description": "This property links a database object to the set of metaproteomics analysis activities.", "items": {"$ref": "#/$defs/MetaproteomicsAnalysisActivity"}, "type": "array"}, "metatranscriptome_activity_set": {"description": "TODO", "items": {"$ref": "#/$defs/MetatranscriptomeActivity"}, "type": "array"}, "nom_analysis_activity_set": {"description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", "items": {"$ref": "#/$defs/NomAnalysisActivity"}, "type": "array"}, "omics_processing_set": {"description": "This property links a database object to the set of omics processings within it.", "items": {"$ref": "#/$defs/OmicsProcessing"}, "type": "array"}, "reaction_activity_set": {"items": {"$ref": "#/$defs/ReactionActivity"}, "type": "array"}, "read_based_taxonomy_analysis_activity_set": {"description": "This property links a database object to the set of read based analysis activities.", "items": {"$ref": "#/$defs/ReadBasedTaxonomyAnalysisActivity"}, "type": "array"}, "read_qc_analysis_activity_set": {"description": "This property links a database object to the set of read QC analysis activities.", "items": {"$ref": "#/$defs/ReadQcAnalysisActivity"}, "type": "array"}, "study_set": {"description": "This property links a database object to the set of studies within it.", "items": {"$ref": "#/$defs/Study"}, "type": "array"}}, "title": "Database", "type": "object"}, "DeposEnvEnum": {"description": "", "enum": ["Continental - Alluvial", "Continental - Aeolian", "Continental - Fluvial", "Continental - Lacustrine", "Transitional - Deltaic", "Transitional - Tidal", "Transitional - Lagoonal", "Transitional - Beach", 
"Transitional - Lake", "Marine - Shallow", "Marine - Deep", "Marine - Reef", "Other - Evaporite", "Other - Glacial", "Other - Volcanic", "other"], "title": "DeposEnvEnum", "type": "string"}, "DeviceTypeEnum": {"description": "", "enum": ["orbital_shaker", "thermomixer"], "title": "DeviceTypeEnum", "type": "string"}, "DissolvingActivity": {"additionalProperties": false, "description": "", "properties": {"dissolution_aided_by": {"$ref": "#/$defs/LabDevice"}, "dissolution_reagent": {"$ref": "#/$defs/SolventEnum"}, "dissolution_volume": {"$ref": "#/$defs/QuantityValue"}, "dissolved_in": {"$ref": "#/$defs/MaterialContainer"}, "material_input": {"type": "string"}, "material_output": {"type": "string"}}, "title": "DissolvingActivity", "type": "object"}, "DnaContTypeEnum": {"description": "", "enum": ["plate", "tube"], "title": "DnaContTypeEnum", "type": "string"}, "DnaDnaseEnum": {"description": "", "enum": ["no", "yes"], "title": "DnaDnaseEnum", "type": "string"}, "DnaSampleFormatEnum": {"description": "", "enum": ["10 mM Tris-HCl", "DNAStable", "Ethanol", "Low EDTA TE", "MDA reaction buffer", "PBS", "Pellet", "RNAStable", "TE", "Water"], "title": "DnaSampleFormatEnum", "type": "string"}, "DnaseRnaEnum": {"description": "", "enum": ["no", "yes"], "title": "DnaseRnaEnum", "type": "string"}, "DoorCompTypeEnum": {"description": "", "enum": ["metal covered", "revolving", "sliding", "telescopic"], "title": "DoorCompTypeEnum", "type": "string"}, "DoorCondEnum": {"description": "", "enum": ["damaged", "needs repair", "new", "rupture", "visible wear"], "title": "DoorCondEnum", "type": "string"}, "DoorDirectEnum": {"description": "", "enum": ["inward", "outward", "sideways"], "title": "DoorDirectEnum", "type": "string"}, "DoorLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "DoorLocEnum", "type": "string"}, "DoorMatEnum": {"description": "", "enum": ["aluminum", "cellular PVC", "engineered plastic", "fiberboard", "fiberglass", "metal", 
"thermoplastic alloy", "vinyl", "wood", "wood/plastic composite"], "title": "DoorMatEnum", "type": "string"}, "DoorMoveEnum": {"description": "", "enum": ["collapsible", "folding", "revolving", "rolling shutter", "sliding", "swinging"], "title": "DoorMoveEnum", "type": "string"}, "DoorTypeEnum": {"description": "", "enum": ["composite", "metal", "wooden"], "title": "DoorTypeEnum", "type": "string"}, "DoorTypeMetalEnum": {"description": "", "enum": ["collapsible", "corrugated steel", "hollow", "rolling shutters", "steel plate"], "title": "DoorTypeMetalEnum", "type": "string"}, "DoorTypeWoodEnum": {"description": "", "enum": ["bettened and ledged", "battened", "ledged and braced", "ledged and framed", "ledged, braced and frame", "framed and paneled", "glashed or sash", "flush", "louvered", "wire gauged"], "title": "DoorTypeWoodEnum", "type": "string"}, "DrainageClassEnum": {"description": "", "enum": ["very poorly", "poorly", "somewhat poorly", "moderately well", "well", "excessively drained"], "title": "DrainageClassEnum", "type": "string"}, "DrawingsEnum": {"description": "", "enum": ["operation", "as built", "construction", "bid", "design", "building navigation map", "diagram", "sketch"], "title": "DrawingsEnum", "type": "string"}, "EnvironmentalMaterialTerm": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "EnvironmentalMaterialTerm", "type": "object"}, "ExtWallOrientEnum": {"description": "", "enum": ["north", "south", "east", "west", "northeast", "southeast", "southwest", "northwest"], "title": "ExtWallOrientEnum", "type": "string"}, "ExtWindowOrientEnum": {"description": "", "enum": ["north", "south", "east", "west", "northeast", "southeast", "southwest", "northwest"], "title": "ExtWindowOrientEnum", "type": "string"}, "FaoClassEnum": {"description": "", "enum": ["Acrisols", "Andosols", "Arenosols", "Cambisols", "Chernozems", "Ferralsols", "Fluvisols", "Gleysols", "Greyzems", "Gypsisols", "Histosols", "Kastanozems", "Lithosols", "Luvisols", "Nitosols", "Phaeozems", "Planosols", "Podzols", "Podzoluvisols", "Rankers", "Regosols", "Rendzinas", "Solonchaks", "Solonetz", "Vertisols", "Yermosols"], "title": "FaoClassEnum", "type": "string"}, "FieldResearchSite": {"additionalProperties": false, "description": "A site, outside of a laboratory, from which biosamples may be collected.", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "FieldResearchSite", "type": "object"}, "FileTypeEnum": {"description": "", "enum": ["Metagenome Raw Reads", "FT ICR-MS Analysis Results", "GC-MS Metabolomics Results", "Metaproteomics Workflow Statistics", "Protein Report", "Peptide Report", "Unfiltered Metaproteomics Results", "Read Count and RPKM", "QC non-rRNA R2", "QC non-rRNA R1", "Metagenome Bins", "CheckM Statistics", "GOTTCHA2 Krona Plot", "GOTTCHA2 Classification Report", "GOTTCHA2 Report Full", "Kraken2 Krona Plot", "Centrifuge Krona Plot", "Centrifuge output report file", "Kraken2 Classification Report", "Kraken2 Taxonomic Classification", "Centrifuge Classification Report", "Centrifuge Taxonomic Classification", "Structural Annotation GFF", "Functional Annotation GFF", "Annotation Amino Acid FASTA", "Annotation Enzyme Commission", "Annotation KEGG Orthology", "Assembly Coverage BAM", "Assembly AGP", "Assembly Scaffolds", "Assembly Contigs", "Assembly Coverage Stats", "Filtered Sequencing Reads", "QC Statistics", "TIGRFam Annotation GFF", "CRT Annotation GFF", "Genmark Annotation GFF", "Prodigal Annotation GFF", "TRNA Annotation GFF", "Misc Annotation GFF", "RFAM Annotation GFF", "TMRNA Annotation GFF", "KO_EC Annotation GFF", "Product Names", "Gene Phylogeny tsv", "Crisprt Terms", "Clusters of Orthologous Groups (COG) Annotation GFF", "CATH FunFams (Functional Families) Annotation GFF", "SUPERFam Annotation GFF", "SMART Annotation GFF", "Pfam Annotation GFF", "Direct Infusion FT ICR-MS Raw Data"], "title": "FileTypeEnum", "type": "string"}, "FilterTypeEnum": {"description": "", "enum": ["particulate air filter", "chemical air filter", "low-MERV pleated media", "HEPA", "electrostatic", "gas-phase or ultraviolet air treatments"], "title": "FilterTypeEnum", "type": "string"}, "FloorCondEnum": {"description": "", 
"enum": ["new", "visible wear", "needs repair", "damaged", "rupture"], "title": "FloorCondEnum", "type": "string"}, "FloorFinishMatEnum": {"description": "", "enum": ["tile", "wood strip or parquet", "carpet", "rug", "laminate wood", "lineoleum", "vinyl composition tile", "sheet vinyl", "stone", "bamboo", "cork", "terrazo", "concrete", "none", "sealed", "clear finish", "paint", "none or unfinished"], "title": "FloorFinishMatEnum", "type": "string"}, "FloorStrucEnum": {"description": "", "enum": ["balcony", "floating floor", "glass floor", "raised floor", "sprung floor", "wood-framed", "concrete"], "title": "FloorStrucEnum", "type": "string"}, "FloorWaterMoldEnum": {"description": "", "enum": ["mold odor", "wet floor", "water stains", "wall discoloration", "floor discoloration", "ceiling discoloration", "peeling paint or wallpaper", "bulging walls", "condensation"], "title": "FloorWaterMoldEnum", "type": "string"}, "FreqCleanEnum": {"description": "", "enum": ["Daily", "Weekly", "Monthly", "Quarterly", "Annually", "other"], "title": "FreqCleanEnum", "type": "string"}, "FunctionalAnnotation": {"additionalProperties": false, "description": "An assignment of a function term (e.g. reaction or pathway) that is executed by a gene product, or which the gene product plays an active role in. Functional annotations can be assigned manually by curators, or automatically in workflows. 
In the context of NMDC, all function annotation is performed automatically, typically using HMM or Blast type methods", "properties": {"has_function": {"pattern": "^(KEGG_PATHWAY:\\w{2,4}\\d{5}|KEGG.REACTION:R\\d+|RHEA:\\d{5}|MetaCyc:[A-Za-z0-9+_.%-:]+|EC:\\d{1,2}(\\.\\d{0,3}){0,3}|GO:\\d{7}|MetaNetX:(MNXR\\d+|EMPTY)|SEED:\\w+|KEGG\\.ORTHOLOGY:K\\d+|EGGNOG:\\w+|PFAM:PF\\d{5}|TIGRFAM:TIGR\\d+|SUPFAM:\\w+|CATH:[1-6]\\.[0-9]+\\.[0-9]+\\.[0-9]+|PANTHER.FAMILY:PTHR\\d{5}(\\:SF\\d{1,3})?)$", "type": "string"}, "subject": {"type": "string"}, "was_generated_by": {"description": "provenance for the annotation.", "type": "string"}}, "title": "FunctionalAnnotation", "type": "object"}, "FurnitureEnum": {"description": "", "enum": ["cabinet", "chair", "desks"], "title": "FurnitureEnum", "type": "string"}, "GenderRestroomEnum": {"description": "", "enum": ["all gender", "female", "gender neurtral", "male", "male and female", "unisex"], "title": "GenderRestroomEnum", "type": "string"}, "GeneProduct": {"additionalProperties": false, "description": "A molecule encoded by a gene that has an evolved function", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "GeneProduct", "type": "object"}, "GenomeFeature": {"additionalProperties": false, "description": "A feature localized to an interval along a genome", "title": "GenomeFeature", "type": "object"}, "GeolocationValue": {"additionalProperties": false, "description": "A normalized value for a location on the earth's surface", "properties": {"has_raw_value": {"description": "The raw value for a geolocation should follow {lat} {long}", "type": "string"}, "latitude": {"description": "latitude", "type": "number"}, "longitude": {"description": "longitude", "type": "number"}, "was_generated_by": {"type": "string"}}, "title": "GeolocationValue", "type": "object"}, "GrowthHabitEnum": {"description": "", "enum": ["erect", "semi-erect", "spreading", "prostrate"], "title": "GrowthHabitEnum", "type": "string"}, "HandidnessEnum": {"description": "", "enum": ["ambidexterity", "left handedness", "mixed-handedness", "right handedness"], "title": "HandidnessEnum", "type": "string"}, "HcProducedEnum": {"description": "", "enum": ["Oil", "Gas-Condensate", "Gas", "Bitumen", "Coalbed Methane", "other"], "title": "HcProducedEnum", "type": "string"}, "HcrEnum": {"description": "", "enum": ["Oil Reservoir", "Gas Reservoir", "Oil Sand", "Coalbed", "Shale", "Tight Oil Reservoir", "Tight Gas Reservoir", "other"], "title": "HcrEnum", "type": "string"}, "HcrGeolAgeEnum": {"description": "", "enum": ["Archean", "Cambrian", "Carboniferous", "Cenozoic", "Cretaceous", "Devonian", "Jurassic", "Mesozoic", "Neogene", "Ordovician", "Paleogene", "Paleozoic", "Permian", "Precambrian", "Proterozoic", "Silurian", "Triassic", "other"], "title": "HcrGeolAgeEnum", "type": "string"}, "HeatCoolTypeEnum": {"description": "", "enum": ["radiant system", "heat pump", "forced air system", "steam forced heat", "wood stove"], 
"title": "HeatCoolTypeEnum", "type": "string"}, "HeatDelivLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "HeatDelivLocEnum", "type": "string"}, "HorizonEnum": {"description": "", "enum": ["O horizon", "A horizon", "E horizon", "B horizon", "C horizon", "R layer", "Permafrost"], "title": "HorizonEnum", "type": "string"}, "HostSexEnum": {"description": "", "enum": ["female", "hermaphrodite", "male", "neuter"], "title": "HostSexEnum", "type": "string"}, "ImageValue": {"additionalProperties": false, "description": "An attribute value representing an image.", "properties": {"description": {"description": "a human-readable description of a thing", "type": "string"}, "display_order": {"description": "When rendering information, this attribute to specify the order in which the information should be rendered.", "type": "string"}, "has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "url": {"type": "string"}, "was_generated_by": {"type": "string"}}, "title": "ImageValue", "type": "object"}, "IndoorSpaceEnum": {"description": "", "enum": ["bedroom", "office", "bathroom", "foyer", "kitchen", "locker room", "hallway", "elevator"], "title": "IndoorSpaceEnum", "type": "string"}, "IndoorSurfEnum": {"description": "", "enum": ["cabinet", "ceiling", "counter top", "door", "shelving", "vent cover", "window", "wall"], "title": "IndoorSurfEnum", "type": "string"}, "Instrument": {"additionalProperties": false, "description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent[OBI].", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Instrument", "type": "object"}, "IntWallCondEnum": {"description": "", "enum": ["new", "visible wear", "needs repair", "damaged", "rupture"], "title": "IntWallCondEnum", "type": "string"}, "IntegerValue": {"additionalProperties": false, "description": "A value that is an integer", "properties": {"has_numeric_value": {"description": "Links a quantity value to a number", "type": "number"}, "has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "IntegerValue", "type": "object"}, "LabDevice": {"additionalProperties": false, "description": "", "properties": {"activity_speed": {"$ref": "#/$defs/QuantityValue"}, "activity_temperature": {"$ref": "#/$defs/QuantityValue"}, "activity_time": {"$ref": "#/$defs/QuantityValue"}, "device_type": {"$ref": "#/$defs/DeviceTypeEnum"}}, "title": "LabDevice", "type": "object"}, "LightTypeEnum": {"description": "", "enum": ["natural light", "electric light", "desk lamp", "flourescent lights", "none"], "title": "LightTypeEnum", "type": "string"}, "LithologyEnum": {"description": "", "enum": ["Basement", "Chalk", "Chert", "Coal", "Conglomerate", "Diatomite", "Dolomite", "Limestone", "Sandstone", "Shale", "Siltstone", "Volcanic", "other"], "title": "LithologyEnum", "type": "string"}, "MagBin": {"additionalProperties": false, "description": "", "properties": {"bin_name": {"type": "string"}, "bin_quality": {"type": "string"}, "completeness": {"type": "number"}, "contamination": {"type": "number"}, "gene_count": {"type": "integer"}, "gtdbtk_class": {"type": "string"}, "gtdbtk_domain": {"type": "string"}, "gtdbtk_family": {"type": "string"}, "gtdbtk_genus": {"type": "string"}, "gtdbtk_order": {"type": 
"string"}, "gtdbtk_phylum": {"type": "string"}, "gtdbtk_species": {"type": "string"}, "num_16s": {"type": "integer"}, "num_23s": {"type": "integer"}, "num_5s": {"type": "integer"}, "num_t_rna": {"type": "integer"}, "number_of_contig": {"type": "integer"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}}, "title": "MagBin", "type": "object"}, "MagsAnalysisActivity": {"additionalProperties": false, "description": "A workflow execution activity that uses computational binning tools to group assembled contigs into genomes", "properties": {"binned_contig_num": {"type": "integer"}, "ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "input_contig_num": {"type": "integer"}, "low_depth_contig_num": {"type": "integer"}, "mags_list": {"items": {"$ref": "#/$defs/MagBin"}, "type": "array"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "too_short_contig_num": {"type": "integer"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "unbinned_contig_num": {"type": "integer"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MagsAnalysisActivity", "type": "object"}, "MaterialContainer": {"additionalProperties": false, "description": "", "properties": {"container_size": {"$ref": "#/$defs/QuantityValue"}, "container_type": {"$ref": "#/$defs/ContainerTypeEnum"}}, "title": "MaterialContainer", "type": "object"}, "MaterialEntity": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": 
{"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "MaterialEntity", "type": "object"}, "MaterialSample": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "MaterialSample", "type": "object"}, "MaterialSamplingActivity": {"additionalProperties": false, "description": "", "properties": {"amount_collected": {"$ref": "#/$defs/QuantityValue"}, "biosample_input": {"type": "string"}, "collected_into": {"$ref": "#/$defs/MaterialContainer"}, "material_output": {"type": "string"}, "sampling_method": {"$ref": "#/$defs/SamplingMethodEnum"}}, "title": "MaterialSamplingActivity", "type": "object"}, "MechStrucEnum": {"description": "", "enum": ["subway", "coach", "carriage", "elevator", "escalator", "boat", "train", "car", "bus"], "title": "MechStrucEnum", "type": "string"}, "MetaboliteQuantification": {"additionalProperties": false, "description": "This is used to link a metabolomics analysis workflow to a specific metabolite", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}}, "title": "MetaboliteQuantification", "type": "object"}, "MetabolomicsAnalysisActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"description": "The instrument used to collect the data used in the analysis", "type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetabolomicsAnalysisActivity", "type": "object"}, "MetagenomeAnnotationActivity": {"additionalProperties": false, "description": "A workflow execution activity that provides functional and structural annotation of assembled metagenome contigs", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "gold_analysis_project_identifiers": {"description": "identifiers for corresponding analysis project in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Ga[0-9]+$", "type": "array"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetagenomeAnnotationActivity", "type": "object"}, "MetagenomeAssembly": {"additionalProperties": false, "description": "A workflow execution activity that converts sequencing reads into an assembled metagenome.", "properties": {"asm_score": {"description": "A score for comparing metagenomic assembly quality from same sample.", "type": "number"}, "contig_bp": {"description": "Total size in bp of all contigs.", "type": "number"}, "contigs": {"description": "The sum of the (length*log(length)) of all contigs, times some constant. 
Increase the contiguity, the score will increase", "type": "number"}, "ctg_l50": {"description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", "type": "number"}, "ctg_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", "type": "number"}, "ctg_logsum": {"description": "Maximum contig length.", "type": "number"}, "ctg_max": {"description": "Maximum contig length.", "type": "number"}, "ctg_n50": {"description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number_of_contigs whose length sum makes up half of genome size.", "type": "number"}, "ctg_n90": {"description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", "type": "number"}, "ctg_powsum": {"description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "gap_pct": {"description": "The gap size percentage of all scaffolds.", "type": "number"}, "gc_avg": {"description": "Average of GC content of all contigs.", "type": "number"}, "gc_std": {"description": "Standard deviation of GC content of all contigs.", "type": "number"}, "git_url": 
{"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "insdc_assembly_identifiers": {"pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "num_aligned_reads": {"description": "The sequence count number of input reads aligned to assembled contigs.", "type": "number"}, "num_input_reads": {"description": "The sequence count number of input reads for assembly.", "type": "number"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "scaf_bp": {"description": "Total size in bp of all scaffolds.", "type": "number"}, "scaf_l50": {"description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", "type": "number"}, "scaf_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", "type": "number"}, "scaf_l_gt50k": {"description": "Total size in bp of all scaffolds greater than 50 KB.", "type": "number"}, "scaf_logsum": {"description": "The sum of the (length*log(length)) of all scaffolds, times some constant. 
Increase the contiguity, the score will increase", "type": "number"}, "scaf_max": {"description": "Maximum scaffold length.", "type": "number"}, "scaf_n50": {"description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", "type": "number"}, "scaf_n90": {"description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.", "type": "number"}, "scaf_n_gt50k": {"description": "Total sequence count of scaffolds greater than 50 KB.", "type": "number"}, "scaf_pct_gt50k": {"description": "Total sequence size percentage of scaffolds greater than 50 KB.", "type": "number"}, "scaf_powsum": {"description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "scaffolds": {"description": "Total sequence count of all scaffolds.", "type": "number"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetagenomeAssembly", "type": "object"}, "MetaproteomicsAnalysisActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"description": "The instrument used to collect the data used in the analysis", "type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetaproteomicsAnalysisActivity", "type": "object"}, "MetatranscriptomeActivity": {"additionalProperties": false, "description": "A metatranscriptome activity that e.g. 
pools assembly and annotation activity.", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetatranscriptomeActivity", "type": "object"}, "MetatranscriptomeAnnotationActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "gold_analysis_project_identifiers": {"description": "identifiers for corresponding analysis project in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Ga[0-9]+$", "type": "array"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetatranscriptomeAnnotationActivity", "type": "object"}, "MetatranscriptomeAssembly": {"additionalProperties": false, "description": "", "properties": {"asm_score": {"description": "A score for comparing metagenomic assembly quality from same sample.", "type": "number"}, "contig_bp": {"description": "Total size in bp of all contigs.", "type": "number"}, "contigs": {"description": "The sum of the (length*log(length)) of all contigs, times some constant. 
Increase the contiguity, the score will increase", "type": "number"}, "ctg_l50": {"description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", "type": "number"}, "ctg_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", "type": "number"}, "ctg_logsum": {"description": "Maximum contig length.", "type": "number"}, "ctg_max": {"description": "Maximum contig length.", "type": "number"}, "ctg_n50": {"description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number_of_contigs whose length sum makes up half of genome size.", "type": "number"}, "ctg_n90": {"description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", "type": "number"}, "ctg_powsum": {"description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "gap_pct": {"description": "The gap size percentage of all scaffolds.", "type": "number"}, "gc_avg": {"description": "Average of GC content of all contigs.", "type": "number"}, "gc_std": {"description": "Standard deviation of GC content of all contigs.", "type": "number"}, "git_url": 
{"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "insdc_assembly_identifiers": {"pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "num_aligned_reads": {"description": "The sequence count number of input reads aligned to assembled contigs.", "type": "number"}, "num_input_reads": {"description": "The sequence count number of input reads for assembly.", "type": "number"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "scaf_bp": {"description": "Total size in bp of all scaffolds.", "type": "number"}, "scaf_l50": {"description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", "type": "number"}, "scaf_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", "type": "number"}, "scaf_l_gt50k": {"description": "Total size in bp of all scaffolds greater than 50 KB.", "type": "number"}, "scaf_logsum": {"description": "The sum of the (length*log(length)) of all scaffolds, times some constant. 
Increase the contiguity, the score will increase", "type": "number"}, "scaf_max": {"description": "Maximum scaffold length.", "type": "number"}, "scaf_n50": {"description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", "type": "number"}, "scaf_n90": {"description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.", "type": "number"}, "scaf_n_gt50k": {"description": "Total sequence count of scaffolds greater than 50 KB.", "type": "number"}, "scaf_pct_gt50k": {"description": "Total sequence size percentage of scaffolds greater than 50 KB.", "type": "number"}, "scaf_powsum": {"description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "scaffolds": {"description": "Total sequence count of all scaffolds.", "type": "number"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetatranscriptomeAssembly", "type": "object"}, "NomAnalysisActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"description": "The instrument used to collect the data used in the analysis", "type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "NomAnalysisActivity", "type": "object"}, "OccupDocumentEnum": {"description": "", "enum": ["automated count", "estimate", "manual count", "videos"], "title": "OccupDocumentEnum", "type": "string"}, "OmicsProcessing": {"additionalProperties": false, "description": "The methods and processes used to generate omics data from a biosample or organism.", "properties": {"add_date": {"description": "The date on which the information was added to the database.", "type": "string"}, "alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "chimera_check": {"$ref": "#/$defs/TextValue", "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove 
chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences."}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "gold_sequencing_project_identifiers": {"description": "identifiers for corresponding sequencing project in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Gp[0-9]+$", "type": "array"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "insdc_experiment_identifiers": {"items": {"type": "string"}, "pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$", "type": "array"}, "instrument_name": {"description": "The name of the instrument that was used for processing the sample.", "type": "string"}, "mod_date": {"description": "The last date on which the database information was modified.", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "ncbi_project_name": {"type": "string"}, "nucl_acid_amp": {"$ref": "#/$defs/TextValue", "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids"}, "nucl_acid_ext": {"$ref": "#/$defs/TextValue", "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample"}, "omics_type": {"$ref": "#/$defs/ControlledTermValue", "description": "The type of omics data"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": 
"array"}, "pcr_cond": {"$ref": "#/$defs/TextValue", "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'"}, "pcr_primers": {"$ref": "#/$defs/TextValue", "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters"}, "principal_investigator": {"$ref": "#/$defs/PersonValue", "description": "Principal Investigator who led the study and/or generated the dataset."}, "processing_institution": {"$ref": "#/$defs/ProcessingInstitutionEnum", "description": "The organization that processed the sample."}, "samp_vol_we_dna_ext": {"$ref": "#/$defs/QuantityValue", "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (mixs:0000001)."}, "seq_meth": {"$ref": "#/$defs/TextValue", "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)."}, "seq_quality_check": {"$ref": "#/$defs/TextValue", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA"}, "target_gene": {"$ref": "#/$defs/TextValue", "description": "Targeted gene or locus name for marker gene studies"}, "target_subfragment": {"$ref": "#/$defs/TextValue", "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}}, "required": ["has_input"], "title": "OmicsProcessing", "type": "object"}, "OntologyClass": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "OntologyClass", "type": "object"}, "OrganismCountEnum": {"description": "", "enum": ["ATP", "MPN", "other"], "title": "OrganismCountEnum", "type": "string"}, "OrthologyGroup": {"additionalProperties": false, "description": "A set of genes or gene products in which all members are orthologous", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "OrthologyGroup", "type": "object"}, "OxyStatSampEnum": {"description": "", "enum": ["aerobic", "anaerobic", "other"], "title": "OxyStatSampEnum", "type": "string"}, "Pathway": {"additionalProperties": false, "description": "A pathway is a sequence of steps/reactions carried out by an organism or community of organisms", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Pathway", "type": "object"}, "PeptideQuantification": {"additionalProperties": false, "description": "This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information", "title": "PeptideQuantification", "type": "object"}, "Person": {"additionalProperties": false, "description": "represents a person, such as a researcher", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "Should be an ORCID. Specify in CURIE format. 
E.g ORCID:0000-1111-...", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Person", "type": "object"}, "PersonValue": {"additionalProperties": false, "description": "An attribute value representing a person", "properties": {"email": {"description": "An email address for an entity such as a person. This should be the primary email address used.", "type": "string"}, "has_raw_value": {"description": "The full name of the Investigator in format FIRST LAST.", "type": "string"}, "name": {"description": "The full name of the Investigator. It should follow the format FIRST [MIDDLE NAME| MIDDLE INITIAL] LAST, where MIDDLE NAME| MIDDLE INITIAL is optional.", "type": "string"}, "orcid": {"description": "The ORCID of a person.", "type": "string"}, "profile_image_url": {"description": "A url that points to an image of a person.", "type": "string"}, "was_generated_by": {"type": "string"}, "websites": {"description": "A list of websites that are associated with the entity.", "items": {"type": "string"}, "type": "array"}}, "title": "PersonValue", "type": "object"}, "PlannedProcess": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "has_inputs": {"items": {"type": "string"}, "type": "array"}, "has_outputs": {"items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "participating_agent": {"$ref": "#/$defs/Agent"}}, "required": ["id"], "title": "PlannedProcess", "type": "object"}, "PlantGrowthMedEnum": {"description": "", "enum": ["other artificial liquid medium", "other artificial solid medium", "peat moss", "perlite", "pumice", "sand", "soil", "vermiculite", "water"], "title": "PlantGrowthMedEnum", "type": "string"}, "PlantSexEnum": {"description": "", "enum": ["Androdioecious", "Androecious", "Androgynous", "Androgynomonoecious", "Andromonoecious", "Bisexual", "Dichogamous", "Diclinous", "Dioecious", "Gynodioecious", "Gynoecious", "Gynomonoecious", "Hermaphroditic", "Imperfect", "Monoclinous", "Monoecious", "Perfect", "Polygamodioecious", "Polygamomonoecious", "Polygamous", "Protandrous", "Protogynous", "Subandroecious", "Subdioecious", "Subgynoecious", "Synoecious", "Trimonoecious", "Trioecious", "Unisexual"], "title": "PlantSexEnum", "type": "string"}, "ProcessingInstitutionEnum": {"description": "", "enum": ["UCSD", "JGI", "EMSL"], "title": "ProcessingInstitutionEnum", "type": "string"}, "ProfilePositionEnum": {"description": "", "enum": ["summit", "shoulder", "backslope", "footslope", "toeslope"], "title": "ProfilePositionEnum", "type": "string"}, "ProteinQuantification": {"additionalProperties": false, "description": "This is used to link a metaproteomics analysis workflow to a specific protein", "title": "ProteinQuantification", "type": "object"}, "QuadPosEnum": {"description": "", "enum": ["North side", "West side", "South side", "East side"], "title": "QuadPosEnum", "type": "string"}, "QuantityValue": {"additionalProperties": false, "description": "A simple quantity, e.g. 
2cm", "properties": {"has_maximum_numeric_value": {"description": "The maximum value part, expressed as number, of the quantity value when the value covers a range.", "type": "number"}, "has_minimum_numeric_value": {"description": "The minimum value part, expressed as number, of the quantity value when the value covers a range.", "type": "number"}, "has_numeric_value": {"description": "The number part of the quantity", "type": "number"}, "has_raw_value": {"description": "Unnormalized atomic string representation, should be in syntax {number} {unit}", "type": "string"}, "has_unit": {"description": "The unit of the quantity", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "QuantityValue", "type": "object"}, "Reaction": {"additionalProperties": false, "description": "An individual biochemical transformation carried out by a functional unit of an organism, in which a collection of substrates are transformed into a collection of products. Can also represent transporters", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Reaction", "type": "object"}, "ReactionActivity": {"additionalProperties": false, "description": "", "properties": {"material_input": {"type": "string"}, "material_output": {"type": "string"}, "reaction_aided_by": {"$ref": "#/$defs/LabDevice"}, "reaction_temperature": {"type": "string"}, "reaction_time": {"$ref": "#/$defs/QuantityValue"}}, "title": "ReactionActivity", "type": "object"}, "ReactionParticipant": {"additionalProperties": false, "description": "Instances of this link a reaction to a chemical entity participant", "title": "ReactionParticipant", "type": "object"}, "ReadBasedTaxonomyAnalysisActivity": {"additionalProperties": false, "description": "A workflow execution activity that performs taxonomy classification using sequencing reads", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "ReadBasedTaxonomyAnalysisActivity", "type": "object"}, "ReadQcAnalysisActivity": {"additionalProperties": false, "description": "A workflow execution activity that performs quality control on raw Illumina reads including quality trimming, artifact removal, linker trimming, adapter trimming, spike-in removal, and human/cat/dog/mouse/microbe contaminant removal", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": 
"Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "input_base_count": {"description": "The nucleotide base count number of input reads for QC analysis.", "type": "number"}, "input_read_count": {"description": "The sequence count number of input reads for QC analysis.", "type": "number"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "output_base_count": {"description": "After QC analysis nucleotide base count number.", "type": "number"}, "output_read_count": {"description": "After QC analysis sequence count number.", "type": "number"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "ReadQcAnalysisActivity", "type": "object"}, "RelSampLocEnum": {"description": "", "enum": ["edge of car", "center of car", "under a seat"], "title": "RelSampLocEnum", "type": "string"}, "RelToOxygenEnum": {"description": "", "enum": ["aerobe", "anaerobe", "facultative", "microaerophilic", "microanaerobe", "obligate aerobe", "obligate anaerobe"], "title": "RelToOxygenEnum", "type": "string"}, "RnaContTypeEnum": {"description": "", "enum": ["plate", "tube"], "title": "RnaContTypeEnum", "type": "string"}, "RnaSampleFormatEnum": {"description": "", "enum": ["10 mM Tris-HCl", "DNAStable", "Ethanol", "Low EDTA TE", "MDA reaction buffer", "PBS", "Pellet", "RNAStable", "TE", "Water"], "title": "RnaSampleFormatEnum", "type": "string"}, "RoomCondtEnum": {"description": "", "enum": ["new", "visible wear", "needs repair", "damaged", "rupture", "visible signs of mold/mildew"], "title": "RoomCondtEnum", "type": "string"}, "RoomConnectedEnum": {"description": "", "enum": ["attic", "bathroom", "closet", "conference room", "elevator", "examining room", "hallway", "kitchen", "mail room", "office", "stairwell"], "title": "RoomConnectedEnum", "type": "string"}, "RoomLocEnum": {"description": "", "enum": ["corner room", "interior room", "exterior wall"], "title": "RoomLocEnum", "type": "string"}, "RoomSampPosEnum": {"description": "", "enum": ["north corner", "south corner", "west corner", "east corner", "northeast corner", "northwest corner", "southeast corner", "southwest corner", "center"], "title": "RoomSampPosEnum", "type": "string"}, "RoomTypeEnum": {"description": "", "enum": ["attic", "bathroom", "closet", "conference room", "elevator", 
"examining room", "hallway", "kitchen", "mail room", "private office", "open office", "stairwell", ",restroom", "lobby", "vestibule", "mechanical or electrical room", "data center", "laboratory_wet", "laboratory_dry", "gymnasium", "natatorium", "auditorium", "lockers", "cafe", "warehouse"], "title": "RoomTypeEnum", "type": "string"}, "SampCaptStatusEnum": {"description": "", "enum": ["active surveillance in response to an outbreak", "active surveillance not initiated by an outbreak", "farm sample", "market sample", "other"], "title": "SampCaptStatusEnum", "type": "string"}, "SampCollectPointEnum": {"description": "", "enum": ["well", "test well", "drilling rig", "wellhead", "separator", "storage tank", "other"], "title": "SampCollectPointEnum", "type": "string"}, "SampDisStageEnum": {"description": "", "enum": ["dissemination", "growth and reproduction", "infection", "inoculation", "penetration", "other"], "title": "SampDisStageEnum", "type": "string"}, "SampFloorEnum": {"description": "", "enum": ["1st floor", "2nd floor", "basement", "lobby"], "title": "SampFloorEnum", "type": "string"}, "SampMdEnum": {"description": "", "enum": ["DF", "RT", "KB", "MSL", "other"], "title": "SampMdEnum", "type": "string"}, "SampSubtypeEnum": {"description": "", "enum": ["oil phase", "water phase", "biofilm", "not applicable", "other"], "title": "SampSubtypeEnum", "type": "string"}, "SampWeatherEnum": {"description": "", "enum": ["clear sky", "cloudy", "foggy", "hail", "rain", "snow", "sleet", "sunny", "windy"], "title": "SampWeatherEnum", "type": "string"}, "SampleTypeEnum": {"description": "", "enum": ["soil", "water_extract_soil"], "title": "SampleTypeEnum", "type": "string"}, "SamplingMethodEnum": {"description": "", "enum": ["weighing"], "title": "SamplingMethodEnum", "type": "string"}, "SeasonUseEnum": {"description": "", "enum": ["Spring", "Summer", "Fall", "Winter"], "title": "SeasonUseEnum", "type": "string"}, "SedimentTypeEnum": {"description": "", "enum": ["biogenous", 
"cosmogenous", "hydrogenous", "lithogenous"], "title": "SedimentTypeEnum", "type": "string"}, "ShadingDeviceCondEnum": {"description": "", "enum": ["damaged", "needs repair", "new", "rupture", "visible wear"], "title": "ShadingDeviceCondEnum", "type": "string"}, "ShadingDeviceTypeEnum": {"description": "", "enum": ["bahama shutters", "exterior roll blind", "gambrel awning", "hood awning", "porchroller awning", "sarasota shutters", "slatted aluminum", "solid aluminum awning", "sun screen", "tree", "trellis", "venetian awning"], "title": "ShadingDeviceTypeEnum", "type": "string"}, "Site": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Site", "type": "object"}, "SoilHorizonEnum": {"description": "", "enum": ["O horizon", "A horizon", "E horizon", "B horizon", "C horizon", "R layer", "Permafrost"], "title": "SoilHorizonEnum", "type": "string"}, "SolventEnum": {"description": "", "enum": ["deionized_water", "methanol", "chloroform"], "title": "SolventEnum", "type": "string"}, "SpecificEnum": {"description": "", "enum": ["operation", "as built", "construction", "bid", "design", "photos"], "title": "SpecificEnum", "type": "string"}, "SrDepEnvEnum": {"description": "", "enum": ["Lacustine", "Fluvioldeltaic", "Fluviomarine", "Marine", "other"], "title": "SrDepEnvEnum", "type": "string"}, "SrGeolAgeEnum": {"description": "", "enum": ["Archean", "Cambrian", "Carboniferous", "Cenozoic", "Cretaceous", "Devonian", "Jurassic", "Mesozoic", "Neogene", "Ordovician", "Paleogene", 
"Paleozoic", "Permian", "Precambrian", "Proterozoic", "Silurian", "Triassic", "other"], "title": "SrGeolAgeEnum", "type": "string"}, "SrKerogTypeEnum": {"description": "", "enum": ["Type I", "Type II", "Type III", "Type IV", "other"], "title": "SrKerogTypeEnum", "type": "string"}, "SrLithologyEnum": {"description": "", "enum": ["Clastic", "Carbonate", "Coal", "Biosilicieous", "other"], "title": "SrLithologyEnum", "type": "string"}, "Study": {"additionalProperties": false, "description": "A study summarizes the overall goal of a research initiative and outlines the key objective of its underlying projects.", "properties": {"abstract": {"description": "The abstract of manuscript/grant associated with the entity; i.e., a summary of the resource.", "type": "string"}, "alternative_descriptions": {"description": "A list of alternative descriptions for the entity. The distinction between description and alternative descriptions is application-specific.", "items": {"type": "string"}, "type": "array"}, "alternative_identifiers": {"description": "Unique identifier for a study submitted to additional resources. Matches that which has been submitted to NMDC", "items": {"type": "string"}, "type": "array"}, "alternative_names": {"description": "A list of alternative names used to refer to the entity. The distinction between name and alternative names is application-specific.", "items": {"type": "string"}, "type": "array"}, "alternative_titles": {"description": "A list of alternative titles for the entity. 
The distinction between title and alternative titles is application-specific.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "A brief summary that details the study you've submitted to NMDC", "type": "string"}, "doi": {"$ref": "#/$defs/AttributeValue", "description": "The dataset citation for this study"}, "ecosystem": {"description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", "type": "string"}, "ecosystem_category": {"description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", "type": "string"}, "ecosystem_subtype": {"description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", "type": "string"}, "ecosystem_type": {"description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. 
Ecosystem type is in position 3/5 in a GOLD path.", "type": "string"}, "emsl_proposal_doi": {"description": "The DOI for the EMSL awarded study that relates to the NMDC submitted study", "type": "string"}, "emsl_proposal_identifier": {"description": "The proposal number assigned to the EMSL awarded study that relates to that which is represented in NMDC.", "type": "string"}, "ess_dive_datasets": {"description": "List of ESS-DIVE dataset DOIs", "items": {"type": "string"}, "type": "array"}, "funding_sources": {"items": {"type": "string"}, "type": "array"}, "gold_study_identifiers": {"description": "identifiers for corresponding project(s) in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Gs[0-9]+$", "type": "array"}, "has_credit_associations": {"description": "This slot links a study to a credit association. The credit association will be linked to a person value and to a CRediT Contributor Roles term. Overall semantics: person should get credit X for their participation in the study", "items": {"$ref": "#/$defs/CreditAssociation"}, "type": "array"}, "id": {"description": "An NMDC assigned unique identifier for a sample submitted to NMDC.", "type": "string"}, "mgnify_project_identifiers": {"description": "identifiers for corresponding project in MGnify", "pattern": "^mgnify.proj:[A-Z]+[0-9]+$", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "objective": {"description": "The scientific objectives associated with the entity. It SHOULD correspond to scientific norms for objectives field in a structured abstract.", "type": "string"}, "principal_investigator": {"$ref": "#/$defs/PersonValue", "description": "Principal Investigator who led the study and/or generated the dataset."}, "publications": {"description": "A list of publications that are associated with the entity. 
The publications SHOULD be given using an identifier, such as a DOI or Pubmed ID, if possible.", "items": {"type": "string"}, "type": "array"}, "related_identifiers": {"description": "Unique identifier for a study submitted to additional resources. Similar, but not necessarily identical to that which has been submitted to NMDC", "type": "string"}, "relevant_protocols": {"items": {"type": "string"}, "type": "array"}, "specific_ecosystem": {"description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", "type": "string"}, "study_image": {"description": "Links a study to one or more images.", "items": {"$ref": "#/$defs/ImageValue"}, "type": "array"}, "title": {"description": "A name given to the entity that differs from the name/label programmatically assigned to it. For example, when extracting study information for GOLD, the GOLD system has assigned a name/label. However, for display purposes, we may also wish to capture the title of the proposal that was used to fund the study.", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "websites": {"description": "A list of websites that are associated with the entity.", "items": {"type": "string"}, "type": "array"}}, "title": "Study", "type": "object"}, "SubstructureTypeEnum": {"description": "", "enum": ["crawlspace", "slab on grade", "basement"], "title": "SubstructureTypeEnum", "type": "string"}, "SurfAirContEnum": {"description": "", "enum": ["dust", "organic matter", "particulate matter", "volatile organic compounds", "biological contaminants", "radon", "nutrients", "biocides"], "title": "SurfAirContEnum", "type": "string"}, "SurfMaterialEnum": {"description": "", "enum": ["adobe", "carpet", "cinder blocks", "concrete", "hay bales", "glass", "metal", "paint", "plastic", "stainless steel", "stone", "stucco", "tile", "vinyl", "wood"], "title": "SurfMaterialEnum", "type": "string"}, "TextValue": {"additionalProperties": false, "description": "A basic string value", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "language": {"description": "Should use ISO 639-1 code e.g. \"en\", \"fr\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "TextValue", "type": "object"}, "TidalStageEnum": {"description": "", "enum": ["low tide", "ebb tide", "flood tide", "high tide"], "title": "TidalStageEnum", "type": "string"}, "TillageEnum": {"description": "", "enum": ["drill", "cutting disc", "ridge till", "strip tillage", "zonal tillage", "chisel", "tined", "mouldboard", "disc plough"], "title": "TillageEnum", "type": "string"}, "TimestampValue": {"additionalProperties": false, "description": "A value that is a timestamp. The range should be ISO-8601", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "TimestampValue", "type": "object"}, "TrainLineEnum": {"description": "", "enum": ["red", "green", "orange"], "title": "TrainLineEnum", "type": "string"}, "TrainStatLocEnum": {"description": "", "enum": ["south station above ground", "south station underground", "south station amtrak", "forest hills", "riverside"], "title": "TrainStatLocEnum", "type": "string"}, "TrainStopLocEnum": {"description": "", "enum": ["end", "mid", "downtown"], "title": "TrainStopLocEnum", "type": "string"}, "UrlValue": {"additionalProperties": false, "description": "A value that is a string that conforms to URL syntax", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "UrlValue", "type": "object"}, "VisMediaEnum": {"description": "", "enum": ["photos", "videos", "commonly of the building", "site context (adjacent buildings, vegetation, terrain, streets)", "interiors", "equipment", "3D scans"], "title": "VisMediaEnum", "type": "string"}, "WallConstTypeEnum": {"description": "", "enum": ["frame construction", "joisted masonry", "light noncombustible", "masonry noncombustible", "modified fire resistive", "fire resistive"], "title": "WallConstTypeEnum", "type": "string"}, "WallFinishMatEnum": {"description": "", "enum": ["plaster", "gypsum plaster", "veneer plaster", "gypsum board", "tile", "terrazzo", "stone facing", "acoustical treatment", "wood", "metal", "masonry"], "title": "WallFinishMatEnum", "type": "string"}, "WallLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "WallLocEnum", "type": "string"}, "WallSurfTreatmentEnum": {"description": "", "enum": ["painted", "wall paper", "no treatment", "paneling", "stucco", "fabric"], "title": "WallSurfTreatmentEnum", "type": "string"}, 
"WallTextureEnum": {"description": "", "enum": ["crows feet", "crows-foot stomp", "double skip", "hawk and trowel", "knockdown", "popcorn", "orange peel", "rosebud stomp", "Santa-Fe texture", "skip trowel", "smooth", "stomp knockdown", "swirl"], "title": "WallTextureEnum", "type": "string"}, "WaterFeatTypeEnum": {"description": "", "enum": ["fountain", "pool", "standing feature", "stream", "waterfall"], "title": "WaterFeatTypeEnum", "type": "string"}, "WeekdayEnum": {"description": "", "enum": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"], "title": "WeekdayEnum", "type": "string"}, "WindowCondEnum": {"description": "", "enum": ["damaged", "needs repair", "new", "rupture", "visible wear"], "title": "WindowCondEnum", "type": "string"}, "WindowCoverEnum": {"description": "", "enum": ["blinds", "curtains", "none"], "title": "WindowCoverEnum", "type": "string"}, "WindowHorizPosEnum": {"description": "", "enum": ["left", "middle", "right"], "title": "WindowHorizPosEnum", "type": "string"}, "WindowLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "WindowLocEnum", "type": "string"}, "WindowMatEnum": {"description": "", "enum": ["clad", "fiberglass", "metal", "vinyl", "wood"], "title": "WindowMatEnum", "type": "string"}, "WindowTypeEnum": {"description": "", "enum": ["single-hung sash window", "horizontal sash window", "fixed window"], "title": "WindowTypeEnum", "type": "string"}, "WindowVertPosEnum": {"description": "", "enum": ["bottom", "middle", "top", "low", "high"], "title": "WindowVertPosEnum", "type": "string"}, "WorkflowExecutionActivity": {"additionalProperties": false, "description": "Represents an instance of an execution of a particular workflow", "properties": {"ended_at_time": {"format": "date-time", "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"description": "the agent/entity associated with the generation of the file", "type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "WorkflowExecutionActivity", "type": "object"}}, "$id": "https://microbiomedata/schema", "$schema": "http://json-schema.org/draft-07/schema#", "additionalProperties": false, "metamodel_version": "1.7.0", "properties": {"activity_set": {"description": "This property links a database object to the set of workflow activities.", "items": {"$ref": "#/$defs/WorkflowExecutionActivity"}, "type": "array"}, "biosample_set": {"description": "This property links a database object to the set of samples within it.", "items": {"$ref": "#/$defs/Biosample"}, "type": "array"}, "cbfs_set": {"items": {"$ref": "#/$defs/CollectingBiosamplesFromSite"}, "type": "array"}, "data_object_set": {"description": "This property links a database object to the set of data objects within it.", "items": {"$ref": "#/$defs/DataObject"}, "type": "array"}, "dissolving_activity_set": {"items": {"$ref": "#/$defs/DissolvingActivity"}, "type": "array"}, "frs_set": {"items": {"$ref": "#/$defs/FieldResearchSite"}, "type": "array"}, "functional_annotation_set": {"description": "This property links a database object to the set of all functional annotations", "items": {"$ref": "#/$defs/FunctionalAnnotation"}, "type": "array"}, "genome_feature_set": {"description": "This property links a database object to the set of all features", "items": {"$ref": "#/$defs/GenomeFeature"}, "type": "array"}, "mags_activity_set": {"description": "This property links a database object to the set of MAGs analysis activities.", "items": {"$ref": "#/$defs/MagsAnalysisActivity"}, "type": "array"}, "material_sample_set": 
{"items": {"$ref": "#/$defs/MaterialSample"}, "type": "array"}, "material_sampling_activity_set": {"items": {"$ref": "#/$defs/MaterialSamplingActivity"}, "type": "array"}, "metabolomics_analysis_activity_set": {"description": "This property links a database object to the set of metabolomics analysis activities.", "items": {"$ref": "#/$defs/MetabolomicsAnalysisActivity"}, "type": "array"}, "metagenome_annotation_activity_set": {"description": "This property links a database object to the set of metagenome annotation activities.", "items": {"$ref": "#/$defs/MetagenomeAnnotationActivity"}, "type": "array"}, "metagenome_assembly_set": {"description": "This property links a database object to the set of metagenome assembly activities.", "items": {"$ref": "#/$defs/MetagenomeAssembly"}, "type": "array"}, "metaproteomics_analysis_activity_set": {"description": "This property links a database object to the set of metaproteomics analysis activities.", "items": {"$ref": "#/$defs/MetaproteomicsAnalysisActivity"}, "type": "array"}, "metatranscriptome_activity_set": {"description": "TODO", "items": {"$ref": "#/$defs/MetatranscriptomeActivity"}, "type": "array"}, "nom_analysis_activity_set": {"description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", "items": {"$ref": "#/$defs/NomAnalysisActivity"}, "type": "array"}, "omics_processing_set": {"description": "This property links a database object to the set of omics processings within it.", "items": {"$ref": "#/$defs/OmicsProcessing"}, "type": "array"}, "reaction_activity_set": {"items": {"$ref": "#/$defs/ReactionActivity"}, "type": "array"}, "read_based_taxonomy_analysis_activity_set": {"description": "This property links a database object to the set of read based analysis activities.", "items": {"$ref": "#/$defs/ReadBasedTaxonomyAnalysisActivity"}, "type": "array"}, "read_qc_analysis_activity_set": {"description": "This property links a database object to the set of 
read QC analysis activities.", "items": {"$ref": "#/$defs/ReadQcAnalysisActivity"}, "type": "array"}, "study_set": {"description": "This property links a database object to the set of studies within it.", "items": {"$ref": "#/$defs/Study"}, "type": "array"}}, "title": "NMDC", "type": "object", "version": "7.0.0"} +{"$defs": {"Activity": {"additionalProperties": false, "description": "a provenance-generating activity", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"$ref": "#/$defs/Agent"}, "was_informed_by": {"type": "string"}}, "required": ["id"], "title": "Activity", "type": "object"}, "Agent": {"additionalProperties": false, "description": "a provenance-generating agent", "properties": {"acted_on_behalf_of": {"$ref": "#/$defs/Agent"}, "was_informed_by": {"type": "string"}}, "title": "Agent", "type": "object"}, "AnalysisTypeEnum": {"description": "", "enum": ["metabolomics", "metagenomics", "metaproteomics", "metatranscriptomics", "natural organic matter"], "title":
"AnalysisTypeEnum", "type": "string"}, "AnalyticalSample": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "AnalyticalSample", "type": "object"}, "ArchStrucEnum": {"description": "", "enum": ["building", "shed", "home"], "title": "ArchStrucEnum", "type": "string"}, "AttributeValue": {"additionalProperties": false, "description": "The value for any value of a attribute for a sample. This object can hold both the un-normalized atomic value and the structured value", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "AttributeValue", "type": "object"}, "BiolStatEnum": {"description": "", "enum": ["wild", "natural", "semi-natural", "inbred line", "breeder's line", "hybrid", "clonal selection", "mutant"], "title": "BiolStatEnum", "type": "string"}, "Biosample": {"additionalProperties": false, "description": "Biological source material which can be characterized by an experiment.", "properties": {"add_date": {"description": "The date on which the information was added to the database.", "type": "string"}, "agrochem_addition": {"$ref": "#/$defs/QuantityValue", "description": "Addition of fertilizers, pesticides, etc. 
- amount and time of applications"}, "air_temp_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens"}, "al_sat": {"$ref": "#/$defs/QuantityValue", "description": "The relative abundance of aluminum in the sample"}, "al_sat_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining Aluminum saturation"}, "alkalinity": {"$ref": "#/$defs/QuantityValue", "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate"}, "alkalinity_method": {"$ref": "#/$defs/TextValue", "description": "Method used for alkalinity measurement"}, "alkyl_diethers": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of alkyl diethers"}, "alt": {"$ref": "#/$defs/QuantityValue", "description": "Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air"}, "alternative_identifiers": {"description": "Unique identifier for a biosample submitted to additional resources. 
Matches the entity that has been submitted to NMDC", "items": {"type": "string"}, "type": "array"}, "aminopept_act": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of aminopeptidase activity"}, "ammonium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of ammonium in the sample"}, "ammonium_nitrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of ammonium nitrogen in the sample"}, "analysis_type": {"description": "Select all the data types associated or available for this biosample", "items": {"$ref": "#/$defs/AnalysisTypeEnum"}, "type": "array"}, "annual_precpt": {"$ref": "#/$defs/QuantityValue", "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps."}, "annual_temp": {"$ref": "#/$defs/QuantityValue", "description": "Mean annual temperature"}, "bacteria_carb_prod": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of bacterial carbon production"}, "biosample_categories": {"items": {"$ref": "#/$defs/BiosampleCategoryEnum"}, "type": "array"}, "biotic_regm": {"$ref": "#/$defs/TextValue", "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi."}, "biotic_relationship": {"$ref": "#/$defs/TextValue", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object"}, "bishomohopanol": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of bishomohopanol"}, "bromide": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of bromide"}, "calcium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of calcium in the sample"}, "carb_nitro_ratio": {"$ref": "#/$defs/QuantityValue", "description": "Ratio of amount or concentrations of carbon to nitrogen"}, "chem_administration": {"$ref": "#/$defs/ControlledTermValue", "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi"}, "chloride": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of chloride in the sample"}, "chlorophyll": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of chlorophyll"}, "climate_environment": {"$ref": "#/$defs/TextValue", "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates"}, "collected_from": {"description": "The Site from which a Biosample was collected", "type": "string"}, "collection_date": {"$ref": "#/$defs/TimestampValue", "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant"}, "collection_date_inc": {"description": "Date the incubation was harvested/collected/ended. 
Only relevant for incubation samples.", "type": "string"}, "collection_time": {"description": "The time of sampling, either as an instance (single point) or interval.", "type": "string"}, "collection_time_inc": {"description": "Time the incubation was harvested/collected/ended. Only relevant for incubation samples.", "type": "string"}, "community": {"type": "string"}, "crop_rotation": {"$ref": "#/$defs/TextValue", "description": "Whether or not crop is rotated, and if yes, rotation schedule"}, "cur_land_use": {"$ref": "#/$defs/TextValue", "description": "Present state of sample site"}, "cur_vegetation": {"$ref": "#/$defs/TextValue", "description": "Vegetation classification from one or more standard classification systems, or agricultural crop"}, "cur_vegetation_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in vegetation classification"}, "density": {"$ref": "#/$defs/QuantityValue", "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)"}, "depth": {"$ref": "#/$defs/QuantityValue", "description": "The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples."}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "diss_carb_dioxide": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample"}, "diss_hydrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved hydrogen"}, "diss_inorg_carb": {"$ref": "#/$defs/QuantityValue", "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter"}, "diss_inorg_phosp": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved inorganic phosphorus in the sample"}, "diss_org_carb": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid"}, "diss_org_nitro": {"$ref": "#/$defs/QuantityValue", "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2"}, "diss_oxygen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of dissolved oxygen"}, "dna_absorb1": {"description": "260/280 measurement of DNA sample purity", "type": "string"}, "dna_absorb2": {"description": "260/230 measurement of DNA sample purity", "type": "string"}, "dna_collect_site": {"description": "Provide information on the site your DNA sample was collected from", "type": "string"}, "dna_concentration": {"maximum": 2000, "minimum": 0, "type": "string"}, "dna_cont_type": {"$ref": "#/$defs/DnaContTypeEnum", "description": "Tube or plate (96-well)"}, "dna_cont_well": {"type": "string"}, "dna_container_id": {"type": "string"}, "dna_dnase": {"$ref": "#/$defs/DnaDnaseEnum"}, "dna_isolate_meth": {"description": "Describe the method/protocol/kit used to extract DNA/RNA.", "type": "string"}, "dna_organisms": {"description": "List any organisms 
known or suspected to grow in co-culture, as well as estimated % of the organism in that culture.", "type": "string"}, "dna_project_contact": {"type": "string"}, "dna_samp_id": {"type": "string"}, "dna_sample_format": {"$ref": "#/$defs/DnaSampleFormatEnum", "description": "Solution in which the DNA sample has been suspended"}, "dna_sample_name": {"description": "Give the DNA sample a name that is meaningful to you. Sample names must be unique across all JGI projects and contain a-z, A-Z, 0-9, - and _ only.", "type": "string"}, "dna_seq_project": {"type": "string"}, "dna_seq_project_name": {"type": "string"}, "dna_seq_project_pi": {"type": "string"}, "dna_volume": {"maximum": 1000, "minimum": 0, "type": "string"}, "dnase_rna": {"$ref": "#/$defs/DnaseRnaEnum"}, "drainage_class": {"$ref": "#/$defs/TextValue", "description": "Drainage classification from a standard system such as the USDA system"}, "ecosystem": {"description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", "type": "string"}, "ecosystem_category": {"description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", "type": "string"}, "ecosystem_subtype": {"description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", "type": "string"}, "ecosystem_type": {"description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. 
Ecosystem type is in position 3/5 in a GOLD path.", "type": "string"}, "elev": {"$ref": "#/$defs/QuantityValue", "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit."}, "emsl_biosample_identifiers": {"description": "A list of identifiers for the biosample from the EMSL database. This is used to link the biosample, as modeled by NMDC, to the biosample in the planned EMSL NEXUS database.", "items": {"type": "string"}, "type": "array"}, "env_broad_scale": {"$ref": "#/$defs/ControlledIdentifiedTermValue", "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO\u2019s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS"}, "env_local_scale": {"$ref": "#/$defs/ControlledIdentifiedTermValue", "description": "Report the entity or entities which are in the sample or specimen\u2019s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS."}, "env_medium": {"$ref": "#/$defs/ControlledIdentifiedTermValue", "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)."}, "env_package": {"$ref": "#/$defs/TextValue", "description": "MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported", "pattern": "[air|built environment|host\\-associated|human\\-associated|human\\-skin|human\\-oral|human\\-gut|human\\-vaginal|hydrocarbon resources\\-cores|hydrocarbon resources\\-fluids\\/swabs|microbial mat\\/biofilm|misc environment|plant\\-associated|sediment|soil|wastewater\\/sludge|water]"}, "experimental_factor": {"$ref": "#/$defs/ControlledTermValue", "description": "Experimental factors are essentially the variable aspects of an experiment design which can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). 
For a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO; for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI"}, "experimental_factor_other": {"description": "Other details about your sample that you feel can't be accurately represented in the available columns.", "type": "string"}, "extreme_event": {"description": "Unusual physical events that may have affected microbial populations", "type": "string"}, "fao_class": {"$ref": "#/$defs/TextValue", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups"}, "filter_method": {"description": "Type of filter used or how the sample was filtered", "type": "string"}, "fire": {"description": "Historical and/or physical evidence of fire", "pattern": "^[12]\\d{3}(?:(?:-(?:0[1-9]|1[0-2]))(?:-(?:0[1-9]|[12]\\d|3[01]))?)?(\\s+to\\s+[12]\\d{3}(?:(?:-(?:0[1-9]|1[0-2]))(?:-(?:0[1-9]|[12]\\d|3[01]))?)?)?$", "type": "string"}, "flooding": {"description": "Historical and/or physical evidence of flooding", "type": "string"}, "gaseous_environment": {"$ref": "#/$defs/QuantityValue", "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens"}, "geo_loc_name": {"$ref": "#/$defs/TextValue", "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)"}, "glucosidase_act": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of glucosidase activity"}, "gold_biosample_identifiers": {"description": "Unique identifier for a biosample submitted to GOLD that matches the NMDC submitted biosample", "items": {"type": "string"}, "pattern": "^GOLD:Gb[0-9]+$", "type": "array"}, "growth_facil": {"$ref": "#/$defs/ControlledTermValue", "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research"}, "habitat": {"type": "string"}, "heavy_metals": {"description": "Heavy metals present in the sample and their concentrations.", "items": {"$ref": "#/$defs/QuantityValue"}, "type": "array"}, "heavy_metals_meth": {"description": "Reference or method used in determining heavy metals", "items": {"$ref": "#/$defs/TextValue"}, "type": "array"}, "host_name": {"type": "string"}, "humidity_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens"}, "id": {"description": "An NMDC assigned unique identifier for a biosample submitted to NMDC.", "pattern": "^nmdc:", "type": "string"}, "igsn_biosample_identifiers": {"description": "A list of identifiers for the biosample from the IGSN database.", "items": {"type": "string"}, "type": "array"}, "img_identifiers": {"description": "A list of identifiers 
that relate the biosample to records in the IMG database.", "items": {"type": "string"}, "type": "array"}, "insdc_biosample_identifiers": {"description": "identifiers for corresponding sample in INSDC", "items": {"type": "string"}, "pattern": "^biosample:SAM[NED]([A-Z])?[0-9]+$", "type": "array"}, "isotope_exposure": {"description": "List isotope exposure or addition applied to your sample.", "type": "string"}, "lat_lon": {"$ref": "#/$defs/GeolocationValue", "description": "This is currently a required field but it's not clear if this should be required for human hosts"}, "lbc_thirty": {"$ref": "#/$defs/QuantityValue", "description": "lime buffer capacity, determined after 30 minute incubation"}, "lbceq": {"$ref": "#/$defs/QuantityValue", "description": "lime buffer capacity, determined at equilibrium after 5 day incubation"}, "light_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality."}, "link_addit_analys": {"$ref": "#/$defs/TextValue", "description": "Link to additional analysis results performed on the sample"}, "link_class_info": {"$ref": "#/$defs/TextValue", "description": "Link to digitized soil maps or other soil classification information"}, "link_climate_info": {"$ref": "#/$defs/TextValue", "description": "Link to climate resource"}, "local_class": {"$ref": "#/$defs/TextValue", "description": "Soil classification based on local soil classification system"}, "local_class_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining the local soil classification"}, "location": {"type": "string"}, "magnesium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of magnesium in the sample"}, "manganese": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of manganese in the sample"}, "mean_frict_vel": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of mean friction velocity"}, 
"mean_peak_frict_vel": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of mean peak friction velocity"}, "micro_biomass_c_meth": {"description": "Reference or method used in determining microbial biomass carbon", "type": "string"}, "micro_biomass_n_meth": {"description": "Reference or method used in determining microbial biomass nitrogen", "type": "string"}, "microbial_biomass_c": {"description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer.", "type": "string"}, "microbial_biomass_n": {"description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer.", "type": "string"}, "misc_param": {"$ref": "#/$defs/QuantityValue", "description": "Any other measurement performed or parameter collected, that is not listed here"}, "mod_date": {"description": "The last date on which the database information was modified.", "type": "string"}, "n_alkanes": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of n-alkanes; can include multiple n-alkanes"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "ncbi_taxonomy_name": {"type": "string"}, "nitrate": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrate in the sample"}, "nitrate_nitrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrate nitrogen in the sample"}, "nitrite": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrite in the sample"}, "nitrite_nitrogen": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of nitrite nitrogen in the sample"}, "non_microb_biomass": {"description": "Amount of biomass; should include the name for the part of biomass measured, e.g.insect, plant, total. 
Can include multiple measurements separated by ;", "type": "string"}, "non_microb_biomass_method": {"description": "Reference or method used in determining biomass", "type": "string"}, "org_matter": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of organic matter"}, "org_nitro": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of organic nitrogen"}, "org_nitro_method": {"description": "Method used for obtaining organic nitrogen", "type": "string"}, "organism_count": {"$ref": "#/$defs/QuantityValue", "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)"}, "other_treatment": {"description": "Other treatments applied to your samples that are not applicable to the provided fields", "type": "string"}, "oxy_stat_samp": {"$ref": "#/$defs/TextValue", "description": "Oxygenation status of sample"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "part_org_carb": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of particulate organic carbon"}, "perturbation": {"$ref": "#/$defs/TextValue", "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types"}, "petroleum_hydrocarb": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of petroleum hydrocarbon"}, "ph": {"$ref": "#/$defs/QuantityValue", "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid"}, "ph_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining ph"}, "phaeopigments": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of phaeopigments; can include multiple phaeopigments"}, "phosphate": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of phosphate"}, "phosplipid_fatt_acid": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of phospholipid fatty acids; can include multiple values"}, "pool_dna_extracts": {"$ref": "#/$defs/TextValue", "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given"}, "potassium": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of potassium in the sample"}, "pressure": {"$ref": "#/$defs/QuantityValue", "description": "Pressure to which the sample is subject, in atmospheres"}, "profile_position": {"$ref": "#/$defs/TextValue", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas"}, "project_id": {"description": "Proposal IDs or names associated with dataset", "type": "string"}, "proport_woa_temperature": {"type": "string"}, "proposal_dna": {"type": "string"}, "proposal_rna": {"type": "string"}, "redox_potential": {"$ref": "#/$defs/QuantityValue", "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential"}, "rel_to_oxygen": {"$ref": "#/$defs/TextValue", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments"}, "replicate_number": {"description": "If sending biological replicates, indicate the rep number here.", "type": "string"}, "rna_absorb1": {"description": "260/280 measurement of RNA sample purity", "type": "string"}, "rna_absorb2": {"description": "260/230 measurement of RNA sample purity", "type": "string"}, "rna_collect_site": {"description": "Provide information on the site your RNA sample was collected from", "type": "string"}, "rna_concentration": {"maximum": 1000, "minimum": 0, "type": "string"}, "rna_cont_type": {"$ref": "#/$defs/RnaContTypeEnum", "description": "Tube or plate (96-well)"}, "rna_cont_well": {"type": "string"}, "rna_container_id": {"type": "string"}, "rna_isolate_meth": {"description": "Describe the method/protocol/kit used to extract DNA/RNA.", "type": "string"}, "rna_organisms": {"description": "List any organisms known or suspected to grow in co-culture, as well as estimated % of
the organism in that culture.", "type": "string"}, "rna_project_contact": {"type": "string"}, "rna_samp_id": {"type": "string"}, "rna_sample_format": {"$ref": "#/$defs/RnaSampleFormatEnum", "description": "Solution in which the RNA sample has been suspended"}, "rna_sample_name": {"description": "Give the RNA sample a name that is meaningful to you. Sample names must be unique across all JGI projects and contain a-z, A-Z, 0-9, - and _ only.", "maximum": 2000, "minimum": 0, "type": "string"}, "rna_seq_project": {"type": "string"}, "rna_seq_project_name": {"type": "string"}, "rna_seq_project_pi": {"type": "string"}, "rna_volume": {"type": "string"}, "salinity": {"$ref": "#/$defs/QuantityValue", "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater."}, "salinity_category": {"description": "Categorical description of the sample's salinity. Examples: halophile, halotolerant, hypersaline, euryhaline", "type": "string"}, "salinity_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining salinity"}, "samp_collec_method": {"description": "The method employed for collecting the sample.", "type": "string"}, "samp_mat_process": {"$ref": "#/$defs/ControlledTermValue", "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed."}, "samp_name": {"description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name.", "type": "string"}, "samp_size": {"$ref": "#/$defs/QuantityValue", "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected."}, "samp_store_dur": {"$ref": "#/$defs/TextValue", "description": "Duration for which the sample was stored"}, "samp_store_loc": {"$ref": "#/$defs/TextValue", "description": "Location at which sample was stored, usually name of a specific freezer/room"}, "samp_store_temp": {"$ref": "#/$defs/QuantityValue", "description": "Temperature at which sample was stored, e.g. -80 degree Celsius"}, "samp_vol_we_dna_ext": {"$ref": "#/$defs/QuantityValue", "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (mixs:0000001)."}, "sample_collection_site": {"type": "string"}, "sample_link": {"description": "JsonObj()", "items": {"type": "string"}, "type": "array"}, "sample_shipped": {"description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample sent to EMSL.", "type": "string"}, "sample_type": {"$ref": "#/$defs/SampleTypeEnum", "description": "Type of sample being submitted"}, "season_precpt": {"$ref": "#/$defs/QuantityValue", "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps."}, "season_temp": {"$ref": "#/$defs/QuantityValue", "description": "Mean seasonal temperature"}, "sieving": {"$ref": "#/$defs/QuantityValue", "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved"}, "size_frac_low": {"$ref": "#/$defs/QuantityValue", "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample"}, "size_frac_up": {"$ref": "#/$defs/QuantityValue", "description": "Refers to the mesh/pore size used to retain the sample. Materials smaller than the size threshold are excluded from the sample"}, "slope_aspect": {"$ref": "#/$defs/QuantityValue", "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees). - This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration."}, "slope_gradient": {"$ref": "#/$defs/QuantityValue", "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer"}, "sodium": {"$ref": "#/$defs/QuantityValue", "description": "Sodium concentration in the sample"}, "soil_type": {"$ref": "#/$defs/TextValue", "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes."}, "soil_type_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining soil series name or other lower-level classification"}, "soluble_iron_micromol": {"type": "string"}, "source_mat_id": {"$ref": "#/$defs/TextValue", "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)."}, "specific_ecosystem": {"description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", "type": "string"}, "start_date_inc": {"description": "Date the incubation was started. 
Only relevant for incubation samples.", "type": "string"}, "start_time_inc": {"description": "Time the incubation was started. Only relevant for incubation samples.", "type": "string"}, "store_cond": {"$ref": "#/$defs/TextValue", "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)."}, "subsurface_depth": {"$ref": "#/$defs/QuantityValue"}, "sulfate": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of sulfate in the sample"}, "sulfide": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of sulfide in the sample"}, "technical_reps": {"description": "If sending technical replicates of the same sample, indicate the replicate count.", "type": "string"}, "temp": {"$ref": "#/$defs/QuantityValue", "description": "Temperature of the sample at the time of sampling."}, "tidal_stage": {"$ref": "#/$defs/TextValue", "description": "Stage of tide"}, "tillage": {"$ref": "#/$defs/TextValue", "description": "Note method(s) used for tilling"}, "tot_carb": {"$ref": "#/$defs/QuantityValue", "description": "Total carbon content"}, "tot_depth_water_col": {"$ref": "#/$defs/QuantityValue", "description": "Measurement of total depth of water column"}, "tot_diss_nitro": {"$ref": "#/$defs/QuantityValue", "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen"}, "tot_nitro_cont_meth": {"description": "Reference or method used in determining the total nitrogen", "type": "string"}, "tot_nitro_content": {"$ref": "#/$defs/QuantityValue", "description": "Total nitrogen content of the sample"}, "tot_org_c_meth": {"$ref": "#/$defs/TextValue", "description": "Reference or method used in determining total organic carbon"}, "tot_org_carb": {"$ref": "#/$defs/QuantityValue", "description": "Definition for soil: total organic carbon content of the soil, definition otherwise: total organic carbon content"}, 
"tot_phosp": {"$ref": "#/$defs/QuantityValue", "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "water_cont_soil_meth": {"description": "Reference or method used in determining the water content of soil", "type": "string"}, "water_content": {"description": "Water content measurement", "type": "string"}, "watering_regm": {"$ref": "#/$defs/QuantityValue", "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens"}, "zinc": {"$ref": "#/$defs/QuantityValue", "description": "Concentration of zinc in the sample"}}, "required": ["part_of", "id", "env_broad_scale", "env_local_scale", "env_medium"], "title": "Biosample", "type": "object"}, "BiosampleCategoryEnum": {"description": "Funding-based, sample location-based, or experimental method-based defined categories", "enum": ["LTER", "SIP", "SFA", "FICUS", "NEON"], "title": "BiosampleCategoryEnum", "type": "string"}, "BiosampleProcessing": {"additionalProperties": false, "description": "A process that takes one or more biosamples as inputs and generates one or more as outputs. 
Examples of outputs include samples cultivated from another sample or data objects created by instrument runs.", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "BiosampleProcessing", "type": "object"}, "BioticRelationshipEnum": {"description": "", "enum": ["free living", "parasite", "commensal", "symbiont"], "title": "BioticRelationshipEnum", "type": "string"}, "BooleanValue": {"additionalProperties": false, "description": "A value that is a boolean", "properties": {"has_boolean_value": {"description": "Links a quantity value to a boolean", "type": "boolean"}, "has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "BooleanValue", "type": "object"}, "BuildDocsEnum": {"description": "", "enum": ["building information model", "commissioning report", "complaint logs", "contract administration", "cost estimate", "janitorial schedules or logs", "maintenance plans", "schedule", "sections", "shop drawings", "submittals", "ventilation system", "windows"], "title": "BuildDocsEnum", "type": "string"}, "BuildOccupTypeEnum": {"description": "", "enum": ["office", "market", "restaurant", "residence", "school", "residential", "commercial", "low rise", "high rise", "wood framed", "health care", "airport", "sports complex"], "title": "BuildOccupTypeEnum", "type": "string"}, "BuildingSettingEnum": {"description": "", "enum": ["urban", "suburban", "exurban", "rural"], "title": "BuildingSettingEnum", "type": "string"}, "CeilCondEnum": {"description": "", "enum": ["new", "visible wear", "needs repair", "damaged", "rupture"], "title": "CeilCondEnum", "type": "string"}, "CeilFinishMatEnum": {"description": "", "enum": ["drywall", "mineral fibre", "tiles", "PVC", "plasterboard", "metal", "fiberglass", "stucco", "mineral wool/calcium silicate", "wood"], "title": "CeilFinishMatEnum", "type": "string"}, "CeilTextureEnum": {"description": "", "enum": ["crows feet", "crows-foot stomp", "double skip", "hawk and trowel", "knockdown", "popcorn", "orange peel", "rosebud stomp", "Santa-Fe texture", "skip trowel", "smooth", "stomp knockdown", "swirl"], "title": "CeilTextureEnum", "type": "string"}, "CeilTypeEnum": {"description": "", "enum": ["cathedral", "dropped", "concave", "barrel-shaped", "coffered", "cove", "stretched"], "title": "CeilTypeEnum", "type": "string"}, "ChemicalEntity": {"additionalProperties": false, "description": "An atom or molecule that can be represented with a chemical formula. Include lipids, glycans, natural products, drugs. 
There may be different terms for distinct acid-base forms, protonation states", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "ChemicalEntity", "type": "object"}, "CollectingBiosamplesFromSite": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "has_inputs": {"items": {"type": "string"}, "type": "array"}, "has_outputs": {"items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "participating_agent": {"$ref": "#/$defs/Agent"}}, "required": ["has_inputs", "has_outputs", "id"], "title": "CollectingBiosamplesFromSite", "type": "object"}, "ContainerTypeEnum": {"description": "", "enum": ["screw_top_conical"], "title": "ContainerTypeEnum", "type": "string"}, "ControlledIdentifiedTermValue": {"additionalProperties": false, "description": "A controlled term or class from an ontology, requiring the presence of term with an id", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", "type": "string"}, "term": {"$ref": "#/$defs/OntologyClass", "description": "pointer to an ontology class"}, "was_generated_by": {"type": "string"}}, "required": ["term"], "title": "ControlledIdentifiedTermValue", "type": "object"}, "ControlledTermValue": {"additionalProperties": false, "description": "A controlled term or class from an ontology", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "term": {"$ref": "#/$defs/OntologyClass", "description": "pointer to an ontology class"}, "was_generated_by": {"type": "string"}}, "title": "ControlledTermValue", "type": "object"}, "CreditAssociation": {"additionalProperties": false, "description": "This class supports binding associated researchers to studies. There will be at least a slot for a CRediT Contributor Role (https://casrai.org/credit/) and for a person value Specifically see the associated researchers tab on the NMDC_SampleMetadata-V4_CommentsForUpdates at https://docs.google.com/spreadsheets/d/1INlBo5eoqn2efn4H2P2i8rwRBtnbDVTqXrochJEAPko/edit#gid=0", "properties": {"applied_role": {"$ref": "#/$defs/CreditEnum"}, "applied_roles": {"items": {"$ref": "#/$defs/CreditEnum"}, "type": "array"}, "applies_to_person": {"$ref": "#/$defs/PersonValue"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}}, "required": ["applies_to_person", "applied_roles"], "title": "CreditAssociation", "type": "object"}, "CreditEnum": {"description": "", "enum": ["Conceptualization", "Data curation", "Formal Analysis", "Funding acquisition", "Investigation", "Methodology", "Project administration", "Resources", "Software", "Supervision", "Validation", "Visualization", "Writing original draft", "Writing review and editing", "Principal Investigator", "Submitter"], "title": "CreditEnum", "type": "string"}, "CurLandUseEnum": {"description": "", "enum": ["cities", "farmstead", "industrial areas", "roads/railroads", "rock", "sand", "gravel", "mudflats", "salt flats", "badlands", "permanent snow or ice", "saline seeps", "mines/quarries", "oil waste areas", "small grains", "row crops", "vegetable crops", "horticultural plants (e.g. tulips)", "marshlands (grass,sedges,rushes)", "tundra (mosses,lichens)", "rangeland", "pastureland (grasslands used for livestock grazing)", "hayland", "meadows (grasses,alfalfa,fescue,bromegrass,timothy)", "shrub land (e.g. mesquite,sage-brush,creosote bush,shrub oak,eucalyptus)", "successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries)", "shrub crops (blueberries,nursery ornamentals,filberts)", "vine crops (grapes)", "conifers (e.g. pine,spruce,fir,cypress)", "hardwoods (e.g. oak,hickory,elm,aspen)", "intermixed hardwood and conifers", "tropical (e.g. mangrove,palms)", "rainforest (evergreen forest receiving greater than 406 cm annual rainfall)", "swamp (permanent or semi-permanent water body dominated by woody plants)", "crop trees (nuts,fruit,christmas trees,nursery trees)"], "title": "CurLandUseEnum", "type": "string"}, "DataObject": {"additionalProperties": false, "description": "An object that primarily consists of symbols that represent information. 
Files, records, and omics data are examples of data objects.", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "compression_type": {"description": "If provided, specifies the compression type", "type": "string"}, "data_object_type": {"$ref": "#/$defs/FileTypeEnum", "description": "The type of file represented by the data object."}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "file_size_bytes": {"description": "Size of the file in bytes", "type": "integer"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "md5_checksum": {"description": "MD5 checksum of file (pre-compressed)", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "url": {"type": "string"}, "was_generated_by": {"type": "string"}}, "required": ["id", "name", "description"], "title": "DataObject", "type": "object"}, "Database": {"additionalProperties": false, "description": "An abstract holder for any set of metadata and data. It does not need to correspond to an actual managed database top level holder class. When translated to JSON-Schema this is the 'root' object. 
It should contain pointers to other objects of interest", "properties": {"activity_set": {"description": "This property links a database object to the set of workflow activities.", "items": {"$ref": "#/$defs/WorkflowExecutionActivity"}, "type": "array"}, "biosample_set": {"description": "This property links a database object to the set of samples within it.", "items": {"$ref": "#/$defs/Biosample"}, "type": "array"}, "collecting_biosamples_from_site_set": {"items": {"$ref": "#/$defs/CollectingBiosamplesFromSite"}, "type": "array"}, "data_object_set": {"description": "This property links a database object to the set of data objects within it.", "items": {"$ref": "#/$defs/DataObject"}, "type": "array"}, "dissolving_activity_set": {"items": {"$ref": "#/$defs/DissolvingActivity"}, "type": "array"}, "field_research_site_set": {"items": {"$ref": "#/$defs/FieldResearchSite"}, "type": "array"}, "functional_annotation_set": {"description": "This property links a database object to the set of all functional annotations", "items": {"$ref": "#/$defs/FunctionalAnnotation"}, "type": "array"}, "genome_feature_set": {"description": "This property links a database object to the set of all features", "items": {"$ref": "#/$defs/GenomeFeature"}, "type": "array"}, "mags_activity_set": {"description": "This property links a database object to the set of MAGs analysis activities.", "items": {"$ref": "#/$defs/MagsAnalysisActivity"}, "type": "array"}, "material_sample_set": {"items": {"$ref": "#/$defs/MaterialSample"}, "type": "array"}, "material_sampling_activity_set": {"items": {"$ref": "#/$defs/MaterialSamplingActivity"}, "type": "array"}, "metabolomics_analysis_activity_set": {"description": "This property links a database object to the set of metabolomics analysis activities.", "items": {"$ref": "#/$defs/MetabolomicsAnalysisActivity"}, "type": "array"}, "metagenome_annotation_activity_set": {"description": "This property links a database object to the set of metagenome annotation 
activities.", "items": {"$ref": "#/$defs/MetagenomeAnnotationActivity"}, "type": "array"}, "metagenome_assembly_set": {"description": "This property links a database object to the set of metagenome assembly activities.", "items": {"$ref": "#/$defs/MetagenomeAssembly"}, "type": "array"}, "metaproteomics_analysis_activity_set": {"description": "This property links a database object to the set of metaproteomics analysis activities.", "items": {"$ref": "#/$defs/MetaproteomicsAnalysisActivity"}, "type": "array"}, "metatranscriptome_activity_set": {"description": "TODO", "items": {"$ref": "#/$defs/MetatranscriptomeActivity"}, "type": "array"}, "nom_analysis_activity_set": {"description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", "items": {"$ref": "#/$defs/NomAnalysisActivity"}, "type": "array"}, "omics_processing_set": {"description": "This property links a database object to the set of omics processings within it.", "items": {"$ref": "#/$defs/OmicsProcessing"}, "type": "array"}, "reaction_activity_set": {"items": {"$ref": "#/$defs/ReactionActivity"}, "type": "array"}, "read_based_taxonomy_analysis_activity_set": {"description": "This property links a database object to the set of read based analysis activities.", "items": {"$ref": "#/$defs/ReadBasedTaxonomyAnalysisActivity"}, "type": "array"}, "read_qc_analysis_activity_set": {"description": "This property links a database object to the set of read QC analysis activities.", "items": {"$ref": "#/$defs/ReadQcAnalysisActivity"}, "type": "array"}, "study_set": {"description": "This property links a database object to the set of studies within it.", "items": {"$ref": "#/$defs/Study"}, "type": "array"}}, "title": "Database", "type": "object"}, "DeposEnvEnum": {"description": "", "enum": ["Continental - Alluvial", "Continental - Aeolian", "Continental - Fluvial", "Continental - Lacustrine", "Transitional - Deltaic", "Transitional - Tidal", "Transitional - 
Lagoonal", "Transitional - Beach", "Transitional - Lake", "Marine - Shallow", "Marine - Deep", "Marine - Reef", "Other - Evaporite", "Other - Glacial", "Other - Volcanic", "other"], "title": "DeposEnvEnum", "type": "string"}, "DeviceTypeEnum": {"description": "", "enum": ["orbital_shaker", "thermomixer"], "title": "DeviceTypeEnum", "type": "string"}, "DissolvingActivity": {"additionalProperties": false, "description": "", "properties": {"dissolution_aided_by": {"$ref": "#/$defs/LabDevice"}, "dissolution_reagent": {"$ref": "#/$defs/SolventEnum"}, "dissolution_volume": {"$ref": "#/$defs/QuantityValue"}, "dissolved_in": {"$ref": "#/$defs/MaterialContainer"}, "material_input": {"type": "string"}, "material_output": {"type": "string"}}, "title": "DissolvingActivity", "type": "object"}, "DnaContTypeEnum": {"description": "", "enum": ["plate", "tube"], "title": "DnaContTypeEnum", "type": "string"}, "DnaDnaseEnum": {"description": "", "enum": ["no", "yes"], "title": "DnaDnaseEnum", "type": "string"}, "DnaSampleFormatEnum": {"description": "", "enum": ["10 mM Tris-HCl", "DNAStable", "Ethanol", "Low EDTA TE", "MDA reaction buffer", "PBS", "Pellet", "RNAStable", "TE", "Water"], "title": "DnaSampleFormatEnum", "type": "string"}, "DnaseRnaEnum": {"description": "", "enum": ["no", "yes"], "title": "DnaseRnaEnum", "type": "string"}, "DoorCompTypeEnum": {"description": "", "enum": ["metal covered", "revolving", "sliding", "telescopic"], "title": "DoorCompTypeEnum", "type": "string"}, "DoorCondEnum": {"description": "", "enum": ["damaged", "needs repair", "new", "rupture", "visible wear"], "title": "DoorCondEnum", "type": "string"}, "DoorDirectEnum": {"description": "", "enum": ["inward", "outward", "sideways"], "title": "DoorDirectEnum", "type": "string"}, "DoorLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "DoorLocEnum", "type": "string"}, "DoorMatEnum": {"description": "", "enum": ["aluminum", "cellular PVC", "engineered plastic", 
"fiberboard", "fiberglass", "metal", "thermoplastic alloy", "vinyl", "wood", "wood/plastic composite"], "title": "DoorMatEnum", "type": "string"}, "DoorMoveEnum": {"description": "", "enum": ["collapsible", "folding", "revolving", "rolling shutter", "sliding", "swinging"], "title": "DoorMoveEnum", "type": "string"}, "DoorTypeEnum": {"description": "", "enum": ["composite", "metal", "wooden"], "title": "DoorTypeEnum", "type": "string"}, "DoorTypeMetalEnum": {"description": "", "enum": ["collapsible", "corrugated steel", "hollow", "rolling shutters", "steel plate"], "title": "DoorTypeMetalEnum", "type": "string"}, "DoorTypeWoodEnum": {"description": "", "enum": ["bettened and ledged", "battened", "ledged and braced", "ledged and framed", "ledged, braced and frame", "framed and paneled", "glashed or sash", "flush", "louvered", "wire gauged"], "title": "DoorTypeWoodEnum", "type": "string"}, "DrainageClassEnum": {"description": "", "enum": ["very poorly", "poorly", "somewhat poorly", "moderately well", "well", "excessively drained"], "title": "DrainageClassEnum", "type": "string"}, "DrawingsEnum": {"description": "", "enum": ["operation", "as built", "construction", "bid", "design", "building navigation map", "diagram", "sketch"], "title": "DrawingsEnum", "type": "string"}, "EnvironmentalMaterialTerm": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "EnvironmentalMaterialTerm", "type": "object"}, "ExtWallOrientEnum": {"description": "", "enum": ["north", "south", "east", "west", "northeast", "southeast", "southwest", "northwest"], "title": "ExtWallOrientEnum", "type": "string"}, "ExtWindowOrientEnum": {"description": "", "enum": ["north", "south", "east", "west", "northeast", "southeast", "southwest", "northwest"], "title": "ExtWindowOrientEnum", "type": "string"}, "FaoClassEnum": {"description": "", "enum": ["Acrisols", "Andosols", "Arenosols", "Cambisols", "Chernozems", "Ferralsols", "Fluvisols", "Gleysols", "Greyzems", "Gypsisols", "Histosols", "Kastanozems", "Lithosols", "Luvisols", "Nitosols", "Phaeozems", "Planosols", "Podzols", "Podzoluvisols", "Rankers", "Regosols", "Rendzinas", "Solonchaks", "Solonetz", "Vertisols", "Yermosols"], "title": "FaoClassEnum", "type": "string"}, "FieldResearchSite": {"additionalProperties": false, "description": "A site, outside of a laboratory, from which biosamples may be collected.", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "FieldResearchSite", "type": "object"}, "FileTypeEnum": {"description": "", "enum": ["Metagenome Raw Reads", "FT ICR-MS Analysis Results", "GC-MS Metabolomics Results", "Metaproteomics Workflow Statistics", "Protein Report", "Peptide Report", "Unfiltered Metaproteomics Results", "Read Count and RPKM", "QC non-rRNA R2", "QC non-rRNA R1", "Metagenome Bins", "CheckM Statistics", "GOTTCHA2 Krona Plot", "GOTTCHA2 Classification Report", "GOTTCHA2 Report Full", "Kraken2 Krona Plot", "Centrifuge Krona Plot", "Centrifuge output report file", "Kraken2 Classification Report", "Kraken2 Taxonomic Classification", "Centrifuge Classification Report", "Centrifuge Taxonomic Classification", "Structural Annotation GFF", "Functional Annotation GFF", "Annotation Amino Acid FASTA", "Annotation Enzyme Commission", "Annotation KEGG Orthology", "Assembly Coverage BAM", "Assembly AGP", "Assembly Scaffolds", "Assembly Contigs", "Assembly Coverage Stats", "Filtered Sequencing Reads", "QC Statistics", "TIGRFam Annotation GFF", "CRT Annotation GFF", "Genmark Annotation GFF", "Prodigal Annotation GFF", "TRNA Annotation GFF", "Misc Annotation GFF", "RFAM Annotation GFF", "TMRNA Annotation GFF", "KO_EC Annotation GFF", "Product Names", "Gene Phylogeny tsv", "Crisprt Terms", "Clusters of Orthologous Groups (COG) Annotation GFF", "CATH FunFams (Functional Families) Annotation GFF", "SUPERFam Annotation GFF", "SMART Annotation GFF", "Pfam Annotation GFF", "Direct Infusion FT ICR-MS Raw Data"], "title": "FileTypeEnum", "type": "string"}, "FilterTypeEnum": {"description": "", "enum": ["particulate air filter", "chemical air filter", "low-MERV pleated media", "HEPA", "electrostatic", "gas-phase or ultraviolet air treatments"], "title": "FilterTypeEnum", "type": "string"}, "FloorCondEnum": {"description": "", 
"enum": ["new", "visible wear", "needs repair", "damaged", "rupture"], "title": "FloorCondEnum", "type": "string"}, "FloorFinishMatEnum": {"description": "", "enum": ["tile", "wood strip or parquet", "carpet", "rug", "laminate wood", "lineoleum", "vinyl composition tile", "sheet vinyl", "stone", "bamboo", "cork", "terrazo", "concrete", "none", "sealed", "clear finish", "paint", "none or unfinished"], "title": "FloorFinishMatEnum", "type": "string"}, "FloorStrucEnum": {"description": "", "enum": ["balcony", "floating floor", "glass floor", "raised floor", "sprung floor", "wood-framed", "concrete"], "title": "FloorStrucEnum", "type": "string"}, "FloorWaterMoldEnum": {"description": "", "enum": ["mold odor", "wet floor", "water stains", "wall discoloration", "floor discoloration", "ceiling discoloration", "peeling paint or wallpaper", "bulging walls", "condensation"], "title": "FloorWaterMoldEnum", "type": "string"}, "FreqCleanEnum": {"description": "", "enum": ["Daily", "Weekly", "Monthly", "Quarterly", "Annually", "other"], "title": "FreqCleanEnum", "type": "string"}, "FunctionalAnnotation": {"additionalProperties": false, "description": "An assignment of a function term (e.g. reaction or pathway) that is executed by a gene product, or which the gene product plays an active role in. Functional annotations can be assigned manually by curators, or automatically in workflows. 
In the context of NMDC, all function annotation is performed automatically, typically using HMM or Blast type methods", "properties": {"has_function": {"pattern": "^(KEGG_PATHWAY:\\w{2,4}\\d{5}|KEGG.REACTION:R\\d+|RHEA:\\d{5}|MetaCyc:[A-Za-z0-9+_.%-:]+|EC:\\d{1,2}(\\.\\d{0,3}){0,3}|GO:\\d{7}|MetaNetX:(MNXR\\d+|EMPTY)|SEED:\\w+|KEGG\\.ORTHOLOGY:K\\d+|EGGNOG:\\w+|PFAM:PF\\d{5}|TIGRFAM:TIGR\\d+|SUPFAM:\\w+|CATH:[1-6]\\.[0-9]+\\.[0-9]+\\.[0-9]+|PANTHER.FAMILY:PTHR\\d{5}(\\:SF\\d{1,3})?)$", "type": "string"}, "subject": {"type": "string"}, "was_generated_by": {"description": "provenance for the annotation.", "type": "string"}}, "title": "FunctionalAnnotation", "type": "object"}, "FurnitureEnum": {"description": "", "enum": ["cabinet", "chair", "desks"], "title": "FurnitureEnum", "type": "string"}, "GenderRestroomEnum": {"description": "", "enum": ["all gender", "female", "gender neurtral", "male", "male and female", "unisex"], "title": "GenderRestroomEnum", "type": "string"}, "GeneProduct": {"additionalProperties": false, "description": "A molecule encoded by a gene that has an evolved function", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "GeneProduct", "type": "object"}, "GenomeFeature": {"additionalProperties": false, "description": "A feature localized to an interval along a genome", "title": "GenomeFeature", "type": "object"}, "GeolocationValue": {"additionalProperties": false, "description": "A normalized value for a location on the earth's surface", "properties": {"has_raw_value": {"description": "The raw value for a geolocation should follow {lat} {long}", "type": "string"}, "latitude": {"description": "latitude", "type": "number"}, "longitude": {"description": "longitude", "type": "number"}, "was_generated_by": {"type": "string"}}, "title": "GeolocationValue", "type": "object"}, "GrowthHabitEnum": {"description": "", "enum": ["erect", "semi-erect", "spreading", "prostrate"], "title": "GrowthHabitEnum", "type": "string"}, "HandidnessEnum": {"description": "", "enum": ["ambidexterity", "left handedness", "mixed-handedness", "right handedness"], "title": "HandidnessEnum", "type": "string"}, "HcProducedEnum": {"description": "", "enum": ["Oil", "Gas-Condensate", "Gas", "Bitumen", "Coalbed Methane", "other"], "title": "HcProducedEnum", "type": "string"}, "HcrEnum": {"description": "", "enum": ["Oil Reservoir", "Gas Reservoir", "Oil Sand", "Coalbed", "Shale", "Tight Oil Reservoir", "Tight Gas Reservoir", "other"], "title": "HcrEnum", "type": "string"}, "HcrGeolAgeEnum": {"description": "", "enum": ["Archean", "Cambrian", "Carboniferous", "Cenozoic", "Cretaceous", "Devonian", "Jurassic", "Mesozoic", "Neogene", "Ordovician", "Paleogene", "Paleozoic", "Permian", "Precambrian", "Proterozoic", "Silurian", "Triassic", "other"], "title": "HcrGeolAgeEnum", "type": "string"}, "HeatCoolTypeEnum": {"description": "", "enum": ["radiant system", "heat pump", "forced air system", "steam forced heat", "wood stove"], 
"title": "HeatCoolTypeEnum", "type": "string"}, "HeatDelivLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "HeatDelivLocEnum", "type": "string"}, "HorizonEnum": {"description": "", "enum": ["O horizon", "A horizon", "E horizon", "B horizon", "C horizon", "R layer", "Permafrost"], "title": "HorizonEnum", "type": "string"}, "HostSexEnum": {"description": "", "enum": ["female", "hermaphrodite", "male", "neuter"], "title": "HostSexEnum", "type": "string"}, "ImageValue": {"additionalProperties": false, "description": "An attribute value representing an image.", "properties": {"description": {"description": "a human-readable description of a thing", "type": "string"}, "display_order": {"description": "When rendering information, this attribute specifies the order in which the information should be rendered.", "type": "string"}, "has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "url": {"type": "string"}, "was_generated_by": {"type": "string"}}, "title": "ImageValue", "type": "object"}, "IndoorSpaceEnum": {"description": "", "enum": ["bedroom", "office", "bathroom", "foyer", "kitchen", "locker room", "hallway", "elevator"], "title": "IndoorSpaceEnum", "type": "string"}, "IndoorSurfEnum": {"description": "", "enum": ["cabinet", "ceiling", "counter top", "door", "shelving", "vent cover", "window", "wall"], "title": "IndoorSurfEnum", "type": "string"}, "Instrument": {"additionalProperties": false, "description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent[OBI].", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Instrument", "type": "object"}, "IntWallCondEnum": {"description": "", "enum": ["new", "visible wear", "needs repair", "damaged", "rupture"], "title": "IntWallCondEnum", "type": "string"}, "IntegerValue": {"additionalProperties": false, "description": "A value that is an integer", "properties": {"has_numeric_value": {"description": "Links a quantity value to a number", "type": "number"}, "has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "IntegerValue", "type": "object"}, "LabDevice": {"additionalProperties": false, "description": "", "properties": {"activity_speed": {"$ref": "#/$defs/QuantityValue"}, "activity_temperature": {"$ref": "#/$defs/QuantityValue"}, "activity_time": {"$ref": "#/$defs/QuantityValue"}, "device_type": {"$ref": "#/$defs/DeviceTypeEnum"}}, "title": "LabDevice", "type": "object"}, "LightTypeEnum": {"description": "", "enum": ["natural light", "electric light", "desk lamp", "flourescent lights", "none"], "title": "LightTypeEnum", "type": "string"}, "LithologyEnum": {"description": "", "enum": ["Basement", "Chalk", "Chert", "Coal", "Conglomerate", "Diatomite", "Dolomite", "Limestone", "Sandstone", "Shale", "Siltstone", "Volcanic", "other"], "title": "LithologyEnum", "type": "string"}, "MagBin": {"additionalProperties": false, "description": "", "properties": {"bin_name": {"type": "string"}, "bin_quality": {"type": "string"}, "completeness": {"type": "number"}, "contamination": {"type": "number"}, "gene_count": {"type": "integer"}, "gtdbtk_class": {"type": "string"}, "gtdbtk_domain": {"type": "string"}, "gtdbtk_family": {"type": "string"}, "gtdbtk_genus": {"type": "string"}, "gtdbtk_order": {"type": 
"string"}, "gtdbtk_phylum": {"type": "string"}, "gtdbtk_species": {"type": "string"}, "num_16s": {"type": "integer"}, "num_23s": {"type": "integer"}, "num_5s": {"type": "integer"}, "num_t_rna": {"type": "integer"}, "number_of_contig": {"type": "integer"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}}, "title": "MagBin", "type": "object"}, "MagsAnalysisActivity": {"additionalProperties": false, "description": "A workflow execution activity that uses computational binning tools to group assembled contigs into genomes", "properties": {"binned_contig_num": {"type": "integer"}, "ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "input_contig_num": {"type": "integer"}, "low_depth_contig_num": {"type": "integer"}, "mags_list": {"items": {"$ref": "#/$defs/MagBin"}, "type": "array"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "too_short_contig_num": {"type": "integer"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "unbinned_contig_num": {"type": "integer"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MagsAnalysisActivity", "type": "object"}, "MaterialContainer": {"additionalProperties": false, "description": "", "properties": {"container_size": {"$ref": "#/$defs/QuantityValue"}, "container_type": {"$ref": "#/$defs/ContainerTypeEnum"}}, "title": "MaterialContainer", "type": "object"}, "MaterialEntity": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": 
{"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "MaterialEntity", "type": "object"}, "MaterialSample": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "MaterialSample", "type": "object"}, "MaterialSamplingActivity": {"additionalProperties": false, "description": "", "properties": {"amount_collected": {"$ref": "#/$defs/QuantityValue"}, "biosample_input": {"type": "string"}, "collected_into": {"$ref": "#/$defs/MaterialContainer"}, "material_output": {"type": "string"}, "sampling_method": {"$ref": "#/$defs/SamplingMethodEnum"}}, "title": "MaterialSamplingActivity", "type": "object"}, "MechStrucEnum": {"description": "", "enum": ["subway", "coach", "carriage", "elevator", "escalator", "boat", "train", "car", "bus"], "title": "MechStrucEnum", "type": "string"}, "MetaboliteQuantification": {"additionalProperties": false, "description": "This is used to link a metabolomics analysis workflow to a specific metabolite", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}}, "title": "MetaboliteQuantification", "type": "object"}, "MetabolomicsAnalysisActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"description": "The instrument used to collect the data used in the analysis", "type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetabolomicsAnalysisActivity", "type": "object"}, "MetagenomeAnnotationActivity": {"additionalProperties": false, "description": "A workflow execution activity that provides functional and structural annotation of assembled metagenome contigs", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "gold_analysis_project_identifiers": {"description": "identifiers for corresponding analysis project in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Ga[0-9]+$", "type": "array"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetagenomeAnnotationActivity", "type": "object"}, "MetagenomeAssembly": {"additionalProperties": false, "description": "A workflow execution activity that converts sequencing reads into an assembled metagenome.", "properties": {"asm_score": {"description": "A score for comparing metagenomic assembly quality from same sample.", "type": "number"}, "contig_bp": {"description": "Total size in bp of all contigs.", "type": "number"}, "contigs": {"description": "Total sequence count of all contigs.", "type": "number"},
"ctg_l50": {"description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", "type": "number"}, "ctg_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", "type": "number"}, "ctg_logsum": {"description": "The sum of the (length*log(length)) of all contigs, times some constant. Increase the contiguity, the score will increase", "type": "number"}, "ctg_max": {"description": "Maximum contig length.", "type": "number"}, "ctg_n50": {"description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number of contigs whose length sum makes up half of genome size.", "type": "number"}, "ctg_n90": {"description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", "type": "number"}, "ctg_powsum": {"description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "gap_pct": {"description": "The gap size percentage of all scaffolds.", "type": "number"}, "gc_avg": {"description": "Average of GC content of all contigs.", "type": "number"}, "gc_std": {"description": "Standard deviation of GC content of all contigs.", "type": "number"}, "git_url": 
{"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "insdc_assembly_identifiers": {"pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "num_aligned_reads": {"description": "The sequence count number of input reads aligned to assembled contigs.", "type": "number"}, "num_input_reads": {"description": "The sequence count number of input reads for assembly.", "type": "number"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "scaf_bp": {"description": "Total size in bp of all scaffolds.", "type": "number"}, "scaf_l50": {"description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", "type": "number"}, "scaf_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", "type": "number"}, "scaf_l_gt50k": {"description": "Total size in bp of all scaffolds greater than 50 KB.", "type": "number"}, "scaf_logsum": {"description": "The sum of the (length*log(length)) of all scaffolds, times some constant. 
Increase the contiguity, the score will increase", "type": "number"}, "scaf_max": {"description": "Maximum scaffold length.", "type": "number"}, "scaf_n50": {"description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", "type": "number"}, "scaf_n90": {"description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.", "type": "number"}, "scaf_n_gt50k": {"description": "Total sequence count of scaffolds greater than 50 KB.", "type": "number"}, "scaf_pct_gt50k": {"description": "Total sequence size percentage of scaffolds greater than 50 KB.", "type": "number"}, "scaf_powsum": {"description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "scaffolds": {"description": "Total sequence count of all scaffolds.", "type": "number"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetagenomeAssembly", "type": "object"}, "MetaproteomicsAnalysisActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"description": "The instrument used to collect the data used in the analysis", "type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetaproteomicsAnalysisActivity", "type": "object"}, "MetatranscriptomeActivity": {"additionalProperties": false, "description": "A metatranscriptome activity that e.g. 
pools assembly and annotation activity.", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetatranscriptomeActivity", "type": "object"}, "MetatranscriptomeAnnotationActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "gold_analysis_project_identifiers": {"description": "identifiers for corresponding analysis project in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Ga[0-9]+$", "type": "array"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetatranscriptomeAnnotationActivity", "type": "object"}, "MetatranscriptomeAssembly": {"additionalProperties": false, "description": "", "properties": {"asm_score": {"description": "A score for comparing metagenomic assembly quality from same sample.", "type": "number"}, "contig_bp": {"description": "Total size in bp of all contigs.", "type": "number"}, "contigs": {"description": "Total sequence count of all contigs.", "type": "number"},
"ctg_l50": {"description": "Given a set of contigs, the L50 is defined as the sequence length of the shortest contig at 50% of the total genome length.", "type": "number"}, "ctg_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all contigs of that length or longer contains at least 90% of the sum of the lengths of all contigs.", "type": "number"}, "ctg_logsum": {"description": "The sum of the (length*log(length)) of all contigs, times some constant. Increase the contiguity, the score will increase", "type": "number"}, "ctg_max": {"description": "Maximum contig length.", "type": "number"}, "ctg_n50": {"description": "Given a set of contigs, each with its own length, the N50 count is defined as the smallest number of contigs whose length sum makes up half of genome size.", "type": "number"}, "ctg_n90": {"description": "Given a set of contigs, each with its own length, the N90 count is defined as the smallest number of contigs whose length sum makes up 90% of genome size.", "type": "number"}, "ctg_powsum": {"description": "Powersum of all contigs is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "gap_pct": {"description": "The gap size percentage of all scaffolds.", "type": "number"}, "gc_avg": {"description": "Average of GC content of all contigs.", "type": "number"}, "gc_std": {"description": "Standard deviation of GC content of all contigs.", "type": "number"}, "git_url": 
{"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "insdc_assembly_identifiers": {"pattern": "^insdc.sra:[A-Z]+[0-9]+(\\.[0-9]+)?$", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "num_aligned_reads": {"description": "The sequence count number of input reads aligned to assembled contigs.", "type": "number"}, "num_input_reads": {"description": "The sequence count number of input reads for assembly.", "type": "number"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "scaf_bp": {"description": "Total size in bp of all scaffolds.", "type": "number"}, "scaf_l50": {"description": "Given a set of scaffolds, the L50 is defined as the sequence length of the shortest scaffold at 50% of the total genome length.", "type": "number"}, "scaf_l90": {"description": "The L90 statistic is less than or equal to the L50 statistic; it is the length for which the collection of all scaffolds of that length or longer contains at least 90% of the sum of the lengths of all scaffolds.", "type": "number"}, "scaf_l_gt50k": {"description": "Total size in bp of all scaffolds greater than 50 KB.", "type": "number"}, "scaf_logsum": {"description": "The sum of the (length*log(length)) of all scaffolds, times some constant. 
Increase the contiguity, the score will increase", "type": "number"}, "scaf_max": {"description": "Maximum scaffold length.", "type": "number"}, "scaf_n50": {"description": "Given a set of scaffolds, each with its own length, the N50 count is defined as the smallest number of scaffolds whose length sum makes up half of genome size.", "type": "number"}, "scaf_n90": {"description": "Given a set of scaffolds, each with its own length, the N90 count is defined as the smallest number of scaffolds whose length sum makes up 90% of genome size.", "type": "number"}, "scaf_n_gt50k": {"description": "Total sequence count of scaffolds greater than 50 KB.", "type": "number"}, "scaf_pct_gt50k": {"description": "Total sequence size percentage of scaffolds greater than 50 KB.", "type": "number"}, "scaf_powsum": {"description": "Powersum of all scaffolds is the same as logsum except that it uses the sum of (length*(length^P)) for some power P (default P=0.25).", "type": "number"}, "scaffolds": {"description": "Total sequence count of all scaffolds.", "type": "number"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "MetatranscriptomeAssembly", "type": "object"}, "NomAnalysisActivity": {"additionalProperties": false, "description": "", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"description": "The instrument used to collect the data used in the analysis", "type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "NomAnalysisActivity", "type": "object"}, "OccupDocumentEnum": {"description": "", "enum": ["automated count", "estimate", "manual count", "videos"], "title": "OccupDocumentEnum", "type": "string"}, "OmicsProcessing": {"additionalProperties": false, "description": "The methods and processes used to generate omics data from a biosample or organism.", "properties": {"add_date": {"description": "The date on which the information was added to the database.", "type": "string"}, "alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "chimera_check": {"$ref": "#/$defs/TextValue", "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove 
chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences."}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "gold_sequencing_project_identifiers": {"description": "identifiers for corresponding sequencing project in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Gp[0-9]+$", "type": "array"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "insdc_experiment_identifiers": {"items": {"type": "string"}, "pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$", "type": "array"}, "instrument_name": {"description": "The name of the instrument that was used for processing the sample.", "type": "string"}, "mod_date": {"description": "The last date on which the database information was modified.", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "ncbi_project_name": {"type": "string"}, "nucl_acid_amp": {"$ref": "#/$defs/TextValue", "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids"}, "nucl_acid_ext": {"$ref": "#/$defs/TextValue", "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample"}, "omics_type": {"$ref": "#/$defs/ControlledTermValue", "description": "The type of omics data"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": 
"array"}, "pcr_cond": {"$ref": "#/$defs/TextValue", "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'"}, "pcr_primers": {"$ref": "#/$defs/TextValue", "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters"}, "principal_investigator": {"$ref": "#/$defs/PersonValue", "description": "Principal Investigator who led the study and/or generated the dataset."}, "processing_institution": {"$ref": "#/$defs/ProcessingInstitutionEnum", "description": "The organization that processed the sample."}, "samp_vol_we_dna_ext": {"$ref": "#/$defs/QuantityValue", "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (mixs:0000001)."}, "seq_meth": {"$ref": "#/$defs/TextValue", "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)."}, "seq_quality_check": {"$ref": "#/$defs/TextValue", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA"}, "target_gene": {"$ref": "#/$defs/TextValue", "description": "Targeted gene or locus name for marker gene studies"}, "target_subfragment": {"$ref": "#/$defs/TextValue", "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}}, "required": ["has_input"], "title": "OmicsProcessing", "type": "object"}, "OntologyClass": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "OntologyClass", "type": "object"}, "OrganismCountEnum": {"description": "", "enum": ["ATP", "MPN", "other"], "title": "OrganismCountEnum", "type": "string"}, "OrthologyGroup": {"additionalProperties": false, "description": "A set of genes or gene products in which all members are orthologous", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "OrthologyGroup", "type": "object"}, "OxyStatSampEnum": {"description": "", "enum": ["aerobic", "anaerobic", "other"], "title": "OxyStatSampEnum", "type": "string"}, "Pathway": {"additionalProperties": false, "description": "A pathway is a sequence of steps/reactions carried out by an organism or community of organisms", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Pathway", "type": "object"}, "PeptideQuantification": {"additionalProperties": false, "description": "This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information", "title": "PeptideQuantification", "type": "object"}, "Person": {"additionalProperties": false, "description": "represents a person, such as a researcher", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "Should be an ORCID. Specify in CURIE format. 
E.g. ORCID:0000-1111-...", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Person", "type": "object"}, "PersonValue": {"additionalProperties": false, "description": "An attribute value representing a person", "properties": {"email": {"description": "An email address for an entity such as a person. This should be the primary email address used.", "type": "string"}, "has_raw_value": {"description": "The full name of the Investigator in format FIRST LAST.", "type": "string"}, "name": {"description": "The full name of the Investigator. It should follow the format FIRST [MIDDLE NAME| MIDDLE INITIAL] LAST, where MIDDLE NAME| MIDDLE INITIAL is optional.", "type": "string"}, "orcid": {"description": "The ORCID of a person.", "type": "string"}, "profile_image_url": {"description": "A url that points to an image of a person.", "type": "string"}, "was_generated_by": {"type": "string"}, "websites": {"description": "A list of websites that are associated with the entity.", "items": {"type": "string"}, "type": "array"}}, "title": "PersonValue", "type": "object"}, "PlannedProcess": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "has_inputs": {"items": {"type": "string"}, "type": "array"}, "has_outputs": {"items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "participating_agent": {"$ref": "#/$defs/Agent"}}, "required": ["id"], "title": "PlannedProcess", "type": "object"}, "PlantGrowthMedEnum": {"description": "", "enum": ["other artificial liquid medium", "other artificial solid medium", "peat moss", "perlite", "pumice", "sand", "soil", "vermiculite", "water"], "title": "PlantGrowthMedEnum", "type": "string"}, "PlantSexEnum": {"description": "", "enum": ["Androdioecious", "Androecious", "Androgynous", "Androgynomonoecious", "Andromonoecious", "Bisexual", "Dichogamous", "Diclinous", "Dioecious", "Gynodioecious", "Gynoecious", "Gynomonoecious", "Hermaphroditic", "Imperfect", "Monoclinous", "Monoecious", "Perfect", "Polygamodioecious", "Polygamomonoecious", "Polygamous", "Protandrous", "Protogynous", "Subandroecious", "Subdioecious", "Subgynoecious", "Synoecious", "Trimonoecious", "Trioecious", "Unisexual"], "title": "PlantSexEnum", "type": "string"}, "ProcessingInstitutionEnum": {"description": "", "enum": ["UCSD", "JGI", "EMSL"], "title": "ProcessingInstitutionEnum", "type": "string"}, "ProfilePositionEnum": {"description": "", "enum": ["summit", "shoulder", "backslope", "footslope", "toeslope"], "title": "ProfilePositionEnum", "type": "string"}, "ProteinQuantification": {"additionalProperties": false, "description": "This is used to link a metaproteomics analysis workflow to a specific protein", "title": "ProteinQuantification", "type": "object"}, "QuadPosEnum": {"description": "", "enum": ["North side", "West side", "South side", "East side"], "title": "QuadPosEnum", "type": "string"}, "QuantityValue": {"additionalProperties": false, "description": "A simple quantity, e.g. 
2cm", "properties": {"has_maximum_numeric_value": {"description": "The maximum value part, expressed as number, of the quantity value when the value covers a range.", "type": "number"}, "has_minimum_numeric_value": {"description": "The minimum value part, expressed as number, of the quantity value when the value covers a range.", "type": "number"}, "has_numeric_value": {"description": "The number part of the quantity", "type": "number"}, "has_raw_value": {"description": "Unnormalized atomic string representation, should be in syntax {number} {unit}", "type": "string"}, "has_unit": {"description": "The unit of the quantity", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "QuantityValue", "type": "object"}, "Reaction": {"additionalProperties": false, "description": "An individual biochemical transformation carried out by a functional unit of an organism, in which a collection of substrates are transformed into a collection of products. Can also represent transporters", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Reaction", "type": "object"}, "ReactionActivity": {"additionalProperties": false, "description": "", "properties": {"material_input": {"type": "string"}, "material_output": {"type": "string"}, "reaction_aided_by": {"$ref": "#/$defs/LabDevice"}, "reaction_temperature": {"type": "string"}, "reaction_time": {"$ref": "#/$defs/QuantityValue"}}, "title": "ReactionActivity", "type": "object"}, "ReactionParticipant": {"additionalProperties": false, "description": "Instances of this link a reaction to a chemical entity participant", "title": "ReactionParticipant", "type": "object"}, "ReadBasedTaxonomyAnalysisActivity": {"additionalProperties": false, "description": "A workflow execution activity that performs taxonomy classification using sequencing reads", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "ReadBasedTaxonomyAnalysisActivity", "type": "object"}, "ReadQcAnalysisActivity": {"additionalProperties": false, "description": "A workflow execution activity that performs quality control on raw Illumina reads including quality trimming, artifact removal, linker trimming, adapter trimming, spike-in removal, and human/cat/dog/mouse/microbe contaminant removal", "properties": {"ended_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": 
"Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "input_base_count": {"description": "The nucleotide base count number of input reads for QC analysis.", "type": "number"}, "input_read_count": {"description": "The sequence count number of input reads for QC analysis.", "type": "number"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "output_base_count": {"description": "After QC analysis nucleotide base count number.", "type": "number"}, "output_read_count": {"description": "After QC analysis sequence count number.", "type": "number"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "ReadQcAnalysisActivity", "type": "object"}, "RelSampLocEnum": {"description": "", "enum": ["edge of car", "center of car", "under a seat"], "title": "RelSampLocEnum", "type": "string"}, "RelToOxygenEnum": {"description": "", "enum": ["aerobe", "anaerobe", "facultative", "microaerophilic", "microanaerobe", "obligate aerobe", "obligate anaerobe"], "title": "RelToOxygenEnum", "type": "string"}, "RnaContTypeEnum": {"description": "", "enum": ["plate", "tube"], "title": "RnaContTypeEnum", "type": "string"}, "RnaSampleFormatEnum": {"description": "", "enum": ["10 mM Tris-HCl", "DNAStable", "Ethanol", "Low EDTA TE", "MDA reaction buffer", "PBS", "Pellet", "RNAStable", "TE", "Water"], "title": "RnaSampleFormatEnum", "type": "string"}, "RoomCondtEnum": {"description": "", "enum": ["new", "visible wear", "needs repair", "damaged", "rupture", "visible signs of mold/mildew"], "title": "RoomCondtEnum", "type": "string"}, "RoomConnectedEnum": {"description": "", "enum": ["attic", "bathroom", "closet", "conference room", "elevator", "examining room", "hallway", "kitchen", "mail room", "office", "stairwell"], "title": "RoomConnectedEnum", "type": "string"}, "RoomLocEnum": {"description": "", "enum": ["corner room", "interior room", "exterior wall"], "title": "RoomLocEnum", "type": "string"}, "RoomSampPosEnum": {"description": "", "enum": ["north corner", "south corner", "west corner", "east corner", "northeast corner", "northwest corner", "southeast corner", "southwest corner", "center"], "title": "RoomSampPosEnum", "type": "string"}, "RoomTypeEnum": {"description": "", "enum": ["attic", "bathroom", "closet", "conference room", "elevator", 
"examining room", "hallway", "kitchen", "mail room", "private office", "open office", "stairwell", ",restroom", "lobby", "vestibule", "mechanical or electrical room", "data center", "laboratory_wet", "laboratory_dry", "gymnasium", "natatorium", "auditorium", "lockers", "cafe", "warehouse"], "title": "RoomTypeEnum", "type": "string"}, "SampCaptStatusEnum": {"description": "", "enum": ["active surveillance in response to an outbreak", "active surveillance not initiated by an outbreak", "farm sample", "market sample", "other"], "title": "SampCaptStatusEnum", "type": "string"}, "SampCollectPointEnum": {"description": "", "enum": ["well", "test well", "drilling rig", "wellhead", "separator", "storage tank", "other"], "title": "SampCollectPointEnum", "type": "string"}, "SampDisStageEnum": {"description": "", "enum": ["dissemination", "growth and reproduction", "infection", "inoculation", "penetration", "other"], "title": "SampDisStageEnum", "type": "string"}, "SampFloorEnum": {"description": "", "enum": ["1st floor", "2nd floor", "basement", "lobby"], "title": "SampFloorEnum", "type": "string"}, "SampMdEnum": {"description": "", "enum": ["DF", "RT", "KB", "MSL", "other"], "title": "SampMdEnum", "type": "string"}, "SampSubtypeEnum": {"description": "", "enum": ["oil phase", "water phase", "biofilm", "not applicable", "other"], "title": "SampSubtypeEnum", "type": "string"}, "SampWeatherEnum": {"description": "", "enum": ["clear sky", "cloudy", "foggy", "hail", "rain", "snow", "sleet", "sunny", "windy"], "title": "SampWeatherEnum", "type": "string"}, "SampleTypeEnum": {"description": "", "enum": ["soil", "water_extract_soil"], "title": "SampleTypeEnum", "type": "string"}, "SamplingMethodEnum": {"description": "", "enum": ["weighing"], "title": "SamplingMethodEnum", "type": "string"}, "SeasonUseEnum": {"description": "", "enum": ["Spring", "Summer", "Fall", "Winter"], "title": "SeasonUseEnum", "type": "string"}, "SedimentTypeEnum": {"description": "", "enum": ["biogenous", 
"cosmogenous", "hydrogenous", "lithogenous"], "title": "SedimentTypeEnum", "type": "string"}, "ShadingDeviceCondEnum": {"description": "", "enum": ["damaged", "needs repair", "new", "rupture", "visible wear"], "title": "ShadingDeviceCondEnum", "type": "string"}, "ShadingDeviceTypeEnum": {"description": "", "enum": ["bahama shutters", "exterior roll blind", "gambrel awning", "hood awning", "porchroller awning", "sarasota shutters", "slatted aluminum", "solid aluminum awning", "sun screen", "tree", "trellis", "venetian awning"], "title": "ShadingDeviceTypeEnum", "type": "string"}, "Site": {"additionalProperties": false, "description": "", "properties": {"alternative_identifiers": {"description": "A list of alternative identifiers for the entity.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "a human-readable description of a thing", "type": "string"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}}, "required": ["id"], "title": "Site", "type": "object"}, "SoilHorizonEnum": {"description": "", "enum": ["O horizon", "A horizon", "E horizon", "B horizon", "C horizon", "R layer", "Permafrost"], "title": "SoilHorizonEnum", "type": "string"}, "SolventEnum": {"description": "", "enum": ["deionized_water", "methanol", "chloroform"], "title": "SolventEnum", "type": "string"}, "SpecificEnum": {"description": "", "enum": ["operation", "as built", "construction", "bid", "design", "photos"], "title": "SpecificEnum", "type": "string"}, "SrDepEnvEnum": {"description": "", "enum": ["Lacustine", "Fluvioldeltaic", "Fluviomarine", "Marine", "other"], "title": "SrDepEnvEnum", "type": "string"}, "SrGeolAgeEnum": {"description": "", "enum": ["Archean", "Cambrian", "Carboniferous", "Cenozoic", "Cretaceous", "Devonian", "Jurassic", "Mesozoic", "Neogene", "Ordovician", "Paleogene", 
"Paleozoic", "Permian", "Precambrian", "Proterozoic", "Silurian", "Triassic", "other"], "title": "SrGeolAgeEnum", "type": "string"}, "SrKerogTypeEnum": {"description": "", "enum": ["Type I", "Type II", "Type III", "Type IV", "other"], "title": "SrKerogTypeEnum", "type": "string"}, "SrLithologyEnum": {"description": "", "enum": ["Clastic", "Carbonate", "Coal", "Biosilicieous", "other"], "title": "SrLithologyEnum", "type": "string"}, "Study": {"additionalProperties": false, "description": "A study summarizes the overall goal of a research initiative and outlines the key objective of its underlying projects.", "properties": {"abstract": {"description": "The abstract of manuscript/grant associated with the entity; i.e., a summary of the resource.", "type": "string"}, "alternative_descriptions": {"description": "A list of alternative descriptions for the entity. The distinction between description and alternative descriptions is application-specific.", "items": {"type": "string"}, "type": "array"}, "alternative_identifiers": {"description": "Unique identifier for a study submitted to additional resources. Matches that which has been submitted to NMDC", "items": {"type": "string"}, "type": "array"}, "alternative_names": {"description": "A list of alternative names used to refer to the entity. The distinction between name and alternative names is application-specific.", "items": {"type": "string"}, "type": "array"}, "alternative_titles": {"description": "A list of alternative titles for the entity. 
The distinction between title and alternative titles is application-specific.", "items": {"type": "string"}, "type": "array"}, "description": {"description": "A brief summary that details the study you've submitted to NMDC", "type": "string"}, "doi": {"$ref": "#/$defs/AttributeValue", "description": "The dataset citation for this study"}, "ecosystem": {"description": "An ecosystem is a combination of a physical environment (abiotic factors) and all the organisms (biotic factors) that interact with this environment. Ecosystem is in position 1/5 in a GOLD path.", "type": "string"}, "ecosystem_category": {"description": "Ecosystem categories represent divisions within the ecosystem based on specific characteristics of the environment from where an organism or sample is isolated. Ecosystem category is in position 2/5 in a GOLD path.", "type": "string"}, "ecosystem_subtype": {"description": "Ecosystem subtypes represent further subdivision of Ecosystem types into more distinct subtypes. Ecosystem subtype is in position 4/5 in a GOLD path.", "type": "string"}, "ecosystem_type": {"description": "Ecosystem types represent things having common characteristics within the Ecosystem Category. These common characteristics based grouping is still broad but specific to the characteristics of a given environment. 
Ecosystem type is in position 3/5 in a GOLD path.", "type": "string"}, "emsl_proposal_doi": {"description": "The DOI for the EMSL awarded study that relates to the NMDC submitted study", "type": "string"}, "emsl_proposal_identifier": {"description": "The proposal number assigned to the EMSL awarded study that relates to that which is represented in NMDC.", "type": "string"}, "ess_dive_datasets": {"description": "List of ESS-DIVE dataset DOIs", "items": {"type": "string"}, "type": "array"}, "funding_sources": {"items": {"type": "string"}, "type": "array"}, "gold_study_identifiers": {"description": "identifiers for corresponding project(s) in GOLD", "items": {"type": "string"}, "pattern": "^GOLD:Gs[0-9]+$", "type": "array"}, "has_credit_associations": {"description": "This slot links a study to a credit association. The credit association will be linked to a person value and to a CRediT Contributor Roles term. Overall semantics: person should get credit X for their participation in the study", "items": {"$ref": "#/$defs/CreditAssociation"}, "type": "array"}, "id": {"description": "An NMDC assigned unique identifier for a sample submitted to NMDC.", "type": "string"}, "mgnify_project_identifiers": {"description": "identifiers for corresponding project in MGnify", "pattern": "^mgnify.proj:[A-Z]+[0-9]+$", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "objective": {"description": "The scientific objectives associated with the entity. It SHOULD correspond to scientific norms for objectives field in a structured abstract.", "type": "string"}, "principal_investigator": {"$ref": "#/$defs/PersonValue", "description": "Principal Investigator who led the study and/or generated the dataset."}, "publications": {"description": "A list of publications that are associated with the entity. 
The publications SHOULD be given using an identifier, such as a DOI or Pubmed ID, if possible.", "items": {"type": "string"}, "type": "array"}, "related_identifiers": {"description": "Unique identifier for a study submitted to additional resources. Similar, but not necessarily identical to that which has been submitted to NMDC", "type": "string"}, "relevant_protocols": {"items": {"type": "string"}, "type": "array"}, "specific_ecosystem": {"description": "Specific ecosystems represent specific features of the environment like aphotic zone in an ocean or gastric mucosa within a host digestive system. Specific ecosystem is in position 5/5 in a GOLD path.", "type": "string"}, "study_image": {"description": "Links a study to one or more images.", "items": {"$ref": "#/$defs/ImageValue"}, "type": "array"}, "title": {"description": "A name given to the entity that differs from the name/label programmatically assigned to it. For example, when extracting study information for GOLD, the GOLD system has assigned a name/label. However, for display purposes, we may also wish the capture the title of the proposal that was used to fund the study.", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "websites": {"description": "A list of websites that are associated with the entity.", "items": {"type": "string"}, "type": "array"}}, "title": "Study", "type": "object"}, "SubstructureTypeEnum": {"description": "", "enum": ["crawlspace", "slab on grade", "basement"], "title": "SubstructureTypeEnum", "type": "string"}, "SurfAirContEnum": {"description": "", "enum": ["dust", "organic matter", "particulate matter", "volatile organic compounds", "biological contaminants", "radon", "nutrients", "biocides"], "title": "SurfAirContEnum", "type": "string"}, "SurfMaterialEnum": {"description": "", "enum": ["adobe", "carpet", "cinder blocks", "concrete", "hay bales", "glass", "metal", "paint", "plastic", "stainless steel", "stone", "stucco", "tile", "vinyl", "wood"], "title": "SurfMaterialEnum", "type": "string"}, "TextValue": {"additionalProperties": false, "description": "A basic string value", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "language": {"description": "Should use ISO 639-1 code e.g. \"en\", \"fr\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "TextValue", "type": "object"}, "TidalStageEnum": {"description": "", "enum": ["low tide", "ebb tide", "flood tide", "high tide"], "title": "TidalStageEnum", "type": "string"}, "TillageEnum": {"description": "", "enum": ["drill", "cutting disc", "ridge till", "strip tillage", "zonal tillage", "chisel", "tined", "mouldboard", "disc plough"], "title": "TillageEnum", "type": "string"}, "TimestampValue": {"additionalProperties": false, "description": "A value that is a timestamp. The range should be ISO-8601", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. 
\"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "TimestampValue", "type": "object"}, "TrainLineEnum": {"description": "", "enum": ["red", "green", "orange"], "title": "TrainLineEnum", "type": "string"}, "TrainStatLocEnum": {"description": "", "enum": ["south station above ground", "south station underground", "south station amtrak", "forest hills", "riverside"], "title": "TrainStatLocEnum", "type": "string"}, "TrainStopLocEnum": {"description": "", "enum": ["end", "mid", "downtown"], "title": "TrainStopLocEnum", "type": "string"}, "UrlValue": {"additionalProperties": false, "description": "A value that is a string that conforms to URL syntax", "properties": {"has_raw_value": {"description": "The value that was specified for an annotation in raw form, i.e. a string. E.g. \"2 cm\" or \"2-4 cm\"", "type": "string"}, "was_generated_by": {"type": "string"}}, "title": "UrlValue", "type": "object"}, "VisMediaEnum": {"description": "", "enum": ["photos", "videos", "commonly of the building", "site context (adjacent buildings, vegetation, terrain, streets)", "interiors", "equipment", "3D scans"], "title": "VisMediaEnum", "type": "string"}, "WallConstTypeEnum": {"description": "", "enum": ["frame construction", "joisted masonry", "light noncombustible", "masonry noncombustible", "modified fire resistive", "fire resistive"], "title": "WallConstTypeEnum", "type": "string"}, "WallFinishMatEnum": {"description": "", "enum": ["plaster", "gypsum plaster", "veneer plaster", "gypsum board", "tile", "terrazzo", "stone facing", "acoustical treatment", "wood", "metal", "masonry"], "title": "WallFinishMatEnum", "type": "string"}, "WallLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "WallLocEnum", "type": "string"}, "WallSurfTreatmentEnum": {"description": "", "enum": ["painted", "wall paper", "no treatment", "paneling", "stucco", "fabric"], "title": "WallSurfTreatmentEnum", "type": "string"}, 
"WallTextureEnum": {"description": "", "enum": ["crows feet", "crows-foot stomp", "double skip", "hawk and trowel", "knockdown", "popcorn", "orange peel", "rosebud stomp", "Santa-Fe texture", "skip trowel", "smooth", "stomp knockdown", "swirl"], "title": "WallTextureEnum", "type": "string"}, "WaterFeatTypeEnum": {"description": "", "enum": ["fountain", "pool", "standing feature", "stream", "waterfall"], "title": "WaterFeatTypeEnum", "type": "string"}, "WeekdayEnum": {"description": "", "enum": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"], "title": "WeekdayEnum", "type": "string"}, "WindowCondEnum": {"description": "", "enum": ["damaged", "needs repair", "new", "rupture", "visible wear"], "title": "WindowCondEnum", "type": "string"}, "WindowCoverEnum": {"description": "", "enum": ["blinds", "curtains", "none"], "title": "WindowCoverEnum", "type": "string"}, "WindowHorizPosEnum": {"description": "", "enum": ["left", "middle", "right"], "title": "WindowHorizPosEnum", "type": "string"}, "WindowLocEnum": {"description": "", "enum": ["north", "south", "east", "west"], "title": "WindowLocEnum", "type": "string"}, "WindowMatEnum": {"description": "", "enum": ["clad", "fiberglass", "metal", "vinyl", "wood"], "title": "WindowMatEnum", "type": "string"}, "WindowTypeEnum": {"description": "", "enum": ["single-hung sash window", "horizontal sash window", "fixed window"], "title": "WindowTypeEnum", "type": "string"}, "WindowVertPosEnum": {"description": "", "enum": ["bottom", "middle", "top", "low", "high"], "title": "WindowVertPosEnum", "type": "string"}, "WorkflowExecutionActivity": {"additionalProperties": false, "description": "Represents an instance of an execution of a particular workflow", "properties": {"ended_at_time": {"format": "date-time", "pattern": 
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "execution_resource": {"description": "Example: NERSC-Cori", "type": "string"}, "git_url": {"description": "Example: https://github.com/microbiomedata/mg_annotation/releases/tag/0.1", "type": "string"}, "has_input": {"description": "An input to a process.", "items": {"type": "string"}, "type": "array"}, "has_output": {"description": "An output biosample to a processing step", "items": {"type": "string"}, "type": "array"}, "id": {"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "type": "string"}, "name": {"description": "A human readable label for an entity", "type": "string"}, "part_of": {"description": "Links a resource to another resource that either logically or physically includes it.", "items": {"type": "string"}, "type": "array"}, "started_at_time": {"format": "date-time", "pattern": "^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$", "type": "string"}, "type": {"description": "An optional string that specifies the type object. 
This is used to allow for searches for different kinds of objects.", "type": "string"}, "used": {"type": "string"}, "was_associated_with": {"description": "the agent/entity associated with the generation of the file", "type": "string"}, "was_informed_by": {"type": "string"}}, "required": ["execution_resource", "git_url", "has_input", "has_output", "id", "started_at_time", "ended_at_time", "was_informed_by"], "title": "WorkflowExecutionActivity", "type": "object"}}, "$id": "https://microbiomedata/schema", "$schema": "http://json-schema.org/draft-07/schema#", "additionalProperties": false, "metamodel_version": "1.7.0", "properties": {"activity_set": {"description": "This property links a database object to the set of workflow activities.", "items": {"$ref": "#/$defs/WorkflowExecutionActivity"}, "type": "array"}, "biosample_set": {"description": "This property links a database object to the set of samples within it.", "items": {"$ref": "#/$defs/Biosample"}, "type": "array"}, "collecting_biosamples_from_site_set": {"items": {"$ref": "#/$defs/CollectingBiosamplesFromSite"}, "type": "array"}, "data_object_set": {"description": "This property links a database object to the set of data objects within it.", "items": {"$ref": "#/$defs/DataObject"}, "type": "array"}, "dissolving_activity_set": {"items": {"$ref": "#/$defs/DissolvingActivity"}, "type": "array"}, "field_research_site_set": {"items": {"$ref": "#/$defs/FieldResearchSite"}, "type": "array"}, "functional_annotation_set": {"description": "This property links a database object to the set of all functional annotations", "items": {"$ref": "#/$defs/FunctionalAnnotation"}, "type": "array"}, "genome_feature_set": {"description": "This property links a database object to the set of all features", "items": {"$ref": "#/$defs/GenomeFeature"}, "type": "array"}, "mags_activity_set": {"description": "This property links a database object to the set of MAGs analysis activities.", "items": {"$ref": "#/$defs/MagsAnalysisActivity"}, 
"type": "array"}, "material_sample_set": {"items": {"$ref": "#/$defs/MaterialSample"}, "type": "array"}, "material_sampling_activity_set": {"items": {"$ref": "#/$defs/MaterialSamplingActivity"}, "type": "array"}, "metabolomics_analysis_activity_set": {"description": "This property links a database object to the set of metabolomics analysis activities.", "items": {"$ref": "#/$defs/MetabolomicsAnalysisActivity"}, "type": "array"}, "metagenome_annotation_activity_set": {"description": "This property links a database object to the set of metagenome annotation activities.", "items": {"$ref": "#/$defs/MetagenomeAnnotationActivity"}, "type": "array"}, "metagenome_assembly_set": {"description": "This property links a database object to the set of metagenome assembly activities.", "items": {"$ref": "#/$defs/MetagenomeAssembly"}, "type": "array"}, "metaproteomics_analysis_activity_set": {"description": "This property links a database object to the set of metaproteomics analysis activities.", "items": {"$ref": "#/$defs/MetaproteomicsAnalysisActivity"}, "type": "array"}, "metatranscriptome_activity_set": {"description": "TODO", "items": {"$ref": "#/$defs/MetatranscriptomeActivity"}, "type": "array"}, "nom_analysis_activity_set": {"description": "This property links a database object to the set of natural organic matter (NOM) analysis activities.", "items": {"$ref": "#/$defs/NomAnalysisActivity"}, "type": "array"}, "omics_processing_set": {"description": "This property links a database object to the set of omics processings within it.", "items": {"$ref": "#/$defs/OmicsProcessing"}, "type": "array"}, "reaction_activity_set": {"items": {"$ref": "#/$defs/ReactionActivity"}, "type": "array"}, "read_based_taxonomy_analysis_activity_set": {"description": "This property links a database object to the set of read based analysis activities.", "items": {"$ref": "#/$defs/ReadBasedTaxonomyAnalysisActivity"}, "type": "array"}, "read_qc_analysis_activity_set": {"description": "This property 
links a database object to the set of read QC analysis activities.", "items": {"$ref": "#/$defs/ReadQcAnalysisActivity"}, "type": "array"}, "study_set": {"description": "This property links a database object to the set of studies within it.", "items": {"$ref": "#/$defs/Study"}, "type": "array"}}, "title": "NMDC", "type": "object", "version": "7.0.0"} diff --git a/src/schema/portal/emsl.yaml b/src/schema/portal/emsl.yaml index 1c53c91d1f..8154f2980e 100644 --- a/src/schema/portal/emsl.yaml +++ b/src/schema/portal/emsl.yaml @@ -94,7 +94,7 @@ slots: recommended: true technical_reps: name: technical_reps - description: If sending technical replicates of the same sample, indicate the replicate number. + description: If sending technical replicates of the same sample, indicate the replicate count. title: number technical replicate examples: - value: 2 diff --git a/test/data/invalid_data/minimal_biosample_invalid_fire.json b/test/data/invalid_data/minimal_biosample_invalid_fire.json index c61c3a94d0..e13877147d 100644 --- a/test/data/invalid_data/minimal_biosample_invalid_fire.json +++ b/test/data/invalid_data/minimal_biosample_invalid_fire.json @@ -26,4 +26,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/data/invalid_data/minimal_biosample_with_fire_test.json b/test/data/invalid_data/minimal_biosample_with_fire_test.json deleted file mode 100644 index 391dd4a49e..0000000000 --- a/test/data/invalid_data/minimal_biosample_with_fire_test.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "biosample_set": [ - { - "id": "nmdc:bsm-99-dtTMNb", - "fire": "1871-10-01 to 1871-10-31", - "part_of": [ - "gold:Gs0110115" - ], - "env_broad_scale": { - "has_raw_value": "ENVO:00002030", - "term": { - "id": "ENVO:00002030" - } - }, - "env_local_scale": { - "has_raw_value": "ENVO:00002169", - "term": { - "id": "ENVO:00002169" - } - }, - "env_medium": { - "has_raw_value": "ENVO:00005792", - "term": { - "id": "ENVO:00005792" - } - } - } - ] -} \ No newline at end of file diff --git 
a/test/data/minimal_valid_biosample_test.json b/test/data/minimal_valid_biosample_test.json index 4ce6772b90..bc8931b642 100644 --- a/test/data/minimal_valid_biosample_test.json +++ b/test/data/minimal_valid_biosample_test.json @@ -1,6 +1,3 @@ -// 20230124150223 -// https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/test/data/minimal_valid_biosample_test.json - { "biosample_set": [ { @@ -29,4 +26,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/data/minimal_valid_biosample_with_fire.json b/test/data/minimal_valid_biosample_with_fire.json index 391dd4a49e..bc8931b642 100644 --- a/test/data/minimal_valid_biosample_with_fire.json +++ b/test/data/minimal_valid_biosample_with_fire.json @@ -26,4 +26,4 @@ } } ] -} \ No newline at end of file +} From 2354e9e5d9c213473f8d3403ab365fd13b891594 Mon Sep 17 00:00:00 2001 From: "Mark A. Miller" Date: Tue, 24 Jan 2023 15:47:50 -0500 Subject: [PATCH 12/12] water content indentation --- src/schema/nmdc.yaml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml index 669d0d94af..3e52db225c 100644 --- a/src/schema/nmdc.yaml +++ b/src/schema/nmdc.yaml @@ -673,22 +673,22 @@ classes: - Soil water content can be measure MANY ways and often, multiple ways are used in one experiment (gravimetric water content and water holding capacity and water filled pore space, to name a few). - Should this be multi valued? How to we manage and validate this? 
water_content: -# annotations: -# expected_value: -# tag: expected_value -# value: string -# preferred_unit: -# tag: preferred_unit -# #value: gram per gram or cubic centimeter per cubic centimeter -# #multivalued: true + annotations: + expected_value: + tag: expected_value + value: string + preferred_unit: + tag: preferred_unit + value: gram per gram or cubic centimeter per cubic centimeter + multivalued: true range: string -# examples: -# - value: 75% -# - value: 75 % -# - value: 0.75 g water/g dry soil -# - value: 75% water holding capacity -# - value: 1.1 g fresh weight/ dry weight -# - value: 10% water filled pore space + examples: + - value: 75% + - value: 75 % + - value: 0.75 g water/g dry soil + - value: 75% water holding capacity + - value: 1.1 g fresh weight/ dry weight + - value: 10% water filled pore space todos: - value in preferred unit is too limiting. need to change this - check and correct validation so examples are accepted @@ -720,7 +720,7 @@ classes: examples: - value: 5 mg N/ L todos: - - check description. How are they different? + - check description. How are they different? salinity_meth: examples: - value: https://doi.org/10.1007/978-1-61779-986-0_28