From 07e6d736bc6e2376c429a7a531f11e3f92f773c5 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Wed, 13 Dec 2023 13:47:45 -0800 Subject: [PATCH 1/6] Copy min/max constraints over from todos to support previous convention --- lib/Validator.js | 16 ++++++++++++++++ tests/Validator.test.js | 25 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/lib/Validator.js b/lib/Validator.js index bc7f4251..7de0f08d 100644 --- a/lib/Validator.js +++ b/lib/Validator.js @@ -43,6 +43,22 @@ class Validator { ); } + // LinkML does not yet have support for non-numeric minimum_value and maximum_value. In the + // meantime, DataHarmonizer has a convention of putting these values in todos with specific + // prefixes. + for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) { + const slotType = this.#schema.types?.[slotDefinition.range] + if (slotType?.uri === 'xsd:date' && slotDefinition.todos) { + for (const todo of slotDefinition.todos) { + if (todo.substring(0, 2) === '>=') { + slotDefinition.minimum_value = todo.substring(2); + } else if (todo.substring(0, 2) === '<=') { + slotDefinition.maximum_value = todo.substring(2); + } + } + } + } + this.#uniqueKeys = []; if (classDefinition.unique_keys) { this.#uniqueKeys = Object.entries(classDefinition.unique_keys).map( diff --git a/tests/Validator.test.js b/tests/Validator.test.js index a123f587..eff0fa1a 100644 --- a/tests/Validator.test.js +++ b/tests/Validator.test.js @@ -117,11 +117,25 @@ const SCHEMA = { minimum_value: '2010-02-12', maximum_value: '2010-02-28', }, + // This is not standard LinkML but it is supported while LinkML adds better support + // for non-numeric minimum and maximum values + during_vancouver_olympics_todos: { + name: 'during_vancouver_olympics_todos', + range: 'date', + todos: ['>=2010-02-12', '<=2010-02-28'], + }, + // The special '{today}' value is not standard LinkML, but it is supported as a + // DataHarmonizer convention not_the_future: { name: 'not_the_future', range: 'date', maximum_value: '{today}', }, + not_the_future_todos: { + name: 'not_the_future_todos', + range: 'date', + todos: ['<={today}'], + }, a_constant: { name: 'a_constant', range: 'string', @@ -508,10 +522,21 @@ describe('Validator', () => { expect(fn('2010-02-20')).toBeUndefined(); expect(fn('2010-03-01')).toEqual('Value is greater than maximum value'); + fn = validator.getValidatorForSlot('during_vancouver_olympics_todos'); + expect(fn(undefined)).toBeUndefined(); + expect(fn('2010-01-01')).toEqual('Value is less than minimum value'); + expect(fn('2010-02-20')).toBeUndefined(); + expect(fn('2010-03-01')).toEqual('Value is greater than maximum value'); + fn = validator.getValidatorForSlot('not_the_future'); expect(fn(undefined)).toBeUndefined(); expect(fn('2021-01-01')).toBeUndefined(); expect(fn('3000-01-01')).toEqual('Value is greater than maximum value'); + + fn = validator.getValidatorForSlot('not_the_future_todos'); + expect(fn(undefined)).toBeUndefined(); + expect(fn('2021-01-01')).toBeUndefined(); + expect(fn('3000-01-01')).toEqual('Value is greater than maximum value'); }); it('should validate constant constraints', () => { From 18198e97461e0cf9b37baa53bcdf2aca48b604ad Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Wed, 13 Dec 2023 14:02:29 -0800 Subject: [PATCH 2/6] Allow min/max todos to propagate down to boolean subschemas --- lib/Validator.js | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/lib/Validator.js b/lib/Validator.js index 7de0f08d..c53c98da 100644 --- a/lib/Validator.js +++ b/lib/Validator.js @@ -46,10 +46,13 @@ class Validator { // LinkML does not yet have support for non-numeric minimum_value and maximum_value. In the // meantime, DataHarmonizer has a convention of putting these values in todos with specific // prefixes. - for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) { - const slotType = this.#schema.types?.[slotDefinition.range] - if (slotType?.uri === 'xsd:date' && slotDefinition.todos) { - for (const todo of slotDefinition.todos) { + const processTodos = (slotDefinition, todos) => { + if (!todos || !todos.length) { + return; + } + const slotType = this.#schema.types?.[slotDefinition.range]; + if (slotType?.uri === 'xsd:date') { + for (const todo of todos) { if (todo.substring(0, 2) === '>=') { slotDefinition.minimum_value = todo.substring(2); } else if (todo.substring(0, 2) === '<=') { @@ -57,6 +60,21 @@ class Validator { } } } + for (const def of slotDefinition.any_of || []) { + processTodos(def, todos); + } + for (const def of slotDefinition.all_of || []) { + processTodos(def, todos); + } + for (const def of slotDefinition.exactly_one_of || []) { + processTodos(def, todos); + } + for (const def of slotDefinition.none_of || []) { + processTodos(def, todos); + } + }; + for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) { + processTodos(slotDefinition, slotDefinition.todos); } this.#uniqueKeys = []; From 13d6a81dd4005e57ba86cc6585257d90fd716fa7 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 14 Dec 2023 10:03:49 -0800 Subject: [PATCH 3/6] Add support for min/max based on other slots in todos --- lib/Validator.js | 86 +++++++++++++++++++++++++++++++++++++++++ tests/Validator.test.js | 37 ++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/lib/Validator.js b/lib/Validator.js index c53c98da..924ad274 100644 --- a/lib/Validator.js +++ b/lib/Validator.js @@ -11,6 +11,8 @@ class Validator { #valueValidatorMap; #identifiers; #uniqueKeys; + #dependantMinimumValuesMap; + #dependantMaximumValuesMap; #results; constructor(schema, multivaluedDelimiter = '; ', datatypeOptions = {}) { @@ -77,6 +79,37 @@ class Validator { processTodos(slotDefinition, slotDefinition.todos); } + // DataHarmonizer has a convention for using todos to specify that for a given row the value + // of one column is the min/max value of another column (e.g. ">={other slot name}"). Index + // info about that here. + this.#dependantMinimumValuesMap = new Map(); + this.#dependantMaximumValuesMap = new Map(); + for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) { + const { todos } = slotDefinition; + if (!todos || !todos.length) { + continue; + } + for (const todo of todos) { + const match = todo.match(/^([><])={(.*?)}$/); + if (match == null) { + continue; + } + if ( + !Object.prototype.hasOwnProperty.call( + this.#targetClassInducedSlots, + match[2] + ) + ) { + continue; + } + if (match[1] === '>') { + this.#dependantMinimumValuesMap.set(slotDefinition.name, match[2]); + } else if (match[1] === '<') { + this.#dependantMaximumValuesMap.set(slotDefinition.name, match[2]); + } + } + } + this.#uniqueKeys = []; if (classDefinition.unique_keys) { this.#uniqueKeys = Object.entries(classDefinition.unique_keys).map( @@ -335,6 +368,21 @@ class Validator { ); } + this.#doDependantComparisonValidation( + this.#dependantMinimumValuesMap, + data, + header, + (a, b) => a >= b, + 'is less than' + ); + this.#doDependantComparisonValidation( + this.#dependantMaximumValuesMap, + data, + header, + (a, b) => a <= b, + 'is greater than' + ); + const rules = this.#targetClass.rules ?? []; for (let idx = 0; idx < rules.length; idx += 1) { const rule = rules[idx]; @@ -436,6 +484,44 @@ class Validator { } } + #doDependantComparisonValidation( + dependantMap, + data, + header, + compareFn, + message + ) { + for (const [slotName, compareSlotName] of dependantMap.entries()) { + const column = header.indexOf(slotName); + const compareColumn = header.indexOf(compareSlotName); + if (column < 0 || compareColumn < 0) { + continue; + } + const slotDefinition = this.#targetClassInducedSlots[slotName]; + const slotType = this.#schema.types?.[slotDefinition.range]; + for (let row = 0; row < data.length; row += 1) { + const compareValue = this.#parser.parse( + data[row][compareColumn], + slotType.uri + ); + if (!compareValue) { + continue; + } + const value = this.#parser.parse(data[row][column], slotType.uri); + if (!value) { + continue; + } + if (!compareFn(value, compareValue)) { + this.#addResult( + row, + column, + `Value ${message} value of ${compareSlotName} column` + ); + } + } + } + } + #buildSlotConditionGettersAndValidators( classExpression, header, diff --git a/tests/Validator.test.js b/tests/Validator.test.js index eff0fa1a..a80e6a34 100644 --- a/tests/Validator.test.js +++ b/tests/Validator.test.js @@ -136,6 +136,18 @@ const SCHEMA = { range: 'date', todos: ['<={today}'], }, + // This is another DataHarmonizer convention that we support but isn't expressible in + // standard LinkML + after_a_date: { + name: 'after_a_date', + range: 'date', + todos: ['>={a_date}'], + }, + before_a_date: { + name: 'before_a_date', + range: 'date', + todos: ['<={a_date}'], + }, a_constant: { name: 'a_constant', range: 'string', @@ -821,4 +833,29 @@ describe('Validator', () => { }, }); }); + + it('should validate min/max constrains based on other slots', () => { + const validator = new Validator(SCHEMA); + validator.useTargetClass('Test'); + + const header = ['a_date', 'before_a_date', 'after_a_date']; + const data = [ + ['2023-06-01', '2023-05-29', ''], + ['2023-06-01', '2023-06-01', ''], + ['2023-06-01', '2023-06-03', ''], + ['2023-06-01', '', '2023-05-29'], + ['2023-06-01', '', '2023-06-01'], + ['2023-06-01', '', '2023-06-03'], + ['', '2023-06-03', '2023-05-29'], + ]; + const results = validator.validate(data, header); + expect(results).toEqual({ + 2: { + 1: 'Value is greater than value of a_date column', + }, + 3: { + 2: 'Value is less than value of a_date column', + }, + }); + }); }); From d2562e2560becef934cb2240b6dbbc97584bdb06 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 14 Dec 2023 10:19:00 -0800 Subject: [PATCH 4/6] Get range for dependent slots from range or first any_of range to match existing convention --- lib/.babelrc.json | 3 ++- lib/Validator.js | 6 +++++- web/templates/menu.json | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/.babelrc.json b/lib/.babelrc.json index 1320b9a3..8ec9064d 100644 --- a/lib/.babelrc.json +++ b/lib/.babelrc.json @@ -1,3 +1,4 @@ { - "presets": ["@babel/preset-env"] + "presets": ["@babel/preset-env"], + "sourceMaps": "inline" } diff --git a/lib/Validator.js b/lib/Validator.js index 924ad274..2803f089 100644 --- a/lib/Validator.js +++ b/lib/Validator.js @@ -498,7 +498,11 @@ class Validator { continue; } const slotDefinition = this.#targetClassInducedSlots[slotName]; - const slotType = this.#schema.types?.[slotDefinition.range]; + // This DH convention is also a bit particular about ranges. It looks at either the slot range + // or the first any_of range. + const slotRange = + slotDefinition.range || slotDefinition.any_of?.[0]?.range; + const slotType = this.#schema.types?.[slotRange]; for (let row = 0; row < data.length; row += 1) { const compareValue = this.#parser.parse( data[row][compareColumn], diff --git a/web/templates/menu.json b/web/templates/menu.json index c932af8c..ce249f5b 100644 --- a/web/templates/menu.json +++ b/web/templates/menu.json @@ -6,6 +6,13 @@ "display": true } }, + "test": { + "Address": { + "name": "Address", + "status": "published", + "display": true + } + }, "gisaid": { "GISAID": { "name": "GISAID", From 86be2697579f251d5082c2cc9fed06a08279f03c Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 14 Dec 2023 10:20:20 -0800 Subject: [PATCH 5/6] Revert testing changes --- lib/.babelrc.json | 3 +-- web/templates/menu.json | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/.babelrc.json b/lib/.babelrc.json index 8ec9064d..1320b9a3 100644 --- a/lib/.babelrc.json +++ b/lib/.babelrc.json @@ -1,4 +1,3 @@ { - "presets": ["@babel/preset-env"], - "sourceMaps": "inline" + "presets": ["@babel/preset-env"] } diff --git a/web/templates/menu.json b/web/templates/menu.json index ce249f5b..c932af8c 100644 --- a/web/templates/menu.json +++ b/web/templates/menu.json @@ -6,13 +6,6 @@ "display": true } }, - "test": { - "Address": { - "name": "Address", - "status": "published", - "display": true - } - }, "gisaid": { "GISAID": { "name": "GISAID", From dc6a8d7a73141f98c246ebe5978643f0a235898f Mon Sep 17 00:00:00 2001 From: Damion Dooley Date: Tue, 19 Dec 2023 13:23:20 -0800 Subject: [PATCH 6/6] version bump --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 06ef89c3..6f2ef2ab 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "data-harmonizer", - "version": "1.6.4", + "version": "1.6.5", "description": "A standardized spreadsheet editor and validator that can be run offline and locally", "repository": "git@github.com:cidgoh/DataHarmonizer.git", "license": "MIT",