Skip to content

Commit

Permalink
Merge pull request #426 from cidgoh/fix-min-max-from-todos
Browse files Browse the repository at this point in the history
Use existing todos conventions for min/max constraints
  • Loading branch information
ddooley committed Dec 19, 2023
2 parents d1e4778 + dc6a8d7 commit c076baf
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 1 deletion.
124 changes: 124 additions & 0 deletions lib/Validator.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class Validator {
#valueValidatorMap;
#identifiers;
#uniqueKeys;
#dependantMinimumValuesMap;
#dependantMaximumValuesMap;
#results;

constructor(schema, multivaluedDelimiter = '; ', datatypeOptions = {}) {
Expand Down Expand Up @@ -43,6 +45,71 @@ class Validator {
);
}

// LinkML does not yet have support for non-numeric minimum_value and maximum_value. In the
// meantime, DataHarmonizer has a convention of putting these values in todos with specific
// prefixes.
const processTodos = (slotDefinition, todos) => {
if (!todos || !todos.length) {
return;
}
const slotType = this.#schema.types?.[slotDefinition.range];
if (slotType?.uri === 'xsd:date') {
for (const todo of todos) {
if (todo.substring(0, 2) === '>=') {
slotDefinition.minimum_value = todo.substring(2);
} else if (todo.substring(0, 2) === '<=') {
slotDefinition.maximum_value = todo.substring(2);
}
}
}
for (const def of slotDefinition.any_of || []) {
processTodos(def, todos);
}
for (const def of slotDefinition.all_of || []) {
processTodos(def, todos);
}
for (const def of slotDefinition.exactly_one_of || []) {
processTodos(def, todos);
}
for (const def of slotDefinition.none_of || []) {
processTodos(def, todos);
}
};
for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) {
processTodos(slotDefinition, slotDefinition.todos);
}

// DataHarmonizer has a convention for using todos to specify that for a given row the value
// of one column is the min/max value of another column (e.g. ">={other slot name}"). Index
// info about that here.
this.#dependantMinimumValuesMap = new Map();
this.#dependantMaximumValuesMap = new Map();
for (const slotDefinition of Object.values(this.#targetClassInducedSlots)) {
const { todos } = slotDefinition;
if (!todos || !todos.length) {
continue;
}
for (const todo of todos) {
const match = todo.match(/^([><])={(.*?)}$/);
if (match == null) {
continue;
}
if (
!Object.prototype.hasOwnProperty.call(
this.#targetClassInducedSlots,
match[2]
)
) {
continue;
}
if (match[1] === '>') {
this.#dependantMinimumValuesMap.set(slotDefinition.name, match[2]);
} else if (match[1] === '<') {
this.#dependantMaximumValuesMap.set(slotDefinition.name, match[2]);
}
}
}

this.#uniqueKeys = [];
if (classDefinition.unique_keys) {
this.#uniqueKeys = Object.entries(classDefinition.unique_keys).map(
Expand Down Expand Up @@ -301,6 +368,21 @@ class Validator {
);
}

this.#doDependantComparisonValidation(
this.#dependantMinimumValuesMap,
data,
header,
(a, b) => a >= b,
'is less than'
);
this.#doDependantComparisonValidation(
this.#dependantMaximumValuesMap,
data,
header,
(a, b) => a <= b,
'is greater than'
);

const rules = this.#targetClass.rules ?? [];
for (let idx = 0; idx < rules.length; idx += 1) {
const rule = rules[idx];
Expand Down Expand Up @@ -402,6 +484,48 @@ class Validator {
}
}

#doDependantComparisonValidation(
dependantMap,
data,
header,
compareFn,
message
) {
for (const [slotName, compareSlotName] of dependantMap.entries()) {
const column = header.indexOf(slotName);
const compareColumn = header.indexOf(compareSlotName);
if (column < 0 || compareColumn < 0) {
continue;
}
const slotDefinition = this.#targetClassInducedSlots[slotName];
// This DH convention is also a bit particular about ranges. It looks at either the slot range
// or the first any_of range.
const slotRange =
slotDefinition.range || slotDefinition.any_of?.[0]?.range;
const slotType = this.#schema.types?.[slotRange];
for (let row = 0; row < data.length; row += 1) {
const compareValue = this.#parser.parse(
data[row][compareColumn],
slotType.uri
);
if (!compareValue) {
continue;
}
const value = this.#parser.parse(data[row][column], slotType.uri);
if (!value) {
continue;
}
if (!compareFn(value, compareValue)) {
this.#addResult(
row,
column,
`Value ${message} value of ${compareSlotName} column`
);
}
}
}
}

#buildSlotConditionGettersAndValidators(
classExpression,
header,
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "data-harmonizer",
"version": "1.6.4",
"version": "1.6.5",
"description": "A standardized spreadsheet editor and validator that can be run offline and locally",
"repository": "git@github.com:cidgoh/DataHarmonizer.git",
"license": "MIT",
Expand Down
62 changes: 62 additions & 0 deletions tests/Validator.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,37 @@ const SCHEMA = {
minimum_value: '2010-02-12',
maximum_value: '2010-02-28',
},
// This is not standard LinkML but it is supported while LinkML adds better support
// for non-numeric minimum and maximum values
during_vancouver_olympics_todos: {
name: 'during_vancouver_olympics_todos',
range: 'date',
todos: ['>=2010-02-12', '<=2010-02-28'],
},
// The special '{today}' value is not standard LinkML, but it is supported as a
// DataHarmonizer convention
not_the_future: {
name: 'not_the_future',
range: 'date',
maximum_value: '{today}',
},
not_the_future_todos: {
name: 'not_the_future_todos',
range: 'date',
todos: ['<={today}'],
},
// This is another DataHarmonizer convention that we support but isn't expressible in
// standard LinkML
after_a_date: {
name: 'after_a_date',
range: 'date',
todos: ['>={a_date}'],
},
before_a_date: {
name: 'before_a_date',
range: 'date',
todos: ['<={a_date}'],
},
a_constant: {
name: 'a_constant',
range: 'string',
Expand Down Expand Up @@ -508,10 +534,21 @@ describe('Validator', () => {
expect(fn('2010-02-20')).toBeUndefined();
expect(fn('2010-03-01')).toEqual('Value is greater than maximum value');

fn = validator.getValidatorForSlot('during_vancouver_olympics_todos');
expect(fn(undefined)).toBeUndefined();
expect(fn('2010-01-01')).toEqual('Value is less than minimum value');
expect(fn('2010-02-20')).toBeUndefined();
expect(fn('2010-03-01')).toEqual('Value is greater than maximum value');

fn = validator.getValidatorForSlot('not_the_future');
expect(fn(undefined)).toBeUndefined();
expect(fn('2021-01-01')).toBeUndefined();
expect(fn('3000-01-01')).toEqual('Value is greater than maximum value');

fn = validator.getValidatorForSlot('not_the_future_todos');
expect(fn(undefined)).toBeUndefined();
expect(fn('2021-01-01')).toBeUndefined();
expect(fn('3000-01-01')).toEqual('Value is greater than maximum value');
});

it('should validate constant constraints', () => {
Expand Down Expand Up @@ -796,4 +833,29 @@ describe('Validator', () => {
},
});
});

// Exercises the todos convention where one slot is bounded by another slot in the same row:
// before_a_date carries '<={a_date}' and after_a_date carries '>={a_date}'.
it('should validate min/max constrains based on other slots', () => {
  const validator = new Validator(SCHEMA);
  validator.useTargetClass('Test');

  const columns = ['a_date', 'before_a_date', 'after_a_date'];
  const rows = [
    // before_a_date earlier than, equal to, then later than a_date (only the last fails)
    ['2023-06-01', '2023-05-29', ''],
    ['2023-06-01', '2023-06-01', ''],
    ['2023-06-01', '2023-06-03', ''],
    // after_a_date earlier than (fails), equal to, then later than a_date
    ['2023-06-01', '', '2023-05-29'],
    ['2023-06-01', '', '2023-06-01'],
    ['2023-06-01', '', '2023-06-03'],
    // a_date is empty, so neither dependent column is expected to be reported
    ['', '2023-06-03', '2023-05-29'],
  ];

  // Expected results are keyed by row index, then by column index.
  const expected = {
    2: {
      1: 'Value is greater than value of a_date column',
    },
    3: {
      2: 'Value is less than value of a_date column',
    },
  };
  expect(validator.validate(rows, columns)).toEqual(expected);
});
});

0 comments on commit c076baf

Please sign in to comment.