Skip to content

Commit

Permalink
feat+refactor: Move extraction functions to a separate utils file. Im…
Browse files Browse the repository at this point in the history
…prove robustness of extraction logic (convert objects to strings, detect if inputs are numeric or dates). Exclude strings from 'greater/smaller' comparisons. Include date comparison for date strings following ISO, RFC2822, or HTTP patterns.
  • Loading branch information
Nico-AP committed Jul 16, 2024
1 parent 54a89fc commit 5b3a0af
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 42 deletions.
84 changes: 42 additions & 42 deletions frontend/src/components/FileUploader.vue
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,17 @@ import JSZip from "jszip";
import DonationInstructions from "./DonationInstructions";
import axios from "axios";
import Papa from 'papaparse';
import {
regexDeleteMatch,
regexReplaceMatch,
regexDeleteRow,
valueIsEqual,
valueIsNotEqual,
valueIsSmallerOrEqual,
valueIsGreaterOrEqual,
valueIsSmaller,
valueIsGreater
} from '../utils/FileUploaderExtractionFunctions'
export default {
Expand Down Expand Up @@ -586,88 +597,78 @@ export default {
result[rule.field] = entry[key];
break;
case '==':
if (entry[key] !== rule.comparison_value) {
// keep entry
} else {
if (valueIsEqual(entry[key], rule.comparison_value)) {
// discard entry
throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.`
}
break;
case '!=':
if (entry[key] === rule.comparison_value) {
// keep entry
} else {
if (valueIsNotEqual(entry[key], rule.comparison_value)) {
// discard entry
throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.`
}
break;
case '<=':
if (entry[key] > rule.comparison_value) {
// keep entry
} else {
if (valueIsSmallerOrEqual(entry[key], rule.comparison_value)) {
// discard entry
throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.`
}
break;
case '>=':
if (entry[key] < rule.comparison_value) {
// keep entry
} else {
if (valueIsGreaterOrEqual(entry[key], rule.comparison_value)) {
// discard entry
throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.`
}
break;
case '<':
if (entry[key] >= rule.comparison_value) {
// keep entry
} else {
if (valueIsSmaller(entry[key], rule.comparison_value)) {
// discard entry
throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.`
}
break;
case '>':
if (entry[key] <= rule.comparison_value) {
// keep entry
} else {
if (valueIsGreater(entry[key], rule.comparison_value)) {
// discard entry
throw `Field "${key}" matches filter value "${rule.comparison_value}".`
throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.`
}
break;
case 'regex-delete-match':
if (key in result) {
let originalValue = entry[key];
if (typeof entry[key] !== 'string') {
originalValue = JSON.stringify(entry[key]);
try {
let newValue = regexDeleteMatch(entry[key], rule.comparison_value);
result[rule.field] = newValue;
entry[key] = newValue;
} catch {
let errorMsg = `RegexDeleteMatch failed for field ${rule.field}.`;
uploader.postError(4220, errorMsg, blueprint.id);
result[rule.field] = entry[key];
}
let newValue = originalValue.replaceAll(RegExp(rule.comparison_value, 'g'), '');
result[rule.field] = newValue;
entry[key] = newValue;
}
break;
case 'regex-replace-match':
if (key in result) {
let originalValue = entry[key];
if (typeof entry[key] !== 'string') {
originalValue = JSON.stringify(entry[key]);
try {
let newValue = regexReplaceMatch(entry[key], rule.comparison_value, rule.replacement_value);
result[rule.field] = newValue;
entry[key] = newValue;
} catch {
let errorMsg = `RegexReplaceMatch failed for field ${rule.field}.`;
uploader.postError(4221, errorMsg, blueprint.id);
result[rule.field] = entry[key];
}
let newValue = originalValue.replaceAll(RegExp(rule.comparison_value, 'g'), rule.replacement_value);
result[rule.field] = newValue;
entry[key] = newValue;
}
break;
case 'regex-delete-row':
if (key in entry) {
let originalValue = entry[key];
if (typeof entry[key] !== 'string') {
originalValue = JSON.stringify(entry[key]);
let deleteRow = false;
try {
deleteRow = regexDeleteRow(entry[key], rule.comparison_value);
} catch {
let errorMsg = `RegexDeleteRow failed for field ${rule.field}.`;
uploader.postError(4222, errorMsg, blueprint.id);
break;
}
let comparisonValue = RegExp(rule.comparison_value, 'g');
if (!comparisonValue.test(originalValue)) {
// keep entry
} else {
if (deleteRow) {
// discard entry
throw `Field "${key}" matches RegExp "${rule.comparison_value}".`
}
Expand All @@ -680,7 +681,6 @@ export default {
extractedData.push(result);
} catch (e) {
nEntriesFilteredOut += 1;
// uploader.postError(4206, `${e}`, blueprint.id)
}
for (let [key, value] of keyMap.entries()) {
Expand Down
136 changes: 136 additions & 0 deletions frontend/src/utils/FileUploaderExtractionFunctions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import { DateTime } from "luxon";

/**
 * Returns a string representation of a value.
 *
 * Strings are passed through untouched; every other value is serialised
 * with JSON.stringify.
 *
 * @param {*} v - Value to convert.
 * @returns {string|undefined} The string form of `v`. JSON.stringify yields
 *   `undefined` for inputs it cannot serialise (e.g. `undefined`), and that
 *   result is returned as-is.
 */
function getOriginalValueAsString(v) {
  return typeof v === 'string' ? v : JSON.stringify(v);
}

/**
 * Tells whether a value is a number or a string that reads as a number.
 *
 * @param {*} v - Value to inspect.
 * @returns {boolean} `true` for any value of type `number`, and for strings
 *   that both coerce to a number as a whole and start with a parsable
 *   float; `false` for every other type or string.
 */
function isNumeric(v) {
  switch (typeof v) {
    case 'number':
      return true;
    case 'string': {
      // The whole string must coerce to a number (rejects "12px") and
      // parseFloat must find a number too (rejects "" and whitespace-only,
      // which coerce to 0).
      const coercesToNaN = Number.isNaN(Number(v));
      const parsesToNaN = Number.isNaN(parseFloat(v));
      return !(coercesToNaN || parsesToNaN);
    }
    default:
      return false;
  }
}

/**
 * Checks whether both values parse as dates in the same format.
 *
 * The formats are tried in order: ISO (e.g. "2016-05-25"), RFC 2822
 * (e.g. "Tue, 01 Nov 2016 13:23:12 +0630") and HTTP
 * (e.g. "Sunday, 06-Nov-94 08:49:37 GMT"). Both values must be valid in the
 * same format for the check to succeed.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} `true` if one format parses both values as valid;
 *   `false` otherwise, including when parsing throws.
 */
function bothDates(entryValue, comparisonValue) {
  const parsers = [
    (value) => DateTime.fromISO(value),
    (value) => DateTime.fromRFC2822(value),
    (value) => DateTime.fromHTTP(value),
  ];

  try {
    return parsers.some(
      (parse) => parse(entryValue).isValid && parse(comparisonValue).isValid
    );
  } catch (err) {
    // Any parsing failure means the pair is not treated as dates.
    return false;
  }
}

/**
 * Parses both values as dates using the first format that fits both.
 *
 * The formats are tried in order: ISO, RFC 2822, HTTP. A format is used
 * only when it yields a valid result for the entry value and for the
 * comparison value.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {Array} `[entryDate, comparisonDate]` as parsed DateTime objects,
 *   or `[null, null]` when no single format parses both values.
 */
function getDates(entryValue, comparisonValue) {
  const parsers = [
    (value) => DateTime.fromISO(value),
    (value) => DateTime.fromRFC2822(value),
    (value) => DateTime.fromHTTP(value),
  ];

  for (const parse of parsers) {
    const entryDate = parse(entryValue);
    if (!entryDate.isValid) {
      continue;
    }
    const comparisonDate = parse(comparisonValue);
    if (comparisonDate.isValid) {
      return [entryDate, comparisonDate];
    }
  }

  return [null, null];
}

/**
 * Converts a pair of values into a form in which they can be compared.
 *
 * The first matching rule wins:
 *   1. both values are numeric  -> both are converted to numbers;
 *   2. both values are dates    -> both are converted via getDates();
 *   3. otherwise                -> both are converted to strings.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {Array} `[entryValue, comparisonValue]` after conversion.
 */
function prepareValues(entryValue, comparisonValue) {
  if (isNumeric(entryValue) && isNumeric(comparisonValue)) {
    // Number() instead of parseFloat(): isNumeric() accepts strings based on
    // Number coercion, so hex/binary/octal literals such as "0x10" count as
    // numeric. parseFloat() would silently turn those into 0, whereas
    // Number() yields the value isNumeric() actually recognised. For plain
    // decimal input both conversions give the same result.
    return [Number(entryValue), Number(comparisonValue)];
  }

  if (bothDates(entryValue, comparisonValue)) {
    return getDates(entryValue, comparisonValue);
  }

  return [
    getOriginalValueAsString(entryValue),
    getOriginalValueAsString(comparisonValue),
  ];
}

/**
 * Compares two values for equality based on their string representations.
 *
 * Non-string values are serialised first, so e.g. the number 5 and the
 * string "5" are considered equal.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} `true` if both string representations are identical.
 */
export function valueIsEqual(entryValue, comparisonValue) {
  return (
    getOriginalValueAsString(entryValue) ===
    getOriginalValueAsString(comparisonValue)
  );
}

/**
 * Compares two values for inequality based on their string representations.
 *
 * Non-string values are serialised first, so e.g. the number 5 and the
 * string "5" are NOT considered different.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} `true` if the string representations differ.
 */
export function valueIsNotEqual(entryValue, comparisonValue) {
  return (
    getOriginalValueAsString(entryValue) !==
    getOriginalValueAsString(comparisonValue)
  );
}

/**
 * Checks whether the entry value is smaller than or equal to the comparison
 * value.
 *
 * Only pairs that are both dates or both numeric are compared; any other
 * pair (e.g. plain strings) never matches.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} Result of `entry <= comparison` on the prepared values,
 *   or `false` when the pair is not comparable.
 */
export function valueIsSmallerOrEqual(entryValue, comparisonValue) {
  const comparable =
    bothDates(entryValue, comparisonValue) ||
    (isNumeric(entryValue) && isNumeric(comparisonValue));
  if (!comparable) {
    return false;
  }

  const [left, right] = prepareValues(entryValue, comparisonValue);
  return left <= right;
}

/**
 * Checks whether the entry value is greater than or equal to the comparison
 * value.
 *
 * Only pairs that are both dates or both numeric are compared; any other
 * pair (e.g. plain strings) never matches.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} Result of `entry >= comparison` on the prepared values,
 *   or `false` when the pair is not comparable.
 */
export function valueIsGreaterOrEqual(entryValue, comparisonValue) {
  const comparable =
    bothDates(entryValue, comparisonValue) ||
    (isNumeric(entryValue) && isNumeric(comparisonValue));
  if (!comparable) {
    return false;
  }

  const [left, right] = prepareValues(entryValue, comparisonValue);
  return left >= right;
}

/**
 * Checks whether the entry value is strictly smaller than the comparison
 * value.
 *
 * Only pairs that are both dates or both numeric are compared; any other
 * pair (e.g. plain strings) never matches.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} Result of `entry < comparison` on the prepared values,
 *   or `false` when the pair is not comparable.
 */
export function valueIsSmaller(entryValue, comparisonValue) {
  const comparable =
    bothDates(entryValue, comparisonValue) ||
    (isNumeric(entryValue) && isNumeric(comparisonValue));
  if (!comparable) {
    return false;
  }

  const [left, right] = prepareValues(entryValue, comparisonValue);
  return left < right;
}

/**
 * Checks whether the entry value is strictly greater than the comparison
 * value.
 *
 * Only pairs that are both dates or both numeric are compared; any other
 * pair (e.g. plain strings) never matches.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {*} comparisonValue - Value the entry is compared against.
 * @returns {boolean} Result of `entry > comparison` on the prepared values,
 *   or `false` when the pair is not comparable.
 */
export function valueIsGreater(entryValue, comparisonValue) {
  const comparable =
    bothDates(entryValue, comparisonValue) ||
    (isNumeric(entryValue) && isNumeric(comparisonValue));
  if (!comparable) {
    return false;
  }

  const [left, right] = prepareValues(entryValue, comparisonValue);
  return left > right;
}

/**
 * Removes every match of a regular expression from a value.
 *
 * The value is converted to its string representation before matching.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {string} comparisonValue - Regular expression source to match.
 * @returns {string} The string form of `entryValue` with all matches removed.
 * @throws {SyntaxError} If `comparisonValue` is not a valid pattern.
 * @throws {TypeError} If `entryValue` has no string representation.
 */
export function regexDeleteMatch(entryValue, comparisonValue) {
  const text = getOriginalValueAsString(entryValue);
  const pattern = new RegExp(comparisonValue, 'g');
  return text.replaceAll(pattern, '');
}

/**
 * Replaces every match of a regular expression in a value.
 *
 * The value is converted to its string representation before matching.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {string} comparisonValue - Regular expression source to match.
 * @param {string} replacementValue - Replacement for each match.
 * @returns {string} The string form of `entryValue` with all matches
 *   replaced.
 * @throws {SyntaxError} If `comparisonValue` is not a valid pattern.
 * @throws {TypeError} If `entryValue` has no string representation.
 */
export function regexReplaceMatch(entryValue, comparisonValue, replacementValue) {
  const text = getOriginalValueAsString(entryValue);
  const pattern = new RegExp(comparisonValue, 'g');
  return text.replaceAll(pattern, replacementValue);
}

/**
 * Tests whether a value matches a regular expression.
 *
 * The value is converted to its string representation before matching.
 *
 * @param {*} entryValue - Value taken from the data entry.
 * @param {string} comparisonValue - Regular expression source to test.
 * @returns {boolean} `true` if the pattern matches the string form of
 *   `entryValue`.
 * @throws {SyntaxError} If `comparisonValue` is not a valid pattern.
 */
export function regexDeleteRow(entryValue, comparisonValue) {
  const text = getOriginalValueAsString(entryValue);
  // A new RegExp is built on every call, so the stateful lastIndex of the
  // global flag cannot carry over between calls.
  return new RegExp(comparisonValue, 'g').test(text);
}

0 comments on commit 5b3a0af

Please sign in to comment.