From 5b3a0afc8de63abc4ba1f679623bbacb638debde Mon Sep 17 00:00:00 2001 From: "Nico Pfiffner (nipfif)" Date: Tue, 16 Jul 2024 22:11:10 +0200 Subject: [PATCH] feat+refactor: Move extraction functions to a separate utils file. Improve robustness of extraction logic (convert objects to strings, detect if inputs are numeric or dates). Exclude strings from 'greater/smaller'-comparisons. Include date comparison for date-strings following ISO, RFC2822, or HTTP patterns. --- frontend/src/components/FileUploader.vue | 84 +++++------ .../utils/FileUploaderExtractionFunctions.js | 136 ++++++++++++++++++ 2 files changed, 178 insertions(+), 42 deletions(-) create mode 100644 frontend/src/utils/FileUploaderExtractionFunctions.js diff --git a/frontend/src/components/FileUploader.vue b/frontend/src/components/FileUploader.vue index 97c2ad0..2e0e1e4 100644 --- a/frontend/src/components/FileUploader.vue +++ b/frontend/src/components/FileUploader.vue @@ -305,6 +305,17 @@ import JSZip from "jszip"; import DonationInstructions from "./DonationInstructions"; import axios from "axios"; import Papa from 'papaparse'; +import { + regexDeleteMatch, + regexReplaceMatch, + regexDeleteRow, + valueIsEqual, + valueIsNotEqual, + valueIsSmallerOrEqual, + valueIsGreaterOrEqual, + valueIsSmaller, + valueIsGreater +} from '../utils/FileUploaderExtractionFunctions' export default { @@ -586,88 +597,78 @@ export default { result[rule.field] = entry[key]; break; case '==': - if (entry[key] !== rule.comparison_value) { - // keep entry - } else { + if (valueIsEqual(entry[key], rule.comparison_value)) { // discard entry throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.` } break; case '!=': - if (entry[key] === rule.comparison_value) { - // keep entry - } else { + if (valueIsNotEqual(entry[key], rule.comparison_value)) { // discard entry throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.` } break; case '<=': - if (entry[key] > rule.comparison_value) { 
- // keep entry - } else { + if (valueIsSmallerOrEqual(entry[key], rule.comparison_value)) { // discard entry throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.` } break; case '>=': - if (entry[key] < rule.comparison_value) { - // keep entry - } else { + if (valueIsGreaterOrEqual(entry[key], rule.comparison_value)) { // discard entry throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.` } break; case '<': - if (entry[key] >= rule.comparison_value) { - // keep entry - } else { + if (valueIsSmaller(entry[key], rule.comparison_value)) { // discard entry throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.` } break; case '>': - if (entry[key] <= rule.comparison_value) { - // keep entry - } else { + if (valueIsGreater(entry[key], rule.comparison_value)) { // discard entry - throw `Field "${key}" matches filter value "${rule.comparison_value}".` + throw `Field "${key}" matches filter value "${rule.comparison_value}" for entry.` } break; case 'regex-delete-match': if (key in result) { - let originalValue = entry[key]; - if (typeof entry[key] !== 'string') { - originalValue = JSON.stringify(entry[key]); + try { + let newValue = regexDeleteMatch(entry[key], rule.comparison_value); + result[rule.field] = newValue; + entry[key] = newValue; + } catch { + let errorMsg = `RegexDeleteMatch failed for field ${rule.field}.`; + uploader.postError(4220, errorMsg, blueprint.id); + result[rule.field] = entry[key]; } - - let newValue = originalValue.replaceAll(RegExp(rule.comparison_value, 'g'), ''); - result[rule.field] = newValue; - entry[key] = newValue; } break; case 'regex-replace-match': if (key in result) { - let originalValue = entry[key]; - if (typeof entry[key] !== 'string') { - originalValue = JSON.stringify(entry[key]); + try { + let newValue = regexReplaceMatch(entry[key], rule.comparison_value, rule.replacement_value); + result[rule.field] = newValue; + entry[key] = newValue; + } catch { 
+ let errorMsg = `RegexReplaceMatch failed for field ${rule.field}.`; + uploader.postError(4221, errorMsg, blueprint.id); + result[rule.field] = entry[key]; } - - let newValue = originalValue.replaceAll(RegExp(rule.comparison_value, 'g'), rule.replacement_value); - result[rule.field] = newValue; - entry[key] = newValue; } break; case 'regex-delete-row': if (key in entry) { - let originalValue = entry[key]; - if (typeof entry[key] !== 'string') { - originalValue = JSON.stringify(entry[key]); + let deleteRow = false; + try { + deleteRow = regexDeleteRow(entry[key], rule.comparison_value); + } catch { + let errorMsg = `RegexDeleteRow failed for field ${rule.field}.`; + uploader.postError(4222, errorMsg, blueprint.id); + break; } - - let comparisonValue = RegExp(rule.comparison_value, 'g'); - if (!comparisonValue.test(originalValue)) { - // keep entry - } else { + if (deleteRow) { // discard entry throw `Field "${key}" matches RegExp "${rule.comparison_value}".` } @@ -680,7 +681,6 @@ export default { extractedData.push(result); } catch (e) { nEntriesFilteredOut += 1; - // uploader.postError(4206, `${e}`, blueprint.id) } for (let [key, value] of keyMap.entries()) { diff --git a/frontend/src/utils/FileUploaderExtractionFunctions.js b/frontend/src/utils/FileUploaderExtractionFunctions.js new file mode 100644 index 0000000..1d3c759 --- /dev/null +++ b/frontend/src/utils/FileUploaderExtractionFunctions.js @@ -0,0 +1,136 @@ +import { DateTime } from "luxon"; + +function getOriginalValueAsString(v) { + if (typeof v !== 'string') { + return JSON.stringify(v); + } else { + return v; + } +} + +function isNumeric(v){ + if (typeof v === 'number') return true; + if (typeof v != 'string') return false; + return !isNaN(v) && !isNaN(parseFloat(v)); +} + +function bothDates(entryValue, comparisonValue) { + try { + if (DateTime.fromISO(entryValue).isValid && DateTime.fromISO(comparisonValue).isValid ) { + // e.g., "2016-05-25T092415.123", "2016-05-25" + return true; + + } else if 
(DateTime.fromRFC2822(entryValue).isValid && DateTime.fromRFC2822(comparisonValue).isValid ) { + // e.g., "Tue, 01 Nov 2016 13:23:12 +0630" + return true; + + } else if (DateTime.fromHTTP(entryValue).isValid && DateTime.fromHTTP(comparisonValue).isValid ) { + // e.g., "Sunday, 06-Nov-94 08:49:37 GMT" + return true; + + } else { + return false; + } + } + catch(err) { + return false; + } +} + +function getDates(entryValue, comparisonValue) { + if (DateTime.fromISO(entryValue).isValid && DateTime.fromISO(comparisonValue).isValid) { + return [DateTime.fromISO(entryValue), DateTime.fromISO(comparisonValue)]; + + } else if (DateTime.fromRFC2822(entryValue).isValid && DateTime.fromRFC2822(comparisonValue).isValid) { + return [DateTime.fromRFC2822(entryValue), DateTime.fromRFC2822(comparisonValue)]; + + } else if (DateTime.fromHTTP(entryValue).isValid && DateTime.fromHTTP(comparisonValue).isValid) { + return [DateTime.fromHTTP(entryValue), DateTime.fromHTTP(comparisonValue)]; + + } else { + return [null, null] + } +} + +function prepareValues(entryValue, comparisonValue) { + if (isNumeric(entryValue) && isNumeric(comparisonValue)) { + entryValue = parseFloat(entryValue); + comparisonValue = parseFloat(comparisonValue); + } else if (bothDates(entryValue, comparisonValue)) { + [entryValue, comparisonValue] = getDates(entryValue, comparisonValue) + } else { + entryValue = getOriginalValueAsString(entryValue); + comparisonValue = getOriginalValueAsString(comparisonValue); + } + return [entryValue, comparisonValue]; +} + +export function valueIsEqual(entryValue, comparisonValue) { + entryValue = getOriginalValueAsString(entryValue); + comparisonValue = getOriginalValueAsString(comparisonValue); + return entryValue === comparisonValue; +} + +export function valueIsNotEqual(entryValue, comparisonValue) { + entryValue = getOriginalValueAsString(entryValue); + comparisonValue = getOriginalValueAsString(comparisonValue); + return entryValue !== comparisonValue; +} + +export function 
valueIsSmallerOrEqual(entryValue, comparisonValue) { + if (bothDates(entryValue, comparisonValue) || + (isNumeric(entryValue) && isNumeric(comparisonValue))) { + [entryValue, comparisonValue] = prepareValues(entryValue, comparisonValue); + return entryValue <= comparisonValue; + } else { + return false; + } +} + +export function valueIsGreaterOrEqual(entryValue, comparisonValue) { + if (bothDates(entryValue, comparisonValue) || + (isNumeric(entryValue) && isNumeric(comparisonValue))) { + [entryValue, comparisonValue] = prepareValues(entryValue, comparisonValue); + return entryValue >= comparisonValue; + } else { + return false; + } +} + +export function valueIsSmaller(entryValue, comparisonValue) { + if (bothDates(entryValue, comparisonValue) || + (isNumeric(entryValue) && isNumeric(comparisonValue))) { + [entryValue, comparisonValue] = prepareValues(entryValue, comparisonValue); + return entryValue < comparisonValue; + } else { + return false; + } +} + +export function valueIsGreater(entryValue, comparisonValue) { + if (bothDates(entryValue, comparisonValue) || + (isNumeric(entryValue) && isNumeric(comparisonValue))) { + [entryValue, comparisonValue] = prepareValues(entryValue, comparisonValue); + return entryValue > comparisonValue; + } else { + return false; + } +} + +export function regexDeleteMatch(entryValue, comparisonValue) { + let originalValue = getOriginalValueAsString(entryValue); + let comparisonRegExp = RegExp(comparisonValue, 'g'); + return originalValue.replaceAll(comparisonRegExp, ''); +} + +export function regexReplaceMatch(entryValue, comparisonValue, replacementValue) { + let originalValue = getOriginalValueAsString(entryValue); + let comparisonRegExp = RegExp(comparisonValue, 'g'); + return originalValue.replaceAll(comparisonRegExp, replacementValue); +} + +export function regexDeleteRow(entryValue, comparisonValue) { + let originalValue = getOriginalValueAsString(entryValue); + let comparisonRegExp = RegExp(comparisonValue, 'g'); + return 
comparisonRegExp.test(originalValue); +}