Skip to content

Commit

Permalink
New: (en) Add year suffix extraction refiner
Browse files Browse the repository at this point in the history
  • Loading branch information
Wanasit Tanakitrungruang committed Aug 10, 2024
1 parent be759f7 commit be6e995
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/locales/en/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import ENTimeUnitCasualRelativeFormatParser from "./parsers/ENTimeUnitCasualRela
import ENMergeRelativeAfterDateRefiner from "./refiners/ENMergeRelativeAfterDateRefiner";
import ENMergeRelativeFollowByDateRefiner from "./refiners/ENMergeRelativeFollowByDateRefiner";
import OverlapRemovalRefiner from "../../common/refiners/OverlapRemovalRefiner";
import ENExtractYearSuffixRefiner from "./refiners/ENExtractYearSuffixRefiner";

export default class ENDefaultConfiguration {
/**
Expand Down Expand Up @@ -71,6 +72,10 @@ export default class ENDefaultConfiguration {

// Re-apply the date-time refiner after the timezone refinement and the exclusions done by the common refiners.
options.refiners.push(new ENMergeDateTimeRefiner());

// Extract year after merging date and time
options.refiners.push(new ENExtractYearSuffixRefiner());

// Keep the date range refiner at the end (after all other refinements).
options.refiners.push(new ENMergeDateRangeRefiner());
return options;
Expand Down
37 changes: 37 additions & 0 deletions src/locales/en/refiners/ENExtractYearSuffixRefiner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { ParsingContext, Refiner } from "../../../chrono";
import { ParsingResult } from "../../../results";
import { YEAR_PATTERN, parseYear } from "../constants";

/**
 * Matches a year token located directly after a parsed expression, optionally
 * separated from it by whitespace.
 *
 * The trailing lookahead requires the year to end at a non-word character or at
 * the end of the text. Without it, the leading part of a longer token (for
 * example the "2023" in "20234") would be accepted as a year and glued onto the
 * result text. The lookahead is zero-width, so `match[0]` is unaffected.
 */
const YEAR_SUFFIX_PATTERN = new RegExp(`^\\s*(${YEAR_PATTERN})(?=\\W|$)`, "i");
/** Index of the capture group in YEAR_SUFFIX_PATTERN that holds the year text. */
const YEAR_GROUP = 1;

/**
 * Refiner that fills in a missing year from a year written right after the
 * matched expression, e.g. "Thu Oct 26 11:00:09 2023".
 *
 * Only results whose start component reports `isDateWithUnknownYear()` are
 * modified. For those, the text following the result is tested against
 * YEAR_SUFFIX_PATTERN; on a match the year is assigned to the start (and to
 * the end, when present) and the matched text is appended to `result.text`.
 */
export default class ENExtractYearSuffixRefiner implements Refiner {
    /**
     * @param context parsing context; provides the full input text and the debug hook
     * @param results results produced so far; updated in place
     * @returns the same `results` array that was passed in
     */
    refine(context: ParsingContext, results: ParsingResult[]): ParsingResult[] {
        for (const result of results) {
            context.debug(() => {
                console.log("ENExtractYearSuffixRefiner", result.text, result.start);
            });

            // Results that already carry a year are left untouched.
            if (!result.start.isDateWithUnknownYear()) {
                continue;
            }

            // Everything in the input that follows the current result.
            const suffix = context.text.substring(result.index + result.text.length);
            const match = YEAR_SUFFIX_PATTERN.exec(suffix);
            if (!match) {
                continue;
            }

            context.debug(() => {
                console.log(`Extracting year: '${match[0]}' into : ${result}`);
            });

            const year = parseYear(match[YEAR_GROUP]);
            if (result.end != null) {
                result.end.assign("year", year);
            }
            result.start.assign("year", year);

            // match[0] includes the separating whitespace, so the result text
            // stays a contiguous slice of the input.
            result.text += match[0];
        }

        return results;
    }
}
27 changes: 26 additions & 1 deletion test/en/en_year.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as chrono from "../../src";
import { testSingleCase, testUnexpectedResult } from "../test_util";
import ENTimeExpressionParser from "../../src/locales/en/parsers/ENTimeExpressionParser";
import { Meridiem } from "../../src";

test("Test - Year numbers with BCE/CE Era label", () => {
testSingleCase(chrono, "10 August 234 BCE", new Date(2012, 7, 10), (result) => {
Expand Down Expand Up @@ -73,3 +73,28 @@ test("Test - Year numbers with Buddhist Era label", () => {
expect(result.start).toBeDate(new Date(2012, 8 - 1, 10, 12));
});
});

// A year written after a full date/time expression (optionally with a timezone
// abbreviation in between) should be picked up as the year of the result.
test("Test - Year number after date/time expression", () => {
    // Year directly after the time.
    testSingleCase(chrono, "Thu Oct 26 11:00:09 2023", new Date(2016, 10 - 1, 1, 8), ({ start }) => {
        expect(start.get("year")).toBe(2023);
        expect(start.get("month")).toBe(10);
        expect(start.get("day")).toBe(26);

        expect(start.get("hour")).toBe(11);
        expect(start.get("minute")).toBe(0);
        expect(start.get("second")).toBe(9);
        expect(start.get("meridiem")).toBe(Meridiem.AM);
    });

    // Year after a timezone abbreviation; the timezone offset must be kept as well.
    testSingleCase(chrono, "Thu Oct 26 11:00:09 EDT 2023", new Date(2016, 10 - 1, 1, 8), ({ start }) => {
        expect(start.get("year")).toBe(2023);
        expect(start.get("month")).toBe(10);
        expect(start.get("day")).toBe(26);

        expect(start.get("hour")).toBe(11);
        expect(start.get("minute")).toBe(0);
        expect(start.get("second")).toBe(9);
        expect(start.get("meridiem")).toBe(Meridiem.AM);
        expect(start.get("timezoneOffset")).toBe(-240);
    });
});

0 comments on commit be6e995

Please sign in to comment.