Skip to content

Commit

Permalink
Fix JDK regex support (#888)
Browse files Browse the repository at this point in the history
Summary of changes:
- Fix the test resources introduced by #783 by moving the `regex`
  fields, such that the test framework does not skip them with a "Not a
  valid test case" message.
- Revert the changes introduced by #815, as those are simply incorrect.
- Extend the test coverage introduced by #815 by (a) updating the test
  regexes to match their intended semantics and (b) include a few
  negative test cases.
- Partially revert the change introduced by #783: the use of
  `Matcher#find()` is correct, but the `hasStartAnchor` and
  `hasEndAnchor` logic introduces more bugs than the issue it aims to
  solve.
- Extend the test coverage introduced by #783, by introducing regexes
  that are not covered by the `hasStartAnchor`/`hasEndAnchor` logic.
- Update the Joni regular expression integration such that it passes
  more of the test cases.
- Disable the "trailing newline" test cases, as these are currently not
  handled correctly by either regex implementation.
  • Loading branch information
Stephan202 committed Dec 2, 2023
1 parent 9ed6dc2 commit 0924dfe
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 40 deletions.
21 changes: 2 additions & 19 deletions src/main/java/com/networknt/schema/regex/JDKRegularExpression.java
Original file line number Diff line number Diff line change
@@ -1,33 +1,16 @@
package com.networknt.schema.regex;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

class JDKRegularExpression implements RegularExpression {
private final Pattern pattern;
private final boolean hasStartAnchor;
private final boolean hasEndAnchor;

JDKRegularExpression(String regex) {
// The patterns in JSON Schema are not implicitly anchored so we must
// use Matcher.find(). However, this method does not honor the end
// anchor when immediately preceded by a quantifier (e.g., ?, *, +).
// To make this work in all cases, we wrap the pattern in a group.
this.hasStartAnchor = '^' == regex.charAt(0);
this.hasEndAnchor = '$' == regex.charAt(regex.length() - 1);
String pattern = regex;
if (this.hasEndAnchor) {
pattern = pattern.substring(this.hasStartAnchor ? 1 : 0, pattern.length() - 1);
pattern = '(' + pattern + ")$";
if (this.hasStartAnchor) pattern = '^' + pattern;
}
this.pattern = Pattern.compile(pattern);
this.pattern = Pattern.compile(regex);
}

@Override
public boolean matches(String value) {
Matcher matcher = this.pattern.matcher(value);
return matcher.find() && (!this.hasStartAnchor || 0 == matcher.start()) && (!this.hasEndAnchor || matcher.end() == value.length());
return this.pattern.matcher(value).find();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class JoniRegularExpression implements RegularExpression {
.replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");

byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
this.pattern = new Regex(bytes, 0, bytes.length, Option.NONE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
}

@Override
Expand Down
46 changes: 34 additions & 12 deletions src/test/java/com/networknt/schema/regex/Issue814Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,62 @@ class Issue814Test {

@Test
void jdkTypePattern() {
JDKRegularExpression ex = new JDKRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$");
JDKRegularExpression ex = new JDKRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$");
assertTrue(ex.matches("list"));
assertTrue(ex.matches("string"));
assertTrue(ex.matches("boolean"));
assertTrue(ex.matches("number"));
assertTrue(ex.matches("enum"));
assertFalse(ex.matches("listZ"));
assertFalse(ex.matches("AenumZ"));
assertFalse(ex.matches("Anumber"));
}

@Test
void jdkOptionsPattern() {
JDKRegularExpression ex = new JDKRegularExpression("^\\d*|[a-zA-Z_]+$");
assertTrue(ex.matches("external"));
assertTrue(ex.matches("external_gte"));
assertTrue(ex.matches("force"));
assertTrue(ex.matches("internal"));
JDKRegularExpression ex = new JDKRegularExpression("^\\d|[a-zA-Z_]$");
assertTrue(ex.matches("5"));
assertTrue(ex.matches("55"));
assertTrue(ex.matches("5%"));
assertTrue(ex.matches("a"));
assertTrue(ex.matches("aa"));
assertTrue(ex.matches("%a"));
assertTrue(ex.matches("%_"));
assertTrue(ex.matches("55aa"));
assertTrue(ex.matches("5%%a"));
assertFalse(ex.matches(""));
assertFalse(ex.matches("%"));
assertFalse(ex.matches("a5"));
}

@Test
void joniTypePattern() {
JoniRegularExpression ex = new JoniRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$");
JoniRegularExpression ex = new JoniRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$");
assertTrue(ex.matches("list"));
assertTrue(ex.matches("string"));
assertTrue(ex.matches("boolean"));
assertTrue(ex.matches("number"));
assertTrue(ex.matches("enum"));
assertFalse(ex.matches("listZ"));
assertFalse(ex.matches("AenumZ"));
assertFalse(ex.matches("Anumber"));
}

@Test
void joniOptionsPattern() {
JoniRegularExpression ex = new JoniRegularExpression("^\\d*|[a-zA-Z_]+$");
assertTrue(ex.matches("internal"));
assertTrue(ex.matches("external"));
assertTrue(ex.matches("external_gte"));
assertTrue(ex.matches("force"));
JoniRegularExpression ex = new JoniRegularExpression("^\\d|[a-zA-Z_]$");
assertTrue(ex.matches("5"));
assertTrue(ex.matches("55"));
assertTrue(ex.matches("5%"));
assertTrue(ex.matches("a"));
assertTrue(ex.matches("aa"));
assertTrue(ex.matches("%a"));
assertTrue(ex.matches("%_"));
assertTrue(ex.matches("55aa"));
assertTrue(ex.matches("5%%a"));
assertFalse(ex.matches(""));
assertFalse(ex.matches("%"));
assertFalse(ex.matches("a5"));
}

}
56 changes: 50 additions & 6 deletions src/test/resources/draft2020-12/issue495.json
Original file line number Diff line number Diff line change
@@ -1,61 +1,105 @@
[
{
"description": "issue495 using ECMA-262",
"regex": "ecma-262",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"pattern": "^[a-z]{1,10}$",
"patternProperties": {
"^[a-z]{1,10}$": true,
"(^1$)": true
},
"unevaluatedProperties": false
},
"tests": [
{
"description": "an expected property name",
"regex": "ecma-262",
"data": { "aaa": 3 },
"valid": true
},
{
"description": "another expected property name",
"regex": "jdk",
"data": { "1": 3 },
"valid": true
},
{
"description": "trailing newline",
"regex": "ecma-262",
"data": { "aaa\n": 3 },
"valid": false
"valid": false,
"disabled": true,
"comment": "Test fails"
},
{
"description": "another trailing newline",
"regex": "jdk",
"data": { "1\n": 3 },
"valid": false,
"disabled": true,
"comment": "Test fails"
},
{
"description": "embedded newline",
"regex": "ecma-262",
"data": { "aaa\nbbb": 3 },
"valid": false
},
{
"description": "leading newline",
"regex": "ecma-262",
"data": { "\nbbb": 3 },
"valid": false
}
]
},
{
"description": "issue495 using Java Pattern",
"regex": "jdk",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"pattern": "^[a-z]{1,10}$",
"patternProperties": {
"^[a-z]{1,10}$": true,
"(^1$)": true
},
"unevaluatedProperties": false
},
"tests": [
{
"description": "an expected property name",
"regex": "jdk",
"data": { "aaa": 3 },
"valid": true
},
{
"description": "another expected property name",
"regex": "jdk",
"data": { "1": 3 },
"valid": true
},
{
"description": "trailing newline",
"regex": "jdk",
"data": { "aaa\n": 3 },
"valid": false
"valid": false,
"disabled": true,
"comment": "Test fails"
},
{
"description": "another trailing newline",
"regex": "jdk",
"data": { "1\n": 3 },
"valid": false,
"disabled": true,
"comment": "Test fails"
},
{
"description": "embedded newline",
"regex": "jdk",
"data": { "aaa\nbbb": 3 },
"valid": false
},
{
"description": "leading newline",
"regex": "jdk",
"data": { "\nbbb": 3 },
"valid": false
}
Expand Down
14 changes: 12 additions & 2 deletions src/test/resources/draft2020-12/issue782.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[
{
"description": "issue782 using ECMA-262",
"regex": "ecma-262",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"patternProperties": {
Expand All @@ -14,39 +13,44 @@
"tests": [
{
"description": "regexes may be anchored to the start of the property name, 1",
"regex": "ecma-262",
"data": { "x-api-id": 3 },
"valid": true
},
{
"description": "regexes may be anchored to the start of the property name, 2",
"regex": "ecma-262",
"data": { "ax-api-id": 3 },
"valid": false
},
{
"description": "regexes may be anchored to the end of the property name, 1",
"regex": "ecma-262",
"data": { "api-id-y-": 3 },
"valid": true
},
{
"description": "regexes may be anchored to the end of the property name, 2",
"regex": "ecma-262",
"data": { "y-api-id": 3 },
"valid": false
},
{
"description": "regexes may be anchored to both ends of the property name, 1",
"regex": "ecma-262",
"data": { "z-": 3 },
"valid": true
},
{
"description": "regexes may be anchored to both ends of the property name, 2",
"regex": "ecma-262",
"data": { "az-api-id": 3 },
"valid": false
}
]
},
{
"description": "issue782 using Java Pattern",
"regex": "jdk",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"patternProperties": {
Expand All @@ -59,31 +63,37 @@
"tests": [
{
"description": "regexes may be anchored to the start of the property name, 1",
"regex": "jdk",
"data": { "x-api-id": 3 },
"valid": true
},
{
"description": "regexes may be anchored to the start of the property name, 2",
"regex": "jdk",
"data": { "ax-api-id": 3 },
"valid": false
},
{
"description": "regexes may be anchored to the end of the property name, 1",
"regex": "jdk",
"data": { "api-id-y-": 3 },
"valid": true
},
{
"description": "regexes may be anchored to the end of the property name, 2",
"regex": "jdk",
"data": { "y-api-id": 3 },
"valid": false
},
{
"description": "regexes may be anchored to both ends of the property name, 1",
"regex": "jdk",
"data": { "z-": 3 },
"valid": true
},
{
"description": "regexes may be anchored to both ends of the property name, 2",
"regex": "jdk",
"data": { "az-api-id": 3 },
"valid": false
}
Expand Down

0 comments on commit 0924dfe

Please sign in to comment.