Skip to content

Commit

Permalink
Add match_phrase_prefix (#661)
Browse files Browse the repository at this point in the history
* Add an integration test for match_phrase_prefix with required parameters

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Update SQL ANTLR files to support match_phrase_prefix.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* SQL parser test for match_phrase_prefix with required arguments.

Change generateQueries signature to use more appropriate Map type.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* The rest of the match_phrase_prefix owl with required parameters.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Checkstyle fix.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add a license header.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Make MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS public.

Private constants look odd.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add support for boost parameter in match_phrase.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add SQL parser unit tests for optional parameters.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add support for optional parameters for match_phrase_prefix.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add unit test for AstExpressionBuilderTest that includes all parameters.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add DSL.namedArgument(String, String)

A convenience method to simplify writing tests.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* ExpressionAnalyzer test for match_phrase_prefix with all parameters.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Support correct max number of optional parameters.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Address checkstyle issues.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Update getRelevanceFunctionResolver usage to pass field argument type.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* match_phrase_prefix PPL required parameters integration test.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* match_phrase_prefix with required parameters in PPL.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Integration test for match_phrase_prefix in PPL with all parameters.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* match_phrase_prefix SQL integration tests.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add FilterQueryBuilderTest test for match_phrase_prefix with analyzer

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Fix flaky tests

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* match_phrase_prefix documentation for SQL and PPL

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Improve PPL documentation for match_phrase_prefix

- match_phrase was used in a couple places.
- some optional parameters were missing.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Add integration tests for match_phrase_prefix in PPL

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Remove an empty unit test.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>

* Updated incorrect references to match_phrase.

Signed-off-by: MaxKsyunz <maxk@bitquilltech.com>
  • Loading branch information
MaxKsyunz committed Jul 11, 2022
1 parent 58cfa88 commit e180d56
Show file tree
Hide file tree
Showing 20 changed files with 616 additions and 6 deletions.
8 changes: 8 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ public NamedArgumentExpression namedArgument(String argName, Expression value) {
return new NamedArgumentExpression(argName, value);
}

/**
 * Convenience overload: turns the string {@code value} into an expression via
 * {@code literal(value)} and delegates to the expression-based overload.
 *
 * @param name  argument name
 * @param value argument value as a plain string
 * @return the named argument built by the expression-based overload
 */
public NamedArgumentExpression namedArgument(String name, String value) {
  Expression literalValue = literal(value);
  return namedArgument(name, literalValue);
}

public static ParseExpression parsed(Expression expression, Expression pattern,
Expression identifier) {
return new ParseExpression(expression, pattern, identifier);
Expand Down Expand Up @@ -658,6 +662,10 @@ public FunctionExpression match_phrase(Expression... args) {
return compile(BuiltinFunctionName.MATCH_PHRASE, args);
}

/**
 * Compiles a {@code MATCH_PHRASE_PREFIX} function expression from the given arguments.
 *
 * @param args arguments passed through to {@code compile}
 * @return the compiled function expression
 */
public FunctionExpression match_phrase_prefix(Expression... args) {
  final BuiltinFunctionName function = BuiltinFunctionName.MATCH_PHRASE_PREFIX;
  return compile(function, args);
}

/**
 * Compiles a {@code MULTI_MATCH} function expression from the given arguments.
 *
 * @param args arguments passed through to {@code compile}
 * @return the compiled function expression
 */
public FunctionExpression multi_match(Expression... args) {
  final BuiltinFunctionName function = BuiltinFunctionName.MULTI_MATCH;
  return compile(function, args);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ public enum BuiltinFunctionName {
MATCH_PHRASE(FunctionName.of("match_phrase")),
MATCHPHRASE(FunctionName.of("matchphrase")),
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),

MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")),
/**
* Legacy Relevance Function.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public class OpenSearchFunctions {
public static final int MIN_NUM_PARAMETERS = 2;
public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17;
public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14;
public static final int MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS = 7;

/**
* Add functions specific to OpenSearch to repository.
Expand All @@ -46,6 +47,7 @@ public void register(BuiltinFunctionRepository repository) {
// compatibility.
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
}

private static FunctionResolver match_bool_prefix() {
Expand All @@ -58,6 +60,11 @@ private static FunctionResolver match() {
return getRelevanceFunctionResolver(funcName, MATCH_MAX_NUM_PARAMETERS, STRING);
}

/**
 * Creates the resolver for {@code match_phrase_prefix}, using
 * {@code MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS} as the parameter limit and
 * {@code STRING} as the type argument.
 */
private static FunctionResolver match_phrase_prefix() {
  return getRelevanceFunctionResolver(
      BuiltinFunctionName.MATCH_PHRASE_PREFIX.getName(),
      MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS,
      STRING);
}

private static FunctionResolver match_phrase(BuiltinFunctionName matchPhrase) {
FunctionName funcName = matchPhrase.getName();
return getRelevanceFunctionResolver(funcName, MATCH_PHRASE_MAX_NUM_PARAMETERS, STRING);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral;
import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName;
import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral;
import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;
import static org.opensearch.sql.data.model.ExprValueUtils.integerValue;
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
Expand Down Expand Up @@ -466,6 +467,30 @@ void simple_query_string_expression_two_fields() {
AstDSL.unresolvedArg("query", stringLiteral("sample query"))));
}

/**
 * Analyzing a {@code match_phrase_prefix} call that carries the two required arguments and
 * every optional parameter must yield the equivalent DSL function expression.
 */
@Test
public void match_phrase_prefix_all_params() {
  Expression expected = dsl.match_phrase_prefix(
      dsl.namedArgument("field", "test"),
      dsl.namedArgument("query", "search query"),
      dsl.namedArgument("slop", "3"),
      dsl.namedArgument("boost", "1.5"),
      dsl.namedArgument("analyzer", "standard"),
      dsl.namedArgument("max_expansions", "4"),
      dsl.namedArgument("zero_terms_query", "NONE"));

  UnresolvedExpression unresolved = AstDSL.function("match_phrase_prefix",
      unresolvedArg("field", stringLiteral("test")),
      unresolvedArg("query", stringLiteral("search query")),
      unresolvedArg("slop", stringLiteral("3")),
      unresolvedArg("boost", stringLiteral("1.5")),
      unresolvedArg("analyzer", stringLiteral("standard")),
      unresolvedArg("max_expansions", stringLiteral("4")),
      unresolvedArg("zero_terms_query", stringLiteral("NONE")));

  assertAnalyzeEqual(expected, unresolved);
}

/**
 * Runs the expression analyzer on {@code unresolvedExpression} with this test's analysis context.
 *
 * @param unresolvedExpression expression to analyze
 * @return whatever {@code expressionAnalyzer.analyze} returns for it
 */
protected Expression analyze(UnresolvedExpression unresolvedExpression) {
  final Expression result = expressionAnalyzer.analyze(unresolvedExpression, analysisContext);
  return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,19 @@ List<FunctionExpression> match_phrase_dsl_expressions() {
);
}

/**
 * Supplies the {@code match_phrase_prefix} expressions under test; currently only the
 * form with the {@code field} and {@code query} arguments.
 */
List<FunctionExpression> match_phrase_prefix_dsl_expressions() {
  FunctionExpression requiredArgsOnly = dsl.match_phrase_prefix(field, query);
  return List.of(requiredArgsOnly);
}

/** Every {@code match_phrase_prefix} expression under test must report type {@code BOOLEAN}. */
@Test
public void match_phrase_prefix() {
  match_phrase_prefix_dsl_expressions()
      .forEach(expression -> assertEquals(BOOLEAN, expression.type()));
}

@Test
void match_in_memory() {
FunctionExpression expr = dsl.match(field, query);
Expand Down
41 changes: 41 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,47 @@ Another example to show how to set custom values for the optional parameters::
| Hattie | 671 Bristol Street |
+-------------+--------------------+

MATCH_PHRASE_PREFIX
-------------------

Description
>>>>>>>>>>>

``match_phrase_prefix(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase_prefix function maps to the match_phrase_prefix query used in the search engine,
to return the documents in which the given field matches the provided text. Available parameters include:

- analyzer
- slop
- zero_terms_query
- max_expansions
- boost


Example with only the ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> SELECT author, title FROM books WHERE match_phrase_prefix(author, 'Alexander Mil');
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

Another example to show how to set custom values for the optional parameters::

os> SELECT author, title FROM books WHERE match_phrase_prefix(author, 'Alan Mil', slop = 2);
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+


MULTI_MATCH
-----------

Expand Down
43 changes: 43 additions & 0 deletions docs/user/ppl/functions/relevance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,49 @@ Another example to show how to set custom values for the optional parameters::
+----------------------+--------------------------+



MATCH_PHRASE_PREFIX
-------------------

Description
>>>>>>>>>>>

``match_phrase_prefix(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase_prefix function maps to the match_phrase_prefix query used in the search engine, to return the documents in which the given field matches the provided text. Available parameters include:

- analyzer
- slop
- max_expansions
- boost
- zero_terms_query

Example with only the ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> source=books | where match_phrase_prefix(author, 'Alexander Mil') | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



Another example to show how to set custom values for the optional parameters::

os> source=books | where match_phrase_prefix(author, 'Alan Mil', slop = 2) | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



MULTI_MATCH
-----------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;

/**
 * Integration tests for the {@code match_phrase_prefix} relevance function used in a PPL
 * {@code where} command. All queries run against the beer test index loaded in {@link #init()}.
 */
public class MatchPhrasePrefixWhereCommandIT extends PPLIntegTestCase {

  @Override
  public void init() throws IOException {
    loadIndex(Index.BEER);
  }

  @Test
  public void required_parameters() throws IOException {
    String query = "source = %s | WHERE match_phrase_prefix(Title, 'champagne be') | fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result,
        rows("Can old flat champagne be used for vinegar?"),
        rows("Elder flower champagne best to use natural yeast or add a wine yeast?"));
  }

  @Test
  public void all_optional_parameters() throws IOException {
    // The values for optional parameters are valid but arbitrary.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Title, 'flat champ', boost = 1.0, " +
        "zero_terms_query='ALL', max_expansions = 2, analyzer=standard, slop=0) " +
        "| fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("Can old flat champagne be used for vinegar?"));
  }

  @Test
  public void max_expansions_is_3() throws IOException {
    // max_expansions applies to the last term in the query -- 'bottl'.
    // It tells OpenSearch to consider only the first 3 terms that start with 'bottl'.
    // In this dataset these are 'bottle-conditioning', 'bottling', 'bottles'.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Tags, 'draught bottl', max_expansions=3) | fields Tags";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("brewing draught bottling"),
        rows("draught bottles"));
  }

  @Test
  public void analyzer_english() throws IOException {
    // English analyzer removes 'in' and 'to' as they are common words.
    // This results in an empty query, so no rows are expected.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english)" +
        "| fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    // assertEquals reports the actual total on failure, unlike assertTrue(total == 0).
    assertEquals("Expect English analyzer to filter out common words 'in' and 'to'",
        0, result.getInt("total"));
  }

  @Test
  public void analyzer_standard() throws IOException {
    // Standard analyzer does not treat 'in' and 'to' as special terms.
    // This results in 'to' being used as a phrase prefix, giving us 'Tokyo'.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Title, 'in to', analyzer=standard)" +
        "| fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("Local microbreweries and craft beer in Tokyo"));
  }

  @Test
  public void zero_term_query_all() throws IOException {
    // English analyzer removes 'in' and 'to' as they are common words.
    // zero_terms_query of 'ALL' causes all rows to be returned.
    // 'sort -Title | head 1' narrows the output to a single row to keep the test understandable.
    String query = "source = %s" +
        "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english, zero_terms_query='ALL') " +
        "| sort -Title | head 1 | fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("was working great, now all foam"));
  }

  @Test
  public void slop_is_2() throws IOException {
    // With a slop of 2 the query terms may appear transposed:
    // 'gas ta' matches 'taste gas', where 'ta' is the prefix of 'taste'.
    String query = "source = %s" +
        "| where match_phrase_prefix(Tags, 'gas ta', slop=2) " +
        "| fields Tags";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("taste gas"));
  }

  @Test
  public void slop_is_3() throws IOException {
    // With a slop of 3 the transposed terms may also be separated by another term,
    // so 'taste draught gas' is matched in addition to 'taste gas'.
    String query = "source = %s" +
        "| where match_phrase_prefix(Tags, 'gas ta', slop=3)" +
        "| fields Tags";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result,
        rows("taste draught gas"),
        rows("taste gas"));
  }
}
Loading

0 comments on commit e180d56

Please sign in to comment.