From d2872c87c005feff935178788e909ae0e8ea2c13 Mon Sep 17 00:00:00 2001 From: Tomoyuki Morita Date: Thu, 1 Aug 2024 17:21:57 -0700 Subject: [PATCH 1/4] Refer Antlr file using specific branch Signed-off-by: Tomoyuki Morita --- async-query-core/build.gradle | 10 +- .../src/main/antlr/SqlBaseLexer.g4 | 154 +--- .../src/main/antlr/SqlBaseParser.g4 | 851 ++++-------------- .../sql/spark/utils/SQLQueryUtils.java | 12 +- 4 files changed, 177 insertions(+), 850 deletions(-) diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index 1de6cb3105..b8e3e337e1 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -21,10 +21,12 @@ tasks.register('downloadG4Files', Exec) { executable 'curl' - args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4' - args '-o', 'src/main/antlr/SparkSqlBase.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4' - args '-o', 'src/main/antlr/SqlBaseParser.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4' - args '-o', 'src/main/antlr/SqlBaseLexer.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4' + def opensearchSparkBranch = "0.4" + def apacheSparkBranch = "v3.3.2" + args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4" + args '-o', 'src/main/antlr/SparkSqlBase.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4" + args '-o', 'src/main/antlr/SqlBaseParser.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkBranch}/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4" + args '-o', 'src/main/antlr/SqlBaseLexer.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkBranch}/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4" } generateGrammarSource { diff --git a/async-query-core/src/main/antlr/SqlBaseLexer.g4 b/async-query-core/src/main/antlr/SqlBaseLexer.g4 index bde298c23e..e2c4c5444e 100644 --- a/async-query-core/src/main/antlr/SqlBaseLexer.g4 +++ b/async-query-core/src/main/antlr/SqlBaseLexer.g4 @@ -18,7 +18,7 @@ lexer grammar SqlBaseLexer; @members { /** - * When true, parser should throw ParseException for unclosed bracketed comment. + * When true, parser should throw ParseExcetion for unclosed bracketed comment. */ public boolean has_unclosed_bracketed_comment = false; @@ -69,35 +69,6 @@ lexer grammar SqlBaseLexer; public void markUnclosedComment() { has_unclosed_bracketed_comment = true; } - - /** - * When greater than zero, it's in the middle of parsing ARRAY/MAP/STRUCT type. - */ - public int complex_type_level_counter = 0; - - /** - * Increase the counter by one when hits KEYWORD 'ARRAY', 'MAP', 'STRUCT'. - */ - public void incComplexTypeLevelCounter() { - complex_type_level_counter++; - } - - /** - * Decrease the counter by one when hits close tag '>' && the counter greater than zero - * which means we are in the middle of complex type parsing. Otherwise, it's a dangling - * GT token and we do nothing. - */ - public void decComplexTypeLevelCounter() { - if (complex_type_level_counter > 0) complex_type_level_counter--; - } - - /** - * If the counter is zero, it's a shift right operator. It can be closing tags of an complex - * type definition, such as MAP>. - */ - public boolean isShiftRightOperator() { - return complex_type_level_counter == 0 ? true : false; - } } SEMICOLON: ';'; @@ -108,11 +79,9 @@ COMMA: ','; DOT: '.'; LEFT_BRACKET: '['; RIGHT_BRACKET: ']'; -BANG: '!'; // NOTE: If you add a new token in the list below, you should update the list of keywords -// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and -// modify `ParserUtils.toExprAlias()` which assumes all keywords are between `ADD` and `ZONE`. +// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. //============================ // Start of the keywords list @@ -122,46 +91,34 @@ ADD: 'ADD'; AFTER: 'AFTER'; ALL: 'ALL'; ALTER: 'ALTER'; -ALWAYS: 'ALWAYS'; ANALYZE: 'ANALYZE'; AND: 'AND'; ANTI: 'ANTI'; ANY: 'ANY'; -ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; -ARRAY: 'ARRAY' {incComplexTypeLevelCounter();}; +ARRAY: 'ARRAY'; AS: 'AS'; ASC: 'ASC'; AT: 'AT'; AUTHORIZATION: 'AUTHORIZATION'; -BEGIN: 'BEGIN'; BETWEEN: 'BETWEEN'; -BIGINT: 'BIGINT'; -BINARY: 'BINARY'; -BINDING: 'BINDING'; -BOOLEAN: 'BOOLEAN'; BOTH: 'BOTH'; BUCKET: 'BUCKET'; BUCKETS: 'BUCKETS'; BY: 'BY'; -BYTE: 'BYTE'; CACHE: 'CACHE'; -CALLED: 'CALLED'; CASCADE: 'CASCADE'; CASE: 'CASE'; CAST: 'CAST'; CATALOG: 'CATALOG'; CATALOGS: 'CATALOGS'; CHANGE: 'CHANGE'; -CHAR: 'CHAR'; -CHARACTER: 'CHARACTER'; CHECK: 'CHECK'; CLEAR: 'CLEAR'; CLUSTER: 'CLUSTER'; CLUSTERED: 'CLUSTERED'; CODEGEN: 'CODEGEN'; COLLATE: 'COLLATE'; -COLLATION: 'COLLATION'; COLLECTION: 'COLLECTION'; COLUMN: 'COLUMN'; COLUMNS: 'COLUMNS'; @@ -169,11 +126,9 @@ COMMENT: 'COMMENT'; COMMIT: 'COMMIT'; COMPACT: 'COMPACT'; COMPACTIONS: 'COMPACTIONS'; -COMPENSATION: 'COMPENSATION'; COMPUTE: 'COMPUTE'; CONCATENATE: 'CONCATENATE'; CONSTRAINT: 'CONSTRAINT'; -CONTAINS: 'CONTAINS'; COST: 'COST'; CREATE: 'CREATE'; CROSS: 'CROSS'; @@ -184,44 +139,31 @@ CURRENT_TIME: 'CURRENT_TIME'; CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; CURRENT_USER: 'CURRENT_USER'; DAY: 'DAY'; -DAYS: 'DAYS'; DAYOFYEAR: 'DAYOFYEAR'; DATA: 'DATA'; -DATE: 'DATE'; DATABASE: 'DATABASE'; DATABASES: 'DATABASES'; DATEADD: 'DATEADD'; -DATE_ADD: 'DATE_ADD'; DATEDIFF: 'DATEDIFF'; -DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; -DEC: 'DEC'; -DECIMAL: 'DECIMAL'; -DECLARE: 'DECLARE'; -DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; -DEFINER: 'DEFINER'; DELETE: 'DELETE'; DELIMITED: 'DELIMITED'; DESC: 'DESC'; DESCRIBE: 'DESCRIBE'; -DETERMINISTIC: 'DETERMINISTIC'; DFS: 'DFS'; DIRECTORIES: 'DIRECTORIES'; DIRECTORY: 'DIRECTORY'; DISTINCT: 'DISTINCT'; DISTRIBUTE: 'DISTRIBUTE'; DIV: 'DIV'; -DOUBLE: 'DOUBLE'; DROP: 'DROP'; ELSE: 'ELSE'; END: 'END'; ESCAPE: 'ESCAPE'; ESCAPED: 'ESCAPED'; -EVOLUTION: 'EVOLUTION'; EXCEPT: 'EXCEPT'; EXCHANGE: 'EXCHANGE'; -EXCLUDE: 'EXCLUDE'; EXISTS: 'EXISTS'; EXPLAIN: 'EXPLAIN'; EXPORT: 'EXPORT'; @@ -234,7 +176,6 @@ FIELDS: 'FIELDS'; FILTER: 'FILTER'; FILEFORMAT: 'FILEFORMAT'; FIRST: 'FIRST'; -FLOAT: 'FLOAT'; FOLLOWING: 'FOLLOWING'; FOR: 'FOR'; FOREIGN: 'FOREIGN'; @@ -244,40 +185,29 @@ FROM: 'FROM'; FULL: 'FULL'; FUNCTION: 'FUNCTION'; FUNCTIONS: 'FUNCTIONS'; -GENERATED: 'GENERATED'; GLOBAL: 'GLOBAL'; GRANT: 'GRANT'; GROUP: 'GROUP'; GROUPING: 'GROUPING'; HAVING: 'HAVING'; -BINARY_HEX: 'X'; HOUR: 'HOUR'; -HOURS: 'HOURS'; -IDENTIFIER_KW: 'IDENTIFIER'; IF: 'IF'; IGNORE: 'IGNORE'; -IMMEDIATE: 'IMMEDIATE'; IMPORT: 'IMPORT'; IN: 'IN'; -INCLUDE: 'INCLUDE'; INDEX: 'INDEX'; INDEXES: 'INDEXES'; INNER: 'INNER'; INPATH: 'INPATH'; -INPUT: 'INPUT'; INPUTFORMAT: 'INPUTFORMAT'; INSERT: 'INSERT'; INTERSECT: 'INTERSECT'; INTERVAL: 'INTERVAL'; -INT: 'INT'; -INTEGER: 'INTEGER'; INTO: 'INTO'; -INVOKER: 'INVOKER'; IS: 'IS'; ITEMS: 'ITEMS'; JOIN: 'JOIN'; KEYS: 'KEYS'; -LANGUAGE: 'LANGUAGE'; LAST: 'LAST'; LATERAL: 'LATERAL'; LAZY: 'LAZY'; @@ -294,35 +224,23 @@ LOCATION: 'LOCATION'; LOCK: 'LOCK'; LOCKS: 'LOCKS'; LOGICAL: 'LOGICAL'; -LONG: 'LONG'; MACRO: 'MACRO'; -MAP: 'MAP' {incComplexTypeLevelCounter();}; +MAP: 'MAP'; MATCHED: 'MATCHED'; MERGE: 'MERGE'; MICROSECOND: 'MICROSECOND'; -MICROSECONDS: 'MICROSECONDS'; MILLISECOND: 'MILLISECOND'; -MILLISECONDS: 'MILLISECONDS'; MINUTE: 'MINUTE'; -MINUTES: 'MINUTES'; -MODIFIES: 'MODIFIES'; MONTH: 'MONTH'; -MONTHS: 'MONTHS'; MSCK: 'MSCK'; -NAME: 'NAME'; NAMESPACE: 'NAMESPACE'; NAMESPACES: 'NAMESPACES'; -NANOSECOND: 'NANOSECOND'; -NANOSECONDS: 'NANOSECONDS'; NATURAL: 'NATURAL'; NO: 'NO'; -NONE: 'NONE'; -NOT: 'NOT'; +NOT: 'NOT' | '!'; NULL: 'NULL'; NULLS: 'NULLS'; -NUMERIC: 'NUMERIC'; OF: 'OF'; -OFFSET: 'OFFSET'; ON: 'ON'; ONLY: 'ONLY'; OPTION: 'OPTION'; @@ -339,6 +257,8 @@ OVERWRITE: 'OVERWRITE'; PARTITION: 'PARTITION'; PARTITIONED: 'PARTITIONED'; PARTITIONS: 'PARTITIONS'; +PERCENTILE_CONT: 'PERCENTILE_CONT'; +PERCENTILE_DISC: 'PERCENTILE_DISC'; PERCENTLIT: 'PERCENT'; PIVOT: 'PIVOT'; PLACING: 'PLACING'; @@ -351,8 +271,6 @@ PURGE: 'PURGE'; QUARTER: 'QUARTER'; QUERY: 'QUERY'; RANGE: 'RANGE'; -READS: 'READS'; -REAL: 'REAL'; RECORDREADER: 'RECORDREADER'; RECORDWRITER: 'RECORDWRITER'; RECOVER: 'RECOVER'; @@ -366,8 +284,6 @@ REPLACE: 'REPLACE'; RESET: 'RESET'; RESPECT: 'RESPECT'; RESTRICT: 'RESTRICT'; -RETURN: 'RETURN'; -RETURNS: 'RETURNS'; REVOKE: 'REVOKE'; RIGHT: 'RIGHT'; RLIKE: 'RLIKE' | 'REGEXP'; @@ -378,10 +294,8 @@ ROLLUP: 'ROLLUP'; ROW: 'ROW'; ROWS: 'ROWS'; SECOND: 'SECOND'; -SECONDS: 'SECONDS'; SCHEMA: 'SCHEMA'; SCHEMAS: 'SCHEMAS'; -SECURITY: 'SECURITY'; SELECT: 'SELECT'; SEMI: 'SEMI'; SEPARATED: 'SEPARATED'; @@ -391,23 +305,16 @@ SESSION_USER: 'SESSION_USER'; SET: 'SET'; SETMINUS: 'MINUS'; SETS: 'SETS'; -SHORT: 'SHORT'; SHOW: 'SHOW'; -SINGLE: 'SINGLE'; SKEWED: 'SKEWED'; -SMALLINT: 'SMALLINT'; SOME: 'SOME'; SORT: 'SORT'; SORTED: 'SORTED'; -SOURCE: 'SOURCE'; -SPECIFIC: 'SPECIFIC'; -SQL: 'SQL'; START: 'START'; STATISTICS: 'STATISTICS'; STORED: 'STORED'; STRATIFY: 'STRATIFY'; -STRING: 'STRING'; -STRUCT: 'STRUCT' {incComplexTypeLevelCounter();}; +STRUCT: 'STRUCT'; SUBSTR: 'SUBSTR'; SUBSTRING: 'SUBSTRING'; SYNC: 'SYNC'; @@ -416,21 +323,15 @@ SYSTEM_VERSION: 'SYSTEM_VERSION'; TABLE: 'TABLE'; TABLES: 'TABLES'; TABLESAMPLE: 'TABLESAMPLE'; -TARGET: 'TARGET'; TBLPROPERTIES: 'TBLPROPERTIES'; TEMPORARY: 'TEMPORARY' | 'TEMP'; TERMINATED: 'TERMINATED'; THEN: 'THEN'; TIME: 'TIME'; -TIMEDIFF: 'TIMEDIFF'; TIMESTAMP: 'TIMESTAMP'; -TIMESTAMP_LTZ: 'TIMESTAMP_LTZ'; -TIMESTAMP_NTZ: 'TIMESTAMP_NTZ'; TIMESTAMPADD: 'TIMESTAMPADD'; TIMESTAMPDIFF: 'TIMESTAMPDIFF'; -TINYINT: 'TINYINT'; TO: 'TO'; -EXECUTE: 'EXECUTE'; TOUCH: 'TOUCH'; TRAILING: 'TRAILING'; TRANSACTION: 'TRANSACTION'; @@ -448,30 +349,22 @@ UNION: 'UNION'; UNIQUE: 'UNIQUE'; UNKNOWN: 'UNKNOWN'; UNLOCK: 'UNLOCK'; -UNPIVOT: 'UNPIVOT'; UNSET: 'UNSET'; UPDATE: 'UPDATE'; USE: 'USE'; USER: 'USER'; USING: 'USING'; VALUES: 'VALUES'; -VARCHAR: 'VARCHAR'; -VAR: 'VAR'; -VARIABLE: 'VARIABLE'; -VARIANT: 'VARIANT'; VERSION: 'VERSION'; VIEW: 'VIEW'; VIEWS: 'VIEWS'; -VOID: 'VOID'; WEEK: 'WEEK'; -WEEKS: 'WEEKS'; WHEN: 'WHEN'; WHERE: 'WHERE'; WINDOW: 'WINDOW'; WITH: 'WITH'; WITHIN: 'WITHIN'; YEAR: 'YEAR'; -YEARS: 'YEARS'; ZONE: 'ZONE'; //--SPARK-KEYWORD-LIST-END //============================ @@ -484,11 +377,8 @@ NEQ : '<>'; NEQJ: '!='; LT : '<'; LTE : '<=' | '!>'; -GT : '>' {decComplexTypeLevelCounter();}; +GT : '>'; GTE : '>=' | '!<'; -SHIFT_LEFT: '<<'; -SHIFT_RIGHT: '>>' {isShiftRightOperator()}?; -SHIFT_RIGHT_UNSIGNED: '>>>' {isShiftRightOperator()}?; PLUS: '+'; MINUS: '-'; @@ -501,26 +391,17 @@ PIPE: '|'; CONCAT_PIPE: '||'; HAT: '^'; COLON: ':'; -DOUBLE_COLON: '::'; ARROW: '->'; -FAT_ARROW : '=>'; HENT_START: '/*+'; HENT_END: '*/'; -QUESTION: '?'; -STRING_LITERAL +STRING : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '"' ( ~('"'|'\\') | ('\\' .) )* '"' | 'R\'' (~'\'')* '\'' | 'R"'(~'"')* '"' ; -DOUBLEQUOTED_STRING - :'"' ( ~('"'|'\\') | ('\\' .) )* '"' - ; - -// NOTE: If you move a numeric literal, you should modify `ParserUtils.toExprAlias()` -// which assumes all numeric literals are between `BIGINT_LITERAL` and `BIGDECIMAL_LITERAL`. - BIGINT_LITERAL : DIGIT+ 'L' ; @@ -561,13 +442,8 @@ BIGDECIMAL_LITERAL | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? ; -// Generalize the identifier to give a sensible INVALID_IDENTIFIER error message: -// * Unicode letters rather than a-z and A-Z only -// * URI paths for table references using paths -// We then narrow down to ANSI rules in exitUnquotedIdentifier() in the parser. IDENTIFIER - : (UNICODE_LETTER | DIGIT | '_')+ - | UNICODE_LETTER+ '://' (UNICODE_LETTER | DIGIT | '_' | '/' | '-' | '.' | '?' | '=' | '&' | '#' | '%')+ + : (LETTER | DIGIT | '_')+ ; BACKQUOTED_IDENTIFIER @@ -591,10 +467,6 @@ fragment LETTER : [A-Z] ; -fragment UNICODE_LETTER - : [\p{L}] - ; - SIMPLE_COMMENT : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) ; @@ -604,7 +476,7 @@ BRACKETED_COMMENT ; WS - : [ \t\n\f\r\u000B\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u202F\u205F\u3000]+ -> channel(HIDDEN) + : [ \r\n\t]+ -> channel(HIDDEN) ; // Catch-all for anything we can't recognize. diff --git a/async-query-core/src/main/antlr/SqlBaseParser.g4 b/async-query-core/src/main/antlr/SqlBaseParser.g4 index c7aa56cf92..701d4bc5aa 100644 --- a/async-query-core/src/main/antlr/SqlBaseParser.g4 +++ b/async-query-core/src/main/antlr/SqlBaseParser.g4 @@ -35,52 +35,10 @@ options { tokenVocab = SqlBaseLexer; } * When true, the behavior of keywords follows ANSI SQL standard. */ public boolean SQL_standard_keyword_behavior = false; - - /** - * When true, double quoted literals are identifiers rather than STRINGs. - */ - public boolean double_quoted_identifiers = false; } -compoundOrSingleStatement - : singleStatement - | singleCompoundStatement - ; - -singleCompoundStatement - : beginEndCompoundBlock SEMICOLON? EOF - ; - -beginEndCompoundBlock - : beginLabel? BEGIN compoundBody END endLabel? - ; - -compoundBody - : (compoundStatements+=compoundStatement SEMICOLON)* - ; - -compoundStatement - : statement - | setStatementWithOptionalVarKeyword - | beginEndCompoundBlock - ; - -setStatementWithOptionalVarKeyword - : SET variable? assignmentList #setVariableWithOptionalKeyword - | SET variable? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ - LEFT_PAREN query RIGHT_PAREN #setVariableWithOptionalKeyword - ; - singleStatement - : (statement|setResetStatement) SEMICOLON* EOF - ; - -beginLabel - : multipartIdentifier COLON - ; - -endLabel - : multipartIdentifier + : statement SEMICOLON* EOF ; singleExpression @@ -109,205 +67,156 @@ singleTableSchema statement : query #statementDefault - | executeImmediate #visitExecuteImmediate | ctes? dmlStatementNoWith #dmlStatement - | USE identifierReference #use - | USE namespace identifierReference #useNamespace - | SET CATALOG (errorCapturingIdentifier | stringLit) #setCatalog - | CREATE namespace (IF errorCapturingNot EXISTS)? identifierReference + | USE multipartIdentifier #use + | USE namespace multipartIdentifier #useNamespace + | SET CATALOG (identifier | STRING) #setCatalog + | CREATE namespace (IF NOT EXISTS)? multipartIdentifier (commentSpec | locationSpec | (WITH (DBPROPERTIES | PROPERTIES) propertyList))* #createNamespace - | ALTER namespace identifierReference + | ALTER namespace multipartIdentifier SET (DBPROPERTIES | PROPERTIES) propertyList #setNamespaceProperties - | ALTER namespace identifierReference - UNSET (DBPROPERTIES | PROPERTIES) propertyList #unsetNamespaceProperties - | ALTER namespace identifierReference + | ALTER namespace multipartIdentifier SET locationSpec #setNamespaceLocation - | DROP namespace (IF EXISTS)? identifierReference + | DROP namespace (IF EXISTS)? multipartIdentifier (RESTRICT | CASCADE)? #dropNamespace | SHOW namespaces ((FROM | IN) multipartIdentifier)? - (LIKE? pattern=stringLit)? #showNamespaces - | createTableHeader (LEFT_PAREN colDefinitionList RIGHT_PAREN)? tableProvider? + (LIKE? pattern=STRING)? #showNamespaces + | createTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable - | CREATE TABLE (IF errorCapturingNot EXISTS)? target=tableIdentifier + | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier (tableProvider | rowFormat | createFileFormat | locationSpec | (TBLPROPERTIES tableProps=propertyList))* #createTableLike - | replaceTableHeader (LEFT_PAREN colDefinitionList RIGHT_PAREN)? tableProvider? + | replaceTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #replaceTable - | ANALYZE TABLE identifierReference partitionSpec? COMPUTE STATISTICS + | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze - | ANALYZE TABLES ((FROM | IN) identifierReference)? COMPUTE STATISTICS + | ANALYZE TABLES ((FROM | IN) multipartIdentifier)? COMPUTE STATISTICS (identifier)? #analyzeTables - | ALTER TABLE identifierReference + | ALTER TABLE multipartIdentifier ADD (COLUMN | COLUMNS) columns=qualifiedColTypeWithPositionList #addTableColumns - | ALTER TABLE identifierReference + | ALTER TABLE multipartIdentifier ADD (COLUMN | COLUMNS) LEFT_PAREN columns=qualifiedColTypeWithPositionList RIGHT_PAREN #addTableColumns - | ALTER TABLE table=identifierReference + | ALTER TABLE table=multipartIdentifier RENAME COLUMN from=multipartIdentifier TO to=errorCapturingIdentifier #renameTableColumn - | ALTER TABLE identifierReference + | ALTER TABLE multipartIdentifier DROP (COLUMN | COLUMNS) (IF EXISTS)? LEFT_PAREN columns=multipartIdentifierList RIGHT_PAREN #dropTableColumns - | ALTER TABLE identifierReference + | ALTER TABLE multipartIdentifier DROP (COLUMN | COLUMNS) (IF EXISTS)? columns=multipartIdentifierList #dropTableColumns - | ALTER (TABLE | VIEW) from=identifierReference + | ALTER (TABLE | VIEW) from=multipartIdentifier RENAME TO to=multipartIdentifier #renameTable - | ALTER (TABLE | VIEW) identifierReference + | ALTER (TABLE | VIEW) multipartIdentifier SET TBLPROPERTIES propertyList #setTableProperties - | ALTER (TABLE | VIEW) identifierReference + | ALTER (TABLE | VIEW) multipartIdentifier UNSET TBLPROPERTIES (IF EXISTS)? propertyList #unsetTableProperties - | ALTER TABLE table=identifierReference + | ALTER TABLE table=multipartIdentifier (ALTER | CHANGE) COLUMN? column=multipartIdentifier alterColumnAction? #alterTableAlterColumn - | ALTER TABLE table=identifierReference partitionSpec? + | ALTER TABLE table=multipartIdentifier partitionSpec? CHANGE COLUMN? colName=multipartIdentifier colType colPosition? #hiveChangeColumn - | ALTER TABLE table=identifierReference partitionSpec? + | ALTER TABLE table=multipartIdentifier partitionSpec? REPLACE COLUMNS LEFT_PAREN columns=qualifiedColTypeWithPositionList RIGHT_PAREN #hiveReplaceColumns - | ALTER TABLE identifierReference (partitionSpec)? - SET SERDE stringLit (WITH SERDEPROPERTIES propertyList)? #setTableSerDe - | ALTER TABLE identifierReference (partitionSpec)? + | ALTER TABLE multipartIdentifier (partitionSpec)? + SET SERDE STRING (WITH SERDEPROPERTIES propertyList)? #setTableSerDe + | ALTER TABLE multipartIdentifier (partitionSpec)? SET SERDEPROPERTIES propertyList #setTableSerDe - | ALTER (TABLE | VIEW) identifierReference ADD (IF errorCapturingNot EXISTS)? + | ALTER (TABLE | VIEW) multipartIdentifier ADD (IF NOT EXISTS)? partitionSpecLocation+ #addTablePartition - | ALTER TABLE identifierReference + | ALTER TABLE multipartIdentifier from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition - | ALTER (TABLE | VIEW) identifierReference + | ALTER (TABLE | VIEW) multipartIdentifier DROP (IF EXISTS)? partitionSpec (COMMA partitionSpec)* PURGE? #dropTablePartitions - | ALTER TABLE identifierReference + | ALTER TABLE multipartIdentifier (partitionSpec)? SET locationSpec #setTableLocation - | ALTER TABLE identifierReference RECOVER PARTITIONS #recoverPartitions - | ALTER TABLE identifierReference - (clusterBySpec | CLUSTER BY NONE) #alterClusterBy - | DROP TABLE (IF EXISTS)? identifierReference PURGE? #dropTable - | DROP VIEW (IF EXISTS)? identifierReference #dropView + | ALTER TABLE multipartIdentifier RECOVER PARTITIONS #recoverPartitions + | DROP TABLE (IF EXISTS)? multipartIdentifier PURGE? #dropTable + | DROP VIEW (IF EXISTS)? multipartIdentifier #dropView | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? - VIEW (IF errorCapturingNot EXISTS)? identifierReference + VIEW (IF NOT EXISTS)? multipartIdentifier identifierCommentList? (commentSpec | - schemaBinding | (PARTITIONED ON identifierList) | (TBLPROPERTIES propertyList))* AS query #createView | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider (OPTIONS propertyList)? #createTempViewUsing - | ALTER VIEW identifierReference AS? query #alterViewQuery - | ALTER VIEW identifierReference schemaBinding #alterViewSchemaBinding - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? - identifierReference AS className=stringLit + | ALTER VIEW multipartIdentifier AS? query #alterViewQuery + | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? + multipartIdentifier AS className=STRING (USING resource (COMMA resource)*)? #createFunction - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? - identifierReference LEFT_PAREN parameters=colDefinitionList? RIGHT_PAREN - (RETURNS (dataType | TABLE LEFT_PAREN returnParams=colTypeList RIGHT_PAREN))? - routineCharacteristics - RETURN (query | expression) #createUserDefinedFunction - | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction - | DECLARE (OR REPLACE)? variable? - identifierReference dataType? variableDefaultExpression? #createVariable - | DROP TEMPORARY variable (IF EXISTS)? identifierReference #dropVariable + | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier #dropFunction | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? - (statement|setResetStatement) #explain - | SHOW TABLES ((FROM | IN) identifierReference)? - (LIKE? pattern=stringLit)? #showTables - | SHOW TABLE EXTENDED ((FROM | IN) ns=identifierReference)? - LIKE pattern=stringLit partitionSpec? #showTableExtended - | SHOW TBLPROPERTIES table=identifierReference + statement #explain + | SHOW TABLES ((FROM | IN) multipartIdentifier)? + (LIKE? pattern=STRING)? #showTables + | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)? + LIKE pattern=STRING partitionSpec? #showTableExtended + | SHOW TBLPROPERTIES table=multipartIdentifier (LEFT_PAREN key=propertyKey RIGHT_PAREN)? #showTblProperties - | SHOW COLUMNS (FROM | IN) table=identifierReference + | SHOW COLUMNS (FROM | IN) table=multipartIdentifier ((FROM | IN) ns=multipartIdentifier)? #showColumns - | SHOW VIEWS ((FROM | IN) identifierReference)? - (LIKE? pattern=stringLit)? #showViews - | SHOW PARTITIONS identifierReference partitionSpec? #showPartitions - | SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)? - (LIKE? (legacy=multipartIdentifier | pattern=stringLit))? #showFunctions - | SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable + | SHOW VIEWS ((FROM | IN) multipartIdentifier)? + (LIKE? pattern=STRING)? #showViews + | SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions + | SHOW identifier? FUNCTIONS ((FROM | IN) ns=multipartIdentifier)? + (LIKE? (legacy=multipartIdentifier | pattern=STRING))? #showFunctions + | SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable | SHOW CURRENT namespace #showCurrentNamespace - | SHOW CATALOGS (LIKE? pattern=stringLit)? #showCatalogs + | SHOW CATALOGS (LIKE? pattern=STRING)? #showCatalogs | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction | (DESC | DESCRIBE) namespace EXTENDED? - identifierReference #describeNamespace + multipartIdentifier #describeNamespace | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? - identifierReference partitionSpec? describeColName? #describeRelation + multipartIdentifier partitionSpec? describeColName? #describeRelation | (DESC | DESCRIBE) QUERY? query #describeQuery - | COMMENT ON namespace identifierReference IS - comment #commentNamespace - | COMMENT ON TABLE identifierReference IS comment #commentTable - | REFRESH TABLE identifierReference #refreshTable - | REFRESH FUNCTION identifierReference #refreshFunction - | REFRESH (stringLit | .*?) #refreshResource - | CACHE LAZY? TABLE identifierReference + | COMMENT ON namespace multipartIdentifier IS + comment=(STRING | NULL) #commentNamespace + | COMMENT ON TABLE multipartIdentifier IS comment=(STRING | NULL) #commentTable + | REFRESH TABLE multipartIdentifier #refreshTable + | REFRESH FUNCTION multipartIdentifier #refreshFunction + | REFRESH (STRING | .*?) #refreshResource + | CACHE LAZY? TABLE multipartIdentifier (OPTIONS options=propertyList)? (AS? query)? #cacheTable - | UNCACHE TABLE (IF EXISTS)? identifierReference #uncacheTable + | UNCACHE TABLE (IF EXISTS)? multipartIdentifier #uncacheTable | CLEAR CACHE #clearCache - | LOAD DATA LOCAL? INPATH path=stringLit OVERWRITE? INTO TABLE - identifierReference partitionSpec? #loadData - | TRUNCATE TABLE identifierReference partitionSpec? #truncateTable - | (MSCK)? REPAIR TABLE identifierReference + | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE + multipartIdentifier partitionSpec? #loadData + | TRUNCATE TABLE multipartIdentifier partitionSpec? #truncateTable + | MSCK REPAIR TABLE multipartIdentifier (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable | op=(ADD | LIST) identifier .*? #manageResource - | CREATE INDEX (IF errorCapturingNot EXISTS)? identifier ON TABLE? - identifierReference (USING indexType=identifier)? - LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN - (OPTIONS options=propertyList)? #createIndex - | DROP INDEX (IF EXISTS)? identifier ON TABLE? identifierReference #dropIndex - | unsupportedHiveNativeCommands .*? #failNativeCommand - ; - -setResetStatement - : SET COLLATION collationName=identifier #setCollation - | SET ROLE .*? #failSetRole + | SET ROLE .*? #failNativeCommand | SET TIME ZONE interval #setTimeZone - | SET TIME ZONE timezone #setTimeZone + | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone | SET TIME ZONE .*? #setTimeZone - | SET variable assignmentList #setVariable - | SET variable LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ - LEFT_PAREN query RIGHT_PAREN #setVariable | SET configKey EQ configValue #setQuotedConfiguration | SET configKey (EQ .*?)? #setConfiguration | SET .*? EQ configValue #setQuotedConfiguration | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration | RESET .*? #resetConfiguration - ; - -executeImmediate - : EXECUTE IMMEDIATE queryParam=executeImmediateQueryParam (INTO targetVariable=multipartIdentifierList)? executeImmediateUsing? - ; - -executeImmediateUsing - : USING LEFT_PAREN params=namedExpressionSeq RIGHT_PAREN - | USING params=namedExpressionSeq - ; - -executeImmediateQueryParam - : stringLit - | multipartIdentifier - ; - -executeImmediateArgument - : (constant|multipartIdentifier) (AS name=errorCapturingIdentifier)? - ; - -executeImmediateArgumentSeq - : executeImmediateArgument (COMMA executeImmediateArgument)* - ; - -timezone - : stringLit - | LOCAL + | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE? + multipartIdentifier (USING indexType=identifier)? + LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN + (OPTIONS options=propertyList)? #createIndex + | DROP INDEX (IF EXISTS)? identifier ON TABLE? multipartIdentifier #dropIndex + | unsupportedHiveNativeCommands .*? #failNativeCommand ; configKey @@ -315,7 +224,7 @@ configKey ; configValue - : backQuotedIdentifier + : quotedIdentifier ; unsupportedHiveNativeCommands @@ -366,15 +275,11 @@ unsupportedHiveNativeCommands ; createTableHeader - : CREATE TEMPORARY? EXTERNAL? TABLE (IF errorCapturingNot EXISTS)? identifierReference + : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? multipartIdentifier ; replaceTableHeader - : (CREATE OR)? REPLACE TABLE identifierReference - ; - -clusterBySpec - : CLUSTER BY LEFT_PAREN multipartIdentifierList RIGHT_PAREN + : (CREATE OR)? REPLACE TABLE multipartIdentifier ; bucketSpec @@ -390,15 +295,11 @@ skewSpec ; locationSpec - : LOCATION stringLit - ; - -schemaBinding - : WITH SCHEMA (BINDING | COMPENSATION | EVOLUTION | TYPE EVOLUTION) + : LOCATION STRING ; commentSpec - : COMMENT stringLit + : COMMENT STRING ; query @@ -406,11 +307,10 @@ query ; insertInto - : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF errorCapturingNot EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable - | INSERT INTO TABLE? identifierReference partitionSpec? (IF errorCapturingNot EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable - | INSERT INTO TABLE? identifierReference REPLACE whereClause #insertIntoReplaceWhere - | INSERT OVERWRITE LOCAL? DIRECTORY path=stringLit rowFormat? createFileFormat? #insertOverwriteHiveDir - | INSERT OVERWRITE LOCAL? DIRECTORY (path=stringLit)? tableProvider (OPTIONS options=propertyList)? #insertOverwriteDir + : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList? #insertOverwriteTable + | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList? #insertIntoTable + | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir + | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=propertyList)? #insertOverwriteDir ; partitionSpecLocation @@ -423,7 +323,6 @@ partitionSpec partitionVal : identifier (EQ constant)? - | identifier EQ DEFAULT ; namespace @@ -438,23 +337,16 @@ namespaces | SCHEMAS ; -variable - : VARIABLE - | VAR - ; - describeFuncName - : identifierReference - | stringLit + : qualifiedName + | STRING | comparisonOperator | arithmeticOperator | predicateOperator - | shiftOperator - | BANG ; describeColName - : nameParts+=errorCapturingIdentifier (DOT nameParts+=errorCapturingIdentifier)* + : nameParts+=identifier (DOT nameParts+=identifier)* ; ctes @@ -470,10 +362,9 @@ tableProvider ; createTableClauses - :((OPTIONS options=expressionPropertyList) | + :((OPTIONS options=propertyList) | (PARTITIONED BY partitioning=partitionFieldList) | skewSpec | - clusterBySpec | bucketSpec | rowFormat | createFileFormat | @@ -491,23 +382,15 @@ property ; propertyKey - : errorCapturingIdentifier (DOT errorCapturingIdentifier)* - | stringLit + : identifier (DOT identifier)* + | STRING ; propertyValue : INTEGER_VALUE | DECIMAL_VALUE | booleanValue - | stringLit - ; - -expressionPropertyList - : LEFT_PAREN expressionProperty (COMMA expressionProperty)* RIGHT_PAREN - ; - -expressionProperty - : key=propertyKey (EQ? value=expression)? + | STRING ; constantList @@ -524,35 +407,29 @@ createFileFormat ; fileFormat - : INPUTFORMAT inFmt=stringLit OUTPUTFORMAT outFmt=stringLit #tableFileFormat + : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING #tableFileFormat | identifier #genericFileFormat ; storageHandler - : stringLit (WITH SERDEPROPERTIES propertyList)? + : STRING (WITH SERDEPROPERTIES propertyList)? ; resource - : identifier stringLit + : identifier STRING ; dmlStatementNoWith : insertInto query #singleInsertQuery | fromClause multiInsertQueryBody+ #multiInsertQuery - | DELETE FROM identifierReference tableAlias whereClause? #deleteFromTable - | UPDATE identifierReference tableAlias setClause whereClause? #updateTable - | MERGE (WITH SCHEMA EVOLUTION)? INTO target=identifierReference targetAlias=tableAlias - USING (source=identifierReference | + | DELETE FROM multipartIdentifier tableAlias whereClause? #deleteFromTable + | UPDATE multipartIdentifier tableAlias setClause whereClause? #updateTable + | MERGE INTO target=multipartIdentifier targetAlias=tableAlias + USING (source=multipartIdentifier | LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias ON mergeCondition=booleanExpression matchedClause* - notMatchedClause* - notMatchedBySourceClause* #mergeIntoTable - ; - -identifierReference - : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN - | multipartIdentifier + notMatchedClause* #mergeIntoTable ; queryOrganization @@ -562,7 +439,6 @@ queryOrganization (SORT BY sort+=sortItem (COMMA sort+=sortItem)*)? windowClause? (LIMIT (ALL | limit=expression))? - (OFFSET offset=expression)? ; multiInsertQueryBody @@ -582,7 +458,7 @@ queryTerm queryPrimary : querySpecification #queryPrimaryDefault | fromStatement #fromStmt - | TABLE identifierReference #table + | TABLE multipartIdentifier #table | inlineTable #inlineTableDefault1 | LEFT_PAREN query RIGHT_PAREN #subquery ; @@ -630,11 +506,11 @@ transformClause | kind=MAP setQuantifier? expressionSeq | kind=REDUCE setQuantifier? expressionSeq) inRowFormat=rowFormat? - (RECORDWRITER recordWriter=stringLit)? - USING script=stringLit + (RECORDWRITER recordWriter=STRING)? + USING script=STRING (AS (identifierSeq | colTypeList | (LEFT_PAREN (identifierSeq | colTypeList) RIGHT_PAREN)))? outRowFormat=rowFormat? - (RECORDREADER recordReader=stringLit)? + (RECORDREADER recordReader=STRING)? ; selectClause @@ -649,11 +525,7 @@ matchedClause : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction ; notMatchedClause - : WHEN errorCapturingNot MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction - ; - -notMatchedBySourceClause - : WHEN errorCapturingNot MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction + : WHEN NOT MATCHED (AND notMatchedCond=booleanExpression)? THEN notMatchedAction ; matchedAction @@ -668,15 +540,6 @@ notMatchedAction VALUES LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN ; -notMatchedBySourceAction - : DELETE - | UPDATE SET assignmentList - ; - -exceptClause - : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN - ; - assignmentList : assignment (COMMA assignment)* ; @@ -703,11 +566,11 @@ hintStatement ; fromClause - : FROM relation (COMMA relation)* lateralView* pivotClause? unpivotClause? + : FROM relation (COMMA relation)* lateralView* pivotClause? ; temporalClause - : FOR? (SYSTEM_VERSION | VERSION) AS OF version + : FOR? (SYSTEM_VERSION | VERSION) AS OF version=(INTEGER_VALUE | STRING) | FOR? (SYSTEM_TIME | TIMESTAMP) AS OF timestamp=valueExpression ; @@ -745,60 +608,12 @@ pivotClause ; pivotColumn - : identifiers+=errorCapturingIdentifier - | LEFT_PAREN identifiers+=errorCapturingIdentifier (COMMA identifiers+=errorCapturingIdentifier)* RIGHT_PAREN + : identifiers+=identifier + | LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN ; pivotValue - : expression (AS? errorCapturingIdentifier)? - ; - -unpivotClause - : UNPIVOT nullOperator=unpivotNullClause? LEFT_PAREN - operator=unpivotOperator - RIGHT_PAREN (AS? errorCapturingIdentifier)? - ; - -unpivotNullClause - : (INCLUDE | EXCLUDE) NULLS - ; - -unpivotOperator - : (unpivotSingleValueColumnClause | unpivotMultiValueColumnClause) - ; - -unpivotSingleValueColumnClause - : unpivotValueColumn FOR unpivotNameColumn IN LEFT_PAREN unpivotColumns+=unpivotColumnAndAlias (COMMA unpivotColumns+=unpivotColumnAndAlias)* RIGHT_PAREN - ; - -unpivotMultiValueColumnClause - : LEFT_PAREN unpivotValueColumns+=unpivotValueColumn (COMMA unpivotValueColumns+=unpivotValueColumn)* RIGHT_PAREN - FOR unpivotNameColumn - IN LEFT_PAREN unpivotColumnSets+=unpivotColumnSet (COMMA unpivotColumnSets+=unpivotColumnSet)* RIGHT_PAREN - ; - -unpivotColumnSet - : LEFT_PAREN unpivotColumns+=unpivotColumn (COMMA unpivotColumns+=unpivotColumn)* RIGHT_PAREN unpivotAlias? - ; - -unpivotValueColumn - : identifier - ; - -unpivotNameColumn - : identifier - ; - -unpivotColumnAndAlias - : unpivotColumn unpivotAlias? - ; - -unpivotColumn - : multipartIdentifier - ; - -unpivotAlias - : AS? errorCapturingIdentifier + : expression (AS? identifier)? ; lateralView @@ -811,13 +626,7 @@ setQuantifier ; relation - : LATERAL? relationPrimary relationExtension* - ; - -relationExtension - : joinRelation - | pivotClause - | unpivotClause + : LATERAL? relationPrimary joinRelation* ; joinRelation @@ -877,58 +686,20 @@ identifierComment ; relationPrimary - : identifierReference temporalClause? - optionsClause? sample? tableAlias #tableName + : multipartIdentifier temporalClause? + sample? tableAlias #tableName | LEFT_PAREN query RIGHT_PAREN sample? tableAlias #aliasedQuery | LEFT_PAREN relation RIGHT_PAREN sample? tableAlias #aliasedRelation | inlineTable #inlineTableDefault2 | functionTable #tableValuedFunction ; -optionsClause - : WITH options=propertyList - ; - inlineTable : VALUES expression (COMMA expression)* tableAlias ; -functionTableSubqueryArgument - : TABLE identifierReference tableArgumentPartitioning? - | TABLE LEFT_PAREN identifierReference RIGHT_PAREN tableArgumentPartitioning? - | TABLE LEFT_PAREN query RIGHT_PAREN tableArgumentPartitioning? - ; - -tableArgumentPartitioning - : ((WITH SINGLE PARTITION) - | ((PARTITION | DISTRIBUTE) BY - (((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN)) - | (expression (COMMA invalidMultiPartitionExpression=expression)+) - | partition+=expression))) - ((ORDER | SORT) BY - (((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) - | (sortItem (COMMA invalidMultiSortItem=sortItem)+) - | sortItem)))? - ; - -functionTableNamedArgumentExpression - : key=identifier FAT_ARROW table=functionTableSubqueryArgument - ; - -functionTableReferenceArgument - : functionTableSubqueryArgument - | functionTableNamedArgumentExpression - ; - -functionTableArgument - : functionTableReferenceArgument - | functionArgument - ; - functionTable - : funcName=functionName LEFT_PAREN - (functionTableArgument (COMMA functionTableArgument)*)? - RIGHT_PAREN tableAlias + : funcName=functionName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tableAlias ; tableAlias @@ -936,13 +707,13 @@ tableAlias ; rowFormat - : ROW FORMAT SERDE name=stringLit (WITH SERDEPROPERTIES props=propertyList)? #rowFormatSerde + : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=propertyList)? #rowFormatSerde | ROW FORMAT DELIMITED - (FIELDS TERMINATED BY fieldsTerminatedBy=stringLit (ESCAPED BY escapedBy=stringLit)?)? - (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=stringLit)? - (MAP KEYS TERMINATED BY keysTerminatedBy=stringLit)? - (LINES TERMINATED BY linesSeparatedBy=stringLit)? - (NULL DEFINED AS nullDefinedAs=stringLit)? #rowFormatDelimited + (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)? + (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)? + (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)? + (LINES TERMINATED BY linesSeparatedBy=STRING)? + (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited ; multipartIdentifierList @@ -1001,21 +772,12 @@ expression : booleanExpression ; -namedArgumentExpression - : key=identifier FAT_ARROW value=expression - ; - -functionArgument - : expression - | namedArgumentExpression - ; - expressionSeq : expression (COMMA expression)* ; booleanExpression - : (NOT | BANG) booleanExpression #logicalNot + : NOT booleanExpression #logicalNot | EXISTS LEFT_PAREN query RIGHT_PAREN #exists | valueExpression predicate? #predicated | left=booleanExpression operator=AND right=booleanExpression #logicalBinary @@ -1023,20 +785,15 @@ booleanExpression ; predicate - : errorCapturingNot? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | errorCapturingNot? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN - | errorCapturingNot? kind=IN LEFT_PAREN query RIGHT_PAREN - | errorCapturingNot? kind=RLIKE pattern=valueExpression - | errorCapturingNot? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) - | errorCapturingNot? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? - | IS errorCapturingNot? kind=NULL - | IS errorCapturingNot? kind=(TRUE | FALSE | UNKNOWN) - | IS errorCapturingNot? kind=DISTINCT FROM right=valueExpression - ; - -errorCapturingNot - : NOT - | BANG + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN + | NOT? kind=RLIKE pattern=valueExpression + | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) + | NOT? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=STRING)? + | IS NOT? kind=NULL + | IS NOT? kind=(TRUE | FALSE | UNKNOWN) + | IS NOT? kind=DISTINCT FROM right=valueExpression ; valueExpression @@ -1044,19 +801,12 @@ valueExpression | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary - | left=valueExpression shiftOperator right=valueExpression #shiftExpression | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary | left=valueExpression comparisonOperator right=valueExpression #comparison ; -shiftOperator - : SHIFT_LEFT - | SHIFT_RIGHT - | SHIFT_RIGHT_UNSIGNED - ; - datetimeUnit : YEAR | QUARTER | MONTH | WEEK | DAY | DAYOFYEAR @@ -1064,27 +814,22 @@ datetimeUnit ; primaryExpression - : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER | SESSION_USER) #currentLike - | name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd - | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF | TIMEDIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff + : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike + | name=(TIMESTAMPADD | DATEADD) LEFT_PAREN unit=datetimeUnit COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd + | name=(TIMESTAMPDIFF | DATEDIFF) LEFT_PAREN unit=datetimeUnit COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast - | primaryExpression collateClause #collate - | primaryExpression DOUBLE_COLON dataType #castByColon | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first - | ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault - | ASTERISK exceptClause? #star - | qualifiedName DOT ASTERISK exceptClause? #star + | ASTERISK #star + | qualifiedName DOT ASTERISK #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression - | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument - (COMMA argument+=functionArgument)*)? RIGHT_PAREN - (WITHIN GROUP LEFT_PAREN ORDER BY sortItem (COMMA sortItem)* RIGHT_PAREN)? + | functionName LEFT_PAREN (setQuantifier? argument+=expression (COMMA argument+=expression)*)? RIGHT_PAREN (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall | identifier ARROW expression #lambda @@ -1100,25 +845,17 @@ primaryExpression FROM srcStr=valueExpression RIGHT_PAREN #trim | OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression FROM position=valueExpression (FOR length=valueExpression)? RIGHT_PAREN #overlay - ; - -literalType - : DATE - | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ - | INTERVAL - | BINARY_HEX - | unsupportedType=identifier + | name=(PERCENTILE_CONT | PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN + WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN ( OVER windowSpec)? #percentile ; constant : NULL #nullLiteral - | QUESTION #posParameterLiteral - | COLON identifier #namedParameterLiteral | interval #intervalLiteral - | literalType stringLit #typeConstructor + | identifier STRING #typeConstructor | number #numericLiteral | booleanValue #booleanLiteral - | stringLit+ #stringLiteral + | STRING+ #stringLiteral ; comparisonOperator @@ -1138,7 +875,7 @@ booleanValue ; interval - : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) + : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? ; errorCapturingMultiUnitsInterval @@ -1146,7 +883,7 @@ errorCapturingMultiUnitsInterval ; multiUnitsInterval - : (intervalValue unit+=unitInMultiUnits)+ + : (intervalValue unit+=identifier)+ ; errorCapturingUnitToUnitInterval @@ -1154,54 +891,17 @@ errorCapturingUnitToUnitInterval ; unitToUnitInterval - : value=intervalValue from=unitInUnitToUnit TO to=unitInUnitToUnit + : value=intervalValue from=identifier TO to=identifier ; intervalValue - : (PLUS | MINUS)? - (INTEGER_VALUE | DECIMAL_VALUE | stringLit) - ; - -unitInMultiUnits - : NANOSECOND | NANOSECONDS | MICROSECOND | MICROSECONDS | MILLISECOND | MILLISECONDS - | SECOND | SECONDS | MINUTE | MINUTES | HOUR | HOURS | DAY | DAYS | WEEK | WEEKS - | MONTH | MONTHS | YEAR | YEARS - ; - -unitInUnitToUnit - : SECOND | MINUTE | HOUR | DAY | MONTH | YEAR + : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE | STRING) ; colPosition : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier ; -collateClause - : COLLATE collationName=identifier - ; - -type - : BOOLEAN - | TINYINT | BYTE - | SMALLINT | SHORT - | INT | INTEGER - | BIGINT | LONG - | FLOAT | REAL - | DOUBLE - | DATE - | TIMESTAMP | TIMESTAMP_NTZ | TIMESTAMP_LTZ - | STRING collateClause? - | CHARACTER | CHAR - | VARCHAR - | BINARY - | DECIMAL | DEC | NUMERIC - | VOID - | INTERVAL - | VARIANT - | ARRAY | STRUCT | MAP - | unsupportedType=identifier - ; - dataType : complex=ARRAY LT dataType GT #complexDataType | complex=MAP LT dataType COMMA dataType GT #complexDataType @@ -1209,7 +909,7 @@ dataType | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | type (LEFT_PAREN INTEGER_VALUE + | identifier (LEFT_PAREN INTEGER_VALUE (COMMA INTEGER_VALUE)* RIGHT_PAREN)? #primitiveDataType ; @@ -1218,22 +918,7 @@ qualifiedColTypeWithPositionList ; qualifiedColTypeWithPosition - : name=multipartIdentifier dataType colDefinitionDescriptorWithPosition* - ; - -colDefinitionDescriptorWithPosition - : errorCapturingNot NULL - | defaultExpression - | commentSpec - | colPosition - ; - -defaultExpression - : DEFAULT expression - ; - -variableDefaultExpression - : (DEFAULT | EQ) expression + : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition? ; colTypeList @@ -1241,26 +926,7 @@ colTypeList ; colType - : colName=errorCapturingIdentifier dataType (errorCapturingNot NULL)? commentSpec? - ; - -colDefinitionList - : colDefinition (COMMA colDefinition)* - ; - -colDefinition - : colName=errorCapturingIdentifier dataType colDefinitionOption* - ; - -colDefinitionOption - : errorCapturingNot NULL - | defaultExpression - | generationExpression - | commentSpec - ; - -generationExpression - : GENERATED ALWAYS AS LEFT_PAREN expression RIGHT_PAREN + : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? ; complexColTypeList @@ -1268,49 +934,9 @@ complexColTypeList ; complexColType - : errorCapturingIdentifier COLON? dataType (errorCapturingNot NULL)? commentSpec? - ; - -routineCharacteristics - : (routineLanguage - | specificName - | deterministic - | sqlDataAccess - | nullCall - | commentSpec - | rightsClause)* - ; - -routineLanguage - : LANGUAGE (SQL | IDENTIFIER) - ; - -specificName - : SPECIFIC specific=errorCapturingIdentifier - ; - -deterministic - : DETERMINISTIC - | errorCapturingNot DETERMINISTIC - ; - -sqlDataAccess - : access=NO SQL - | access=CONTAINS SQL - | access=READS SQL DATA - | access=MODIFIES SQL DATA + : identifier COLON? dataType (NOT NULL)? commentSpec? ; -nullCall - : RETURNS NULL ON NULL INPUT - | CALLED ON NULL INPUT - ; - -rightsClause - : SQL SECURITY INVOKER - | SQL SECURITY DEFINER - ; - whenClause : WHEN condition=expression THEN result=expression ; @@ -1352,9 +978,7 @@ qualifiedNameList ; functionName - : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN - | identFunc=IDENTIFIER_KW // IDENTIFIER itself is also a valid function name. - | qualifiedName + : qualifiedName | FILTER | LEFT | RIGHT @@ -1390,11 +1014,6 @@ strictIdentifier ; quotedIdentifier - : BACKQUOTED_IDENTIFIER - | {double_quoted_identifiers}? DOUBLEQUOTED_STRING - ; - -backQuotedIdentifier : BACKQUOTED_IDENTIFIER ; @@ -1415,25 +1034,10 @@ alterColumnAction : TYPE dataType | commentSpec | colPosition - | setOrDrop=(SET | DROP) errorCapturingNot NULL - | SET defaultExpression - | dropDefault=DROP DEFAULT + | setOrDrop=(SET | DROP) NOT NULL ; -stringLit - : STRING_LITERAL - | {!double_quoted_identifiers}? DOUBLEQUOTED_STRING - ; - -comment - : stringLit - | NULL - ; -version - : INTEGER_VALUE - | stringLit - ; // When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. // - Reserved keywords: @@ -1450,33 +1054,21 @@ ansiNonReserved : ADD | AFTER | ALTER - | ALWAYS | ANALYZE | ANTI - | ANY_VALUE | ARCHIVE | ARRAY | ASC | AT - | BEGIN | BETWEEN - | BIGINT - | BINARY - | BINARY_HEX - | BINDING - | BOOLEAN | BUCKET | BUCKETS | BY - | BYTE | CACHE - | CALLED | CASCADE | CATALOG | CATALOGS | CHANGE - | CHAR - | CHARACTER | CLEAR | CLUSTER | CLUSTERED @@ -1487,47 +1079,32 @@ ansiNonReserved | COMMIT | COMPACT | COMPACTIONS - | COMPENSATION | COMPUTE | CONCATENATE - | CONTAINS | COST | CUBE | CURRENT | DATA | DATABASE | DATABASES - | DATE | DATEADD - | DATE_ADD | DATEDIFF - | DATE_DIFF | DAY - | DAYS | DAYOFYEAR | DBPROPERTIES - | DEC - | DECIMAL - | DECLARE - | DEFAULT | DEFINED - | DEFINER | DELETE | DELIMITED | DESC | DESCRIBE - | DETERMINISTIC | DFS | DIRECTORIES | DIRECTORY | DISTRIBUTE | DIV - | DOUBLE | DROP | ESCAPED - | EVOLUTION | EXCHANGE - | EXCLUDE | EXISTS | EXPLAIN | EXPORT @@ -1537,36 +1114,25 @@ ansiNonReserved | FIELDS | FILEFORMAT | FIRST - | FLOAT | FOLLOWING | FORMAT | FORMATTED | FUNCTION | FUNCTIONS - | GENERATED | GLOBAL | GROUPING | HOUR - | HOURS - | IDENTIFIER_KW | IF | IGNORE - | IMMEDIATE | IMPORT - | INCLUDE | INDEX | INDEXES | INPATH - | INPUT | INPUTFORMAT | INSERT - | INT - | INTEGER | INTERVAL - | INVOKER | ITEMS | KEYS - | LANGUAGE | LAST | LAZY | LIKE @@ -1580,30 +1146,19 @@ ansiNonReserved | LOCK | LOCKS | LOGICAL - | LONG | MACRO | MAP | MATCHED | MERGE | MICROSECOND - | MICROSECONDS | MILLISECOND - | MILLISECONDS | MINUTE - | MINUTES - | MODIFIES | MONTH - | MONTHS | MSCK - | NAME | NAMESPACE | NAMESPACES - | NANOSECOND - | NANOSECONDS | NO - | NONE | NULLS - | NUMERIC | OF | OPTION | OPTIONS @@ -1626,8 +1181,6 @@ ansiNonReserved | QUARTER | QUERY | RANGE - | READS - | REAL | RECORDREADER | RECORDWRITER | RECOVER @@ -1640,8 +1193,6 @@ ansiNonReserved | RESET | RESPECT | RESTRICT - | RETURN - | RETURNS | REVOKE | RLIKE | ROLE @@ -1653,8 +1204,6 @@ ansiNonReserved | SCHEMA | SCHEMAS | SECOND - | SECONDS - | SECURITY | SEMI | SEPARATED | SERDE @@ -1662,20 +1211,14 @@ ansiNonReserved | SET | SETMINUS | SETS - | SHORT | SHOW - | SINGLE | SKEWED - | SMALLINT | SORT | SORTED - | SOURCE - | SPECIFIC | START | STATISTICS | STORED | STRATIFY - | STRING | STRUCT | SUBSTR | SUBSTRING @@ -1684,17 +1227,12 @@ ansiNonReserved | SYSTEM_VERSION | TABLES | TABLESAMPLE - | TARGET | TBLPROPERTIES | TEMPORARY | TERMINATED - | TIMEDIFF | TIMESTAMP - | TIMESTAMP_LTZ - | TIMESTAMP_NTZ | TIMESTAMPADD | TIMESTAMPDIFF - | TINYINT | TOUCH | TRANSACTION | TRANSACTIONS @@ -1708,24 +1246,16 @@ ansiNonReserved | UNBOUNDED | UNCACHE | UNLOCK - | UNPIVOT | UNSET | UPDATE | USE | VALUES - | VARCHAR - | VAR - | VARIABLE - | VARIANT | VERSION | VIEW | VIEWS - | VOID | WEEK - | WEEKS | WINDOW | YEAR - | YEARS | ZONE //--ANSI-NON-RESERVED-END ; @@ -1764,46 +1294,33 @@ nonReserved | AFTER | ALL | ALTER - | ALWAYS | ANALYZE | AND | ANY - | ANY_VALUE | ARCHIVE | ARRAY | AS | ASC | AT | AUTHORIZATION - | BEGIN | BETWEEN - | BIGINT - | BINARY - | BINARY_HEX - | BINDING - | BOOLEAN | BOTH | BUCKET | BUCKETS | BY - | BYTE | CACHE - | CALLED | CASCADE | CASE | CAST | CATALOG | CATALOGS | CHANGE - | CHAR - | CHARACTER | CHECK | CLEAR | CLUSTER | CLUSTERED | CODEGEN | COLLATE - | COLLATION | COLLECTION | COLUMN | COLUMNS @@ -1811,11 +1328,9 @@ nonReserved | COMMIT | COMPACT | COMPACTIONS - | COMPENSATION | COMPUTE | CONCATENATE | CONSTRAINT - | CONTAINS | COST | CREATE | CUBE @@ -1827,42 +1342,28 @@ nonReserved | DATA | DATABASE | DATABASES - | DATE | DATEADD - | DATE_ADD | DATEDIFF - | DATE_DIFF | DAY - | DAYS | DAYOFYEAR | DBPROPERTIES - | DEC - | DECIMAL - | DECLARE - | DEFAULT | DEFINED - | DEFINER | DELETE | DELIMITED | DESC | DESCRIBE - | DETERMINISTIC | DFS | DIRECTORIES | DIRECTORY | DISTINCT | DISTRIBUTE | DIV - | DOUBLE | DROP | ELSE | END | ESCAPE | ESCAPED - | EVOLUTION | EXCHANGE - | EXCLUDE - | EXECUTE | EXISTS | EXPLAIN | EXPORT @@ -1875,7 +1376,6 @@ nonReserved | FIELDS | FILEFORMAT | FIRST - | FLOAT | FOLLOWING | FOR | FOREIGN @@ -1884,41 +1384,30 @@ nonReserved | FROM | FUNCTION | FUNCTIONS - | GENERATED | GLOBAL | GRANT | GROUP | GROUPING | HAVING | HOUR - | HOURS - | IDENTIFIER_KW | IF | IGNORE - | IMMEDIATE | IMPORT | IN - | INCLUDE | INDEX | INDEXES | INPATH - | INPUT | INPUTFORMAT | INSERT - | INT - | INTEGER | INTERVAL | INTO - | INVOKER | IS | ITEMS | KEYS - | LANGUAGE | LAST | LAZY | LEADING | LIKE - | LONG | ILIKE | LIMIT | LINES @@ -1929,34 +1418,22 @@ nonReserved | LOCK | LOCKS | LOGICAL - | LONG | MACRO | MAP | MATCHED | MERGE | MICROSECOND - | MICROSECONDS | MILLISECOND - | MILLISECONDS | MINUTE - | MINUTES - | MODIFIES | MONTH - | MONTHS | MSCK - | NAME | NAMESPACE | NAMESPACES - | NANOSECOND - | NANOSECONDS | NO - | NONE | NOT | NULL | NULLS - | NUMERIC | OF - | OFFSET | ONLY | OPTION | OPTIONS @@ -1972,6 +1449,8 @@ nonReserved | PARTITION | PARTITIONED | PARTITIONS + | PERCENTILE_CONT + | PERCENTILE_DISC | PERCENTLIT | PIVOT | PLACING @@ -1984,8 +1463,6 @@ nonReserved | QUARTER | QUERY | RANGE - | READS - | REAL | RECORDREADER | RECORDWRITER | RECOVER @@ -1999,8 +1476,6 @@ nonReserved | RESET | RESPECT | RESTRICT - | RETURN - | RETURNS | REVOKE | RLIKE | ROLE @@ -2012,8 +1487,6 @@ nonReserved | SCHEMA | SCHEMAS | SECOND - | SECONDS - | SECURITY | SELECT | SEPARATED | SERDE @@ -2021,22 +1494,15 @@ nonReserved | SESSION_USER | SET | SETS - | SHORT | SHOW - | SINGLE | SKEWED - | SMALLINT | SOME | SORT | SORTED - | SOURCE - | SPECIFIC - | SQL | START | STATISTICS | STORED | STRATIFY - | STRING | STRUCT | SUBSTR | SUBSTRING @@ -2046,19 +1512,14 @@ nonReserved | TABLE | TABLES | TABLESAMPLE - | TARGET | TBLPROPERTIES | TEMPORARY | TERMINATED | THEN | TIME - | TIMEDIFF | TIMESTAMP - | TIMESTAMP_LTZ - | TIMESTAMP_NTZ | TIMESTAMPADD | TIMESTAMPDIFF - | TINYINT | TO | TOUCH | TRAILING @@ -2076,29 +1537,21 @@ nonReserved | UNIQUE | UNKNOWN | UNLOCK - | UNPIVOT | UNSET | UPDATE | USE | USER | VALUES - | VARCHAR - | VAR - | VARIABLE - | VARIANT | VERSION | VIEW | VIEWS - | VOID | WEEK - | WEEKS | WHEN | WHERE | WINDOW | WITH | WITHIN | YEAR - | YEARS | ZONE //--DEFAULT-NON-RESERVED-END ; diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java index 0bb9cb4b85..85e5a3d349 100644 --- a/async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java @@ -22,7 +22,7 @@ import org.opensearch.sql.spark.antlr.parser.FlintSparkSqlExtensionsParser; import org.opensearch.sql.spark.antlr.parser.SqlBaseLexer; import org.opensearch.sql.spark.antlr.parser.SqlBaseParser; -import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.IdentifierReferenceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.MultipartIdentifierContext; import org.opensearch.sql.spark.antlr.parser.SqlBaseParserBaseVisitor; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; @@ -108,15 +108,15 @@ public static class SparkSqlTableNameVisitor extends SqlBaseParserBaseVisitor Date: Fri, 6 Sep 2024 11:48:04 -0700 Subject: [PATCH 2/4] Fix version to current one Signed-off-by: Tomoyuki Morita --- async-query-core/build.gradle | 8 +- .../src/main/antlr/SqlBaseLexer.g4 | 89 +---- .../src/main/antlr/SqlBaseParser.g4 | 373 +++--------------- 3 files changed, 77 insertions(+), 393 deletions(-) diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index b8e3e337e1..e126ee0580 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -21,12 +21,12 @@ tasks.register('downloadG4Files', Exec) { executable 'curl' - def opensearchSparkBranch = "0.4" - def apacheSparkBranch = "v3.3.2" + def opensearchSparkBranch = "0.5" + def apacheSparkVersionTag = "v3.5.1" args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4" args '-o', 'src/main/antlr/SparkSqlBase.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4" - args '-o', 'src/main/antlr/SqlBaseParser.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkBranch}/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4" - args '-o', 'src/main/antlr/SqlBaseLexer.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkBranch}/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4" + args '-o', 'src/main/antlr/SqlBaseParser.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkVersionTag}/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4" + args '-o', 'src/main/antlr/SqlBaseLexer.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkVersionTag}/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4" } generateGrammarSource { diff --git a/async-query-core/src/main/antlr/SqlBaseLexer.g4 b/async-query-core/src/main/antlr/SqlBaseLexer.g4 index acfc0011f5..fb440ef8d3 100644 --- a/async-query-core/src/main/antlr/SqlBaseLexer.g4 +++ b/async-query-core/src/main/antlr/SqlBaseLexer.g4 @@ -69,35 +69,6 @@ lexer grammar SqlBaseLexer; public void markUnclosedComment() { has_unclosed_bracketed_comment = true; } - - /** - * When greater than zero, it's in the middle of parsing ARRAY/MAP/STRUCT type. - */ - public int complex_type_level_counter = 0; - - /** - * Increase the counter by one when hits KEYWORD 'ARRAY', 'MAP', 'STRUCT'. - */ - public void incComplexTypeLevelCounter() { - complex_type_level_counter++; - } - - /** - * Decrease the counter by one when hits close tag '>' && the counter greater than zero - * which means we are in the middle of complex type parsing. Otherwise, it's a dangling - * GT token and we do nothing. - */ - public void decComplexTypeLevelCounter() { - if (complex_type_level_counter > 0) complex_type_level_counter--; - } - - /** - * If the counter is zero, it's a shift right operator. It can be closing tags of an complex - * type definition, such as MAP>. - */ - public boolean isShiftRightOperator() { - return complex_type_level_counter == 0 ? true : false; - } } SEMICOLON: ';'; @@ -108,7 +79,6 @@ COMMA: ','; DOT: '.'; LEFT_BRACKET: '['; RIGHT_BRACKET: ']'; -BANG: '!'; // NOTE: If you add a new token in the list below, you should update the list of keywords // and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and @@ -129,16 +99,14 @@ ANTI: 'ANTI'; ANY: 'ANY'; ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; -ARRAY: 'ARRAY' {incComplexTypeLevelCounter();}; +ARRAY: 'ARRAY'; AS: 'AS'; ASC: 'ASC'; AT: 'AT'; AUTHORIZATION: 'AUTHORIZATION'; -BEGIN: 'BEGIN'; BETWEEN: 'BETWEEN'; BIGINT: 'BIGINT'; BINARY: 'BINARY'; -BINDING: 'BINDING'; BOOLEAN: 'BOOLEAN'; BOTH: 'BOTH'; BUCKET: 'BUCKET'; @@ -146,7 +114,6 @@ BUCKETS: 'BUCKETS'; BY: 'BY'; BYTE: 'BYTE'; CACHE: 'CACHE'; -CALLED: 'CALLED'; CASCADE: 'CASCADE'; CASE: 'CASE'; CAST: 'CAST'; @@ -161,7 +128,6 @@ CLUSTER: 'CLUSTER'; CLUSTERED: 'CLUSTERED'; CODEGEN: 'CODEGEN'; COLLATE: 'COLLATE'; -COLLATION: 'COLLATION'; COLLECTION: 'COLLECTION'; COLUMN: 'COLUMN'; COLUMNS: 'COLUMNS'; @@ -169,11 +135,9 @@ COMMENT: 'COMMENT'; COMMIT: 'COMMIT'; COMPACT: 'COMPACT'; COMPACTIONS: 'COMPACTIONS'; -COMPENSATION: 'COMPENSATION'; COMPUTE: 'COMPUTE'; CONCATENATE: 'CONCATENATE'; CONSTRAINT: 'CONSTRAINT'; -CONTAINS: 'CONTAINS'; COST: 'COST'; CREATE: 'CREATE'; CROSS: 'CROSS'; @@ -197,29 +161,24 @@ DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; DEC: 'DEC'; DECIMAL: 'DECIMAL'; -DECLARE: 'DECLARE'; DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; -DEFINER: 'DEFINER'; DELETE: 'DELETE'; DELIMITED: 'DELIMITED'; DESC: 'DESC'; DESCRIBE: 'DESCRIBE'; -DETERMINISTIC: 'DETERMINISTIC'; DFS: 'DFS'; DIRECTORIES: 'DIRECTORIES'; DIRECTORY: 'DIRECTORY'; DISTINCT: 'DISTINCT'; DISTRIBUTE: 'DISTRIBUTE'; DIV: 'DIV'; -DO: 'DO'; DOUBLE: 'DOUBLE'; DROP: 'DROP'; ELSE: 'ELSE'; END: 'END'; ESCAPE: 'ESCAPE'; ESCAPED: 'ESCAPED'; -EVOLUTION: 'EVOLUTION'; EXCEPT: 'EXCEPT'; EXCHANGE: 'EXCHANGE'; EXCLUDE: 'EXCLUDE'; @@ -257,7 +216,6 @@ HOURS: 'HOURS'; IDENTIFIER_KW: 'IDENTIFIER'; IF: 'IF'; IGNORE: 'IGNORE'; -IMMEDIATE: 'IMMEDIATE'; IMPORT: 'IMPORT'; IN: 'IN'; INCLUDE: 'INCLUDE'; @@ -265,7 +223,6 @@ INDEX: 'INDEX'; INDEXES: 'INDEXES'; INNER: 'INNER'; INPATH: 'INPATH'; -INPUT: 'INPUT'; INPUTFORMAT: 'INPUTFORMAT'; INSERT: 'INSERT'; INTERSECT: 'INTERSECT'; @@ -273,12 +230,10 @@ INTERVAL: 'INTERVAL'; INT: 'INT'; INTEGER: 'INTEGER'; INTO: 'INTO'; -INVOKER: 'INVOKER'; IS: 'IS'; ITEMS: 'ITEMS'; JOIN: 'JOIN'; KEYS: 'KEYS'; -LANGUAGE: 'LANGUAGE'; LAST: 'LAST'; LATERAL: 'LATERAL'; LAZY: 'LAZY'; @@ -297,7 +252,7 @@ LOCKS: 'LOCKS'; LOGICAL: 'LOGICAL'; LONG: 'LONG'; MACRO: 'MACRO'; -MAP: 'MAP' {incComplexTypeLevelCounter();}; +MAP: 'MAP'; MATCHED: 'MATCHED'; MERGE: 'MERGE'; MICROSECOND: 'MICROSECOND'; @@ -306,7 +261,6 @@ MILLISECOND: 'MILLISECOND'; MILLISECONDS: 'MILLISECONDS'; MINUTE: 'MINUTE'; MINUTES: 'MINUTES'; -MODIFIES: 'MODIFIES'; MONTH: 'MONTH'; MONTHS: 'MONTHS'; MSCK: 'MSCK'; @@ -317,8 +271,7 @@ NANOSECOND: 'NANOSECOND'; NANOSECONDS: 'NANOSECONDS'; NATURAL: 'NATURAL'; NO: 'NO'; -NONE: 'NONE'; -NOT: 'NOT'; +NOT: 'NOT' | '!'; NULL: 'NULL'; NULLS: 'NULLS'; NUMERIC: 'NUMERIC'; @@ -340,6 +293,8 @@ OVERWRITE: 'OVERWRITE'; PARTITION: 'PARTITION'; PARTITIONED: 'PARTITIONED'; PARTITIONS: 'PARTITIONS'; +PERCENTILE_CONT: 'PERCENTILE_CONT'; +PERCENTILE_DISC: 'PERCENTILE_DISC'; PERCENTLIT: 'PERCENT'; PIVOT: 'PIVOT'; PLACING: 'PLACING'; @@ -352,7 +307,6 @@ PURGE: 'PURGE'; QUARTER: 'QUARTER'; QUERY: 'QUERY'; RANGE: 'RANGE'; -READS: 'READS'; REAL: 'REAL'; RECORDREADER: 'RECORDREADER'; RECORDWRITER: 'RECORDWRITER'; @@ -367,8 +321,6 @@ REPLACE: 'REPLACE'; RESET: 'RESET'; RESPECT: 'RESPECT'; RESTRICT: 'RESTRICT'; -RETURN: 'RETURN'; -RETURNS: 'RETURNS'; REVOKE: 'REVOKE'; RIGHT: 'RIGHT'; RLIKE: 'RLIKE' | 'REGEXP'; @@ -382,7 +334,6 @@ SECOND: 'SECOND'; SECONDS: 'SECONDS'; SCHEMA: 'SCHEMA'; SCHEMAS: 'SCHEMAS'; -SECURITY: 'SECURITY'; SELECT: 'SELECT'; SEMI: 'SEMI'; SEPARATED: 'SEPARATED'; @@ -394,21 +345,18 @@ SETMINUS: 'MINUS'; SETS: 'SETS'; SHORT: 'SHORT'; SHOW: 'SHOW'; -SINGLE: 'SINGLE'; SKEWED: 'SKEWED'; SMALLINT: 'SMALLINT'; SOME: 'SOME'; SORT: 'SORT'; SORTED: 'SORTED'; SOURCE: 'SOURCE'; -SPECIFIC: 'SPECIFIC'; -SQL: 'SQL'; START: 'START'; STATISTICS: 'STATISTICS'; STORED: 'STORED'; STRATIFY: 'STRATIFY'; STRING: 'STRING'; -STRUCT: 'STRUCT' {incComplexTypeLevelCounter();}; +STRUCT: 'STRUCT'; SUBSTR: 'SUBSTR'; SUBSTRING: 'SUBSTRING'; SYNC: 'SYNC'; @@ -423,7 +371,6 @@ TEMPORARY: 'TEMPORARY' | 'TEMP'; TERMINATED: 'TERMINATED'; THEN: 'THEN'; TIME: 'TIME'; -TIMEDIFF: 'TIMEDIFF'; TIMESTAMP: 'TIMESTAMP'; TIMESTAMP_LTZ: 'TIMESTAMP_LTZ'; TIMESTAMP_NTZ: 'TIMESTAMP_NTZ'; @@ -431,7 +378,6 @@ TIMESTAMPADD: 'TIMESTAMPADD'; TIMESTAMPDIFF: 'TIMESTAMPDIFF'; TINYINT: 'TINYINT'; TO: 'TO'; -EXECUTE: 'EXECUTE'; TOUCH: 'TOUCH'; TRAILING: 'TRAILING'; TRANSACTION: 'TRANSACTION'; @@ -457,9 +403,6 @@ USER: 'USER'; USING: 'USING'; VALUES: 'VALUES'; VARCHAR: 'VARCHAR'; -VAR: 'VAR'; -VARIABLE: 'VARIABLE'; -VARIANT: 'VARIANT'; VERSION: 'VERSION'; VIEW: 'VIEW'; VIEWS: 'VIEWS'; @@ -468,7 +411,6 @@ WEEK: 'WEEK'; WEEKS: 'WEEKS'; WHEN: 'WHEN'; WHERE: 'WHERE'; -WHILE: 'WHILE'; WINDOW: 'WINDOW'; WITH: 'WITH'; WITHIN: 'WITHIN'; @@ -486,11 +428,8 @@ NEQ : '<>'; NEQJ: '!='; LT : '<'; LTE : '<=' | '!>'; -GT : '>' {decComplexTypeLevelCounter();}; +GT : '>'; GTE : '>=' | '!<'; -SHIFT_LEFT: '<<'; -SHIFT_RIGHT: '>>' {isShiftRightOperator()}?; -SHIFT_RIGHT_UNSIGNED: '>>>' {isShiftRightOperator()}?; PLUS: '+'; MINUS: '-'; @@ -503,7 +442,6 @@ PIPE: '|'; CONCAT_PIPE: '||'; HAT: '^'; COLON: ':'; -DOUBLE_COLON: '::'; ARROW: '->'; FAT_ARROW : '=>'; HENT_START: '/*+'; @@ -563,13 +501,8 @@ BIGDECIMAL_LITERAL | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? ; -// Generalize the identifier to give a sensible INVALID_IDENTIFIER error message: -// * Unicode letters rather than a-z and A-Z only -// * URI paths for table references using paths -// We then narrow down to ANSI rules in exitUnquotedIdentifier() in the parser. IDENTIFIER - : (UNICODE_LETTER | DIGIT | '_')+ - | UNICODE_LETTER+ '://' (UNICODE_LETTER | DIGIT | '_' | '/' | '-' | '.' | '?' | '=' | '&' | '#' | '%')+ + : (LETTER | DIGIT | '_')+ ; BACKQUOTED_IDENTIFIER @@ -593,10 +526,6 @@ fragment LETTER : [A-Z] ; -fragment UNICODE_LETTER - : [\p{L}] - ; - SIMPLE_COMMENT : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) ; @@ -606,7 +535,7 @@ BRACKETED_COMMENT ; WS - : [ \t\n\f\r\u000B\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u202F\u205F\u3000]+ -> channel(HIDDEN) + : [ \r\n\t]+ -> channel(HIDDEN) ; // Catch-all for anything we can't recognize. diff --git a/async-query-core/src/main/antlr/SqlBaseParser.g4 b/async-query-core/src/main/antlr/SqlBaseParser.g4 index 5b8805821b..04128216be 100644 --- a/async-query-core/src/main/antlr/SqlBaseParser.g4 +++ b/async-query-core/src/main/antlr/SqlBaseParser.g4 @@ -42,57 +42,8 @@ options { tokenVocab = SqlBaseLexer; } public boolean double_quoted_identifiers = false; } -compoundOrSingleStatement - : singleStatement - | singleCompoundStatement - ; - -singleCompoundStatement - : beginEndCompoundBlock SEMICOLON? EOF - ; - -beginEndCompoundBlock - : beginLabel? BEGIN compoundBody END endLabel? - ; - -compoundBody - : (compoundStatements+=compoundStatement SEMICOLON)* - ; - -compoundStatement - : statement - | setStatementWithOptionalVarKeyword - | beginEndCompoundBlock - | ifElseStatement - | whileStatement - ; - -setStatementWithOptionalVarKeyword - : SET variable? assignmentList #setVariableWithOptionalKeyword - | SET variable? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ - LEFT_PAREN query RIGHT_PAREN #setVariableWithOptionalKeyword - ; - -whileStatement - : beginLabel? WHILE booleanExpression DO compoundBody END WHILE endLabel? - ; - -ifElseStatement - : IF booleanExpression THEN conditionalBodies+=compoundBody - (ELSE IF booleanExpression THEN conditionalBodies+=compoundBody)* - (ELSE elseBody=compoundBody)? END IF - ; - singleStatement - : (statement|setResetStatement) SEMICOLON* EOF - ; - -beginLabel - : multipartIdentifier COLON - ; - -endLabel - : multipartIdentifier + : statement SEMICOLON* EOF ; singleExpression @@ -121,36 +72,33 @@ singleTableSchema statement : query #statementDefault - | executeImmediate #visitExecuteImmediate | ctes? dmlStatementNoWith #dmlStatement | USE identifierReference #use | USE namespace identifierReference #useNamespace - | SET CATALOG (errorCapturingIdentifier | stringLit) #setCatalog - | CREATE namespace (IF errorCapturingNot EXISTS)? identifierReference + | SET CATALOG (identifier | stringLit) #setCatalog + | CREATE namespace (IF NOT EXISTS)? identifierReference (commentSpec | locationSpec | (WITH (DBPROPERTIES | PROPERTIES) propertyList))* #createNamespace | ALTER namespace identifierReference SET (DBPROPERTIES | PROPERTIES) propertyList #setNamespaceProperties - | ALTER namespace identifierReference - UNSET (DBPROPERTIES | PROPERTIES) propertyList #unsetNamespaceProperties | ALTER namespace identifierReference SET locationSpec #setNamespaceLocation | DROP namespace (IF EXISTS)? identifierReference (RESTRICT | CASCADE)? #dropNamespace | SHOW namespaces ((FROM | IN) multipartIdentifier)? (LIKE? pattern=stringLit)? #showNamespaces - | createTableHeader (LEFT_PAREN colDefinitionList RIGHT_PAREN)? tableProvider? + | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable - | CREATE TABLE (IF errorCapturingNot EXISTS)? target=tableIdentifier + | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier (tableProvider | rowFormat | createFileFormat | locationSpec | (TBLPROPERTIES tableProps=propertyList))* #createTableLike - | replaceTableHeader (LEFT_PAREN colDefinitionList RIGHT_PAREN)? tableProvider? + | replaceTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE identifierReference partitionSpec? COMPUTE STATISTICS @@ -192,7 +140,7 @@ statement SET SERDE stringLit (WITH SERDEPROPERTIES propertyList)? #setTableSerDe | ALTER TABLE identifierReference (partitionSpec)? SET SERDEPROPERTIES propertyList #setTableSerDe - | ALTER (TABLE | VIEW) identifierReference ADD (IF errorCapturingNot EXISTS)? + | ALTER (TABLE | VIEW) identifierReference ADD (IF NOT EXISTS)? partitionSpecLocation+ #addTablePartition | ALTER TABLE identifierReference from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition @@ -201,15 +149,12 @@ statement | ALTER TABLE identifierReference (partitionSpec)? SET locationSpec #setTableLocation | ALTER TABLE identifierReference RECOVER PARTITIONS #recoverPartitions - | ALTER TABLE identifierReference - (clusterBySpec | CLUSTER BY NONE) #alterClusterBy | DROP TABLE (IF EXISTS)? identifierReference PURGE? #dropTable | DROP VIEW (IF EXISTS)? identifierReference #dropView | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? - VIEW (IF errorCapturingNot EXISTS)? identifierReference + VIEW (IF NOT EXISTS)? identifierReference identifierCommentList? (commentSpec | - schemaBinding | (PARTITIONED ON identifierList) | (TBLPROPERTIES propertyList))* AS query #createView @@ -217,21 +162,12 @@ statement tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider (OPTIONS propertyList)? #createTempViewUsing | ALTER VIEW identifierReference AS? query #alterViewQuery - | ALTER VIEW identifierReference schemaBinding #alterViewSchemaBinding - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? + | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? identifierReference AS className=stringLit (USING resource (COMMA resource)*)? #createFunction - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? - identifierReference LEFT_PAREN parameters=colDefinitionList? RIGHT_PAREN - (RETURNS (dataType | TABLE LEFT_PAREN returnParams=colTypeList RIGHT_PAREN))? - routineCharacteristics - RETURN (query | expression) #createUserDefinedFunction - | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction - | DECLARE (OR REPLACE)? variable? - identifierReference dataType? variableDefaultExpression? #createVariable - | DROP TEMPORARY variable (IF EXISTS)? identifierReference #dropVariable + | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? - (statement|setResetStatement) #explain + statement #explain | SHOW TABLES ((FROM | IN) identifierReference)? (LIKE? pattern=stringLit)? #showTables | SHOW TABLE EXTENDED ((FROM | IN) ns=identifierReference)? @@ -270,51 +206,22 @@ statement | (MSCK)? REPAIR TABLE identifierReference (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable | op=(ADD | LIST) identifier .*? #manageResource - | CREATE INDEX (IF errorCapturingNot EXISTS)? identifier ON TABLE? - identifierReference (USING indexType=identifier)? - LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN - (OPTIONS options=propertyList)? #createIndex - | DROP INDEX (IF EXISTS)? identifier ON TABLE? identifierReference #dropIndex - | unsupportedHiveNativeCommands .*? #failNativeCommand - ; - -setResetStatement - : SET COLLATION collationName=identifier #setCollation - | SET ROLE .*? #failSetRole + | SET ROLE .*? #failNativeCommand | SET TIME ZONE interval #setTimeZone | SET TIME ZONE timezone #setTimeZone | SET TIME ZONE .*? #setTimeZone - | SET variable assignmentList #setVariable - | SET variable LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ - LEFT_PAREN query RIGHT_PAREN #setVariable | SET configKey EQ configValue #setQuotedConfiguration | SET configKey (EQ .*?)? #setConfiguration | SET .*? EQ configValue #setQuotedConfiguration | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration | RESET .*? #resetConfiguration - ; - -executeImmediate - : EXECUTE IMMEDIATE queryParam=executeImmediateQueryParam (INTO targetVariable=multipartIdentifierList)? executeImmediateUsing? - ; - -executeImmediateUsing - : USING LEFT_PAREN params=namedExpressionSeq RIGHT_PAREN - | USING params=namedExpressionSeq - ; - -executeImmediateQueryParam - : stringLit - | multipartIdentifier - ; - -executeImmediateArgument - : (constant|multipartIdentifier) (AS name=errorCapturingIdentifier)? - ; - -executeImmediateArgumentSeq - : executeImmediateArgument (COMMA executeImmediateArgument)* + | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE? + identifierReference (USING indexType=identifier)? + LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN + (OPTIONS options=propertyList)? #createIndex + | DROP INDEX (IF EXISTS)? identifier ON TABLE? identifierReference #dropIndex + | unsupportedHiveNativeCommands .*? #failNativeCommand ; timezone @@ -378,17 +285,13 @@ unsupportedHiveNativeCommands ; createTableHeader - : CREATE TEMPORARY? EXTERNAL? TABLE (IF errorCapturingNot EXISTS)? identifierReference + : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? identifierReference ; replaceTableHeader : (CREATE OR)? REPLACE TABLE identifierReference ; -clusterBySpec - : CLUSTER BY LEFT_PAREN multipartIdentifierList RIGHT_PAREN - ; - bucketSpec : CLUSTERED BY identifierList (SORTED BY orderedIdentifierList)? @@ -405,10 +308,6 @@ locationSpec : LOCATION stringLit ; -schemaBinding - : WITH SCHEMA (BINDING | COMPENSATION | EVOLUTION | TYPE EVOLUTION) - ; - commentSpec : COMMENT stringLit ; @@ -418,9 +317,9 @@ query ; insertInto - : INSERT OVERWRITE TABLE? identifierReference optionsClause? (partitionSpec (IF errorCapturingNot EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable - | INSERT INTO TABLE? identifierReference optionsClause? partitionSpec? (IF errorCapturingNot EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable - | INSERT INTO TABLE? identifierReference optionsClause? REPLACE whereClause #insertIntoReplaceWhere + : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF NOT EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable + | INSERT INTO TABLE? identifierReference partitionSpec? (IF NOT EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable + | INSERT INTO TABLE? identifierReference REPLACE whereClause #insertIntoReplaceWhere | INSERT OVERWRITE LOCAL? DIRECTORY path=stringLit rowFormat? createFileFormat? #insertOverwriteHiveDir | INSERT OVERWRITE LOCAL? DIRECTORY (path=stringLit)? tableProvider (OPTIONS options=propertyList)? #insertOverwriteDir ; @@ -450,23 +349,16 @@ namespaces | SCHEMAS ; -variable - : VARIABLE - | VAR - ; - describeFuncName : identifierReference | stringLit | comparisonOperator | arithmeticOperator | predicateOperator - | shiftOperator - | BANG ; describeColName - : nameParts+=errorCapturingIdentifier (DOT nameParts+=errorCapturingIdentifier)* + : nameParts+=identifier (DOT nameParts+=identifier)* ; ctes @@ -485,7 +377,6 @@ createTableClauses :((OPTIONS options=expressionPropertyList) | (PARTITIONED BY partitioning=partitionFieldList) | skewSpec | - clusterBySpec | bucketSpec | rowFormat | createFileFormat | @@ -503,7 +394,7 @@ property ; propertyKey - : errorCapturingIdentifier (DOT errorCapturingIdentifier)* + : identifier (DOT identifier)* | stringLit ; @@ -553,7 +444,7 @@ dmlStatementNoWith | fromClause multiInsertQueryBody+ #multiInsertQuery | DELETE FROM identifierReference tableAlias whereClause? #deleteFromTable | UPDATE identifierReference tableAlias setClause whereClause? #updateTable - | MERGE (WITH SCHEMA EVOLUTION)? INTO target=identifierReference targetAlias=tableAlias + | MERGE INTO target=identifierReference targetAlias=tableAlias USING (source=identifierReference | LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias ON mergeCondition=booleanExpression @@ -661,11 +552,11 @@ matchedClause : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction ; notMatchedClause - : WHEN errorCapturingNot MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction + : WHEN NOT MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction ; notMatchedBySourceClause - : WHEN errorCapturingNot MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction + : WHEN NOT MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction ; matchedAction @@ -685,10 +576,6 @@ notMatchedBySourceAction | UPDATE SET assignmentList ; -exceptClause - : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN - ; - assignmentList : assignment (COMMA assignment)* ; @@ -757,18 +644,18 @@ pivotClause ; pivotColumn - : identifiers+=errorCapturingIdentifier - | LEFT_PAREN identifiers+=errorCapturingIdentifier (COMMA identifiers+=errorCapturingIdentifier)* RIGHT_PAREN + : identifiers+=identifier + | LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN ; pivotValue - : expression (AS? errorCapturingIdentifier)? + : expression (AS? identifier)? ; unpivotClause : UNPIVOT nullOperator=unpivotNullClause? LEFT_PAREN operator=unpivotOperator - RIGHT_PAREN (AS? errorCapturingIdentifier)? + RIGHT_PAREN (AS? identifier)? ; unpivotNullClause @@ -810,7 +697,7 @@ unpivotColumn ; unpivotAlias - : AS? errorCapturingIdentifier + : AS? identifier ; lateralView @@ -890,37 +777,21 @@ identifierComment relationPrimary : identifierReference temporalClause? - optionsClause? sample? tableAlias #tableName + sample? tableAlias #tableName | LEFT_PAREN query RIGHT_PAREN sample? tableAlias #aliasedQuery | LEFT_PAREN relation RIGHT_PAREN sample? tableAlias #aliasedRelation | inlineTable #inlineTableDefault2 | functionTable #tableValuedFunction ; -optionsClause - : WITH options=propertyList - ; - inlineTable : VALUES expression (COMMA expression)* tableAlias ; functionTableSubqueryArgument - : TABLE identifierReference tableArgumentPartitioning? - | TABLE LEFT_PAREN identifierReference RIGHT_PAREN tableArgumentPartitioning? - | TABLE LEFT_PAREN query RIGHT_PAREN tableArgumentPartitioning? - ; - -tableArgumentPartitioning - : ((WITH SINGLE PARTITION) - | ((PARTITION | DISTRIBUTE) BY - (((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN)) - | (expression (COMMA invalidMultiPartitionExpression=expression)+) - | partition+=expression))) - ((ORDER | SORT) BY - (((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) - | (sortItem (COMMA invalidMultiSortItem=sortItem)+) - | sortItem)))? + : TABLE identifierReference + | TABLE LEFT_PAREN identifierReference RIGHT_PAREN + | TABLE LEFT_PAREN query RIGHT_PAREN ; functionTableNamedArgumentExpression @@ -1027,7 +898,7 @@ expressionSeq ; booleanExpression - : (NOT | BANG) booleanExpression #logicalNot + : NOT booleanExpression #logicalNot | EXISTS LEFT_PAREN query RIGHT_PAREN #exists | valueExpression predicate? #predicated | left=booleanExpression operator=AND right=booleanExpression #logicalBinary @@ -1035,20 +906,15 @@ booleanExpression ; predicate - : errorCapturingNot? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | errorCapturingNot? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN - | errorCapturingNot? kind=IN LEFT_PAREN query RIGHT_PAREN - | errorCapturingNot? kind=RLIKE pattern=valueExpression - | errorCapturingNot? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) - | errorCapturingNot? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? - | IS errorCapturingNot? kind=NULL - | IS errorCapturingNot? kind=(TRUE | FALSE | UNKNOWN) - | IS errorCapturingNot? kind=DISTINCT FROM right=valueExpression - ; - -errorCapturingNot - : NOT - | BANG + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN + | NOT? kind=RLIKE pattern=valueExpression + | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) + | NOT? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? + | IS NOT? kind=NULL + | IS NOT? kind=(TRUE | FALSE | UNKNOWN) + | IS NOT? kind=DISTINCT FROM right=valueExpression ; valueExpression @@ -1056,19 +922,12 @@ valueExpression | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary - | left=valueExpression shiftOperator right=valueExpression #shiftExpression | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary | left=valueExpression comparisonOperator right=valueExpression #comparison ; -shiftOperator - : SHIFT_LEFT - | SHIFT_RIGHT - | SHIFT_RIGHT_UNSIGNED - ; - datetimeUnit : YEAR | QUARTER | MONTH | WEEK | DAY | DAYOFYEAR @@ -1076,27 +935,24 @@ datetimeUnit ; primaryExpression - : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER | SESSION_USER) #currentLike + : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER) #currentLike | name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd - | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF | TIMEDIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff + | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast - | primaryExpression collateClause #collate - | primaryExpression DOUBLE_COLON dataType #castByColon | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first | ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault - | ASTERISK exceptClause? #star - | qualifiedName DOT ASTERISK exceptClause? #star + | ASTERISK #star + | qualifiedName DOT ASTERISK #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument (COMMA argument+=functionArgument)*)? RIGHT_PAREN - (WITHIN GROUP LEFT_PAREN ORDER BY sortItem (COMMA sortItem)* RIGHT_PAREN)? (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall | identifier ARROW expression #lambda @@ -1112,6 +968,9 @@ primaryExpression FROM srcStr=valueExpression RIGHT_PAREN #trim | OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression FROM position=valueExpression (FOR length=valueExpression)? RIGHT_PAREN #overlay + | name=(PERCENTILE_CONT | PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN + WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN + (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER windowSpec)? #percentile ; literalType @@ -1188,10 +1047,6 @@ colPosition : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier ; -collateClause - : COLLATE collationName=identifier - ; - type : BOOLEAN | TINYINT | BYTE @@ -1202,14 +1057,13 @@ type | DOUBLE | DATE | TIMESTAMP | TIMESTAMP_NTZ | TIMESTAMP_LTZ - | STRING collateClause? + | STRING | CHARACTER | CHAR | VARCHAR | BINARY | DECIMAL | DEC | NUMERIC | VOID | INTERVAL - | VARIANT | ARRAY | STRUCT | MAP | unsupportedType=identifier ; @@ -1234,7 +1088,7 @@ qualifiedColTypeWithPosition ; colDefinitionDescriptorWithPosition - : errorCapturingNot NULL + : NOT NULL | defaultExpression | commentSpec | colPosition @@ -1244,28 +1098,24 @@ defaultExpression : DEFAULT expression ; -variableDefaultExpression - : (DEFAULT | EQ) expression - ; - colTypeList : colType (COMMA colType)* ; colType - : colName=errorCapturingIdentifier dataType (errorCapturingNot NULL)? commentSpec? + : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? ; -colDefinitionList - : colDefinition (COMMA colDefinition)* +createOrReplaceTableColTypeList + : createOrReplaceTableColType (COMMA createOrReplaceTableColType)* ; -colDefinition +createOrReplaceTableColType : colName=errorCapturingIdentifier dataType colDefinitionOption* ; colDefinitionOption - : errorCapturingNot NULL + : NOT NULL | defaultExpression | generationExpression | commentSpec @@ -1280,49 +1130,9 @@ complexColTypeList ; complexColType - : errorCapturingIdentifier COLON? dataType (errorCapturingNot NULL)? commentSpec? - ; - -routineCharacteristics - : (routineLanguage - | specificName - | deterministic - | sqlDataAccess - | nullCall - | commentSpec - | rightsClause)* - ; - -routineLanguage - : LANGUAGE (SQL | IDENTIFIER) - ; - -specificName - : SPECIFIC specific=errorCapturingIdentifier - ; - -deterministic - : DETERMINISTIC - | errorCapturingNot DETERMINISTIC + : identifier COLON? dataType (NOT NULL)? commentSpec? ; -sqlDataAccess - : access=NO SQL - | access=CONTAINS SQL - | access=READS SQL DATA - | access=MODIFIES SQL DATA - ; - -nullCall - : RETURNS NULL ON NULL INPUT - | CALLED ON NULL INPUT - ; - -rightsClause - : SQL SECURITY INVOKER - | SQL SECURITY DEFINER - ; - whenClause : WHEN condition=expression THEN result=expression ; @@ -1427,7 +1237,7 @@ alterColumnAction : TYPE dataType | commentSpec | colPosition - | setOrDrop=(SET | DROP) errorCapturingNot NULL + | setOrDrop=(SET | DROP) NOT NULL | SET defaultExpression | dropDefault=DROP DEFAULT ; @@ -1470,19 +1280,16 @@ ansiNonReserved | ARRAY | ASC | AT - | BEGIN | BETWEEN | BIGINT | BINARY | BINARY_HEX - | BINDING | BOOLEAN | BUCKET | BUCKETS | BY | BYTE | CACHE - | CALLED | CASCADE | CATALOG | CATALOGS @@ -1499,10 +1306,8 @@ ansiNonReserved | COMMIT | COMPACT | COMPACTIONS - | COMPENSATION | COMPUTE | CONCATENATE - | CONTAINS | COST | CUBE | CURRENT @@ -1520,25 +1325,20 @@ ansiNonReserved | DBPROPERTIES | DEC | DECIMAL - | DECLARE | DEFAULT | DEFINED - | DEFINER | DELETE | DELIMITED | DESC | DESCRIBE - | DETERMINISTIC | DFS | DIRECTORIES | DIRECTORY | DISTRIBUTE | DIV - | DO | DOUBLE | DROP | ESCAPED - | EVOLUTION | EXCHANGE | EXCLUDE | EXISTS @@ -1564,22 +1364,18 @@ ansiNonReserved | IDENTIFIER_KW | IF | IGNORE - | IMMEDIATE | IMPORT | INCLUDE | INDEX | INDEXES | INPATH - | INPUT | INPUTFORMAT | INSERT | INT | INTEGER | INTERVAL - | INVOKER | ITEMS | KEYS - | LANGUAGE | LAST | LAZY | LIKE @@ -1604,7 +1400,6 @@ ansiNonReserved | MILLISECONDS | MINUTE | MINUTES - | MODIFIES | MONTH | MONTHS | MSCK @@ -1614,7 +1409,6 @@ ansiNonReserved | NANOSECOND | NANOSECONDS | NO - | NONE | NULLS | NUMERIC | OF @@ -1639,7 +1433,6 @@ ansiNonReserved | QUARTER | QUERY | RANGE - | READS | REAL | RECORDREADER | RECORDWRITER @@ -1653,8 +1446,6 @@ ansiNonReserved | RESET | RESPECT | RESTRICT - | RETURN - | RETURNS | REVOKE | RLIKE | ROLE @@ -1667,7 +1458,6 @@ ansiNonReserved | SCHEMAS | SECOND | SECONDS - | SECURITY | SEMI | SEPARATED | SERDE @@ -1677,13 +1467,11 @@ ansiNonReserved | SETS | SHORT | SHOW - | SINGLE | SKEWED | SMALLINT | SORT | SORTED | SOURCE - | SPECIFIC | START | STATISTICS | STORED @@ -1701,7 +1489,6 @@ ansiNonReserved | TBLPROPERTIES | TEMPORARY | TERMINATED - | TIMEDIFF | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ @@ -1727,16 +1514,12 @@ ansiNonReserved | USE | VALUES | VARCHAR - | VAR - | VARIABLE - | VARIANT | VERSION | VIEW | VIEWS | VOID | WEEK | WEEKS - | WHILE | WINDOW | YEAR | YEARS @@ -1789,12 +1572,10 @@ nonReserved | ASC | AT | AUTHORIZATION - | BEGIN | BETWEEN | BIGINT | BINARY | BINARY_HEX - | BINDING | BOOLEAN | BOTH | BUCKET @@ -1802,7 +1583,6 @@ nonReserved | BY | BYTE | CACHE - | CALLED | CASCADE | CASE | CAST @@ -1817,7 +1597,6 @@ nonReserved | CLUSTERED | CODEGEN | COLLATE - | COLLATION | COLLECTION | COLUMN | COLUMNS @@ -1825,11 +1604,9 @@ nonReserved | COMMIT | COMPACT | COMPACTIONS - | COMPENSATION | COMPUTE | CONCATENATE | CONSTRAINT - | CONTAINS | COST | CREATE | CUBE @@ -1852,32 +1629,26 @@ nonReserved | DBPROPERTIES | DEC | DECIMAL - | DECLARE | DEFAULT | DEFINED - | DEFINER | DELETE | DELIMITED | DESC | DESCRIBE - | DETERMINISTIC | DFS | DIRECTORIES | DIRECTORY | DISTINCT | DISTRIBUTE | DIV - | DO | DOUBLE | DROP | ELSE | END | ESCAPE | ESCAPED - | EVOLUTION | EXCHANGE | EXCLUDE - | EXECUTE | EXISTS | EXPLAIN | EXPORT @@ -1910,25 +1681,21 @@ nonReserved | IDENTIFIER_KW | IF | IGNORE - | IMMEDIATE | IMPORT | IN | INCLUDE | INDEX | INDEXES | INPATH - | INPUT | INPUTFORMAT | INSERT | INT | INTEGER | INTERVAL | INTO - | INVOKER | IS | ITEMS | KEYS - | LANGUAGE | LAST | LAZY | LEADING @@ -1955,7 +1722,6 @@ nonReserved | MILLISECONDS | MINUTE | MINUTES - | MODIFIES | MONTH | MONTHS | MSCK @@ -1965,7 +1731,6 @@ nonReserved | NANOSECOND | NANOSECONDS | NO - | NONE | NOT | NULL | NULLS @@ -1987,6 +1752,8 @@ nonReserved | PARTITION | PARTITIONED | PARTITIONS + | PERCENTILE_CONT + | PERCENTILE_DISC | PERCENTLIT | PIVOT | PLACING @@ -1999,7 +1766,6 @@ nonReserved | QUARTER | QUERY | RANGE - | READS | REAL | RECORDREADER | RECORDWRITER @@ -2014,8 +1780,6 @@ nonReserved | RESET | RESPECT | RESTRICT - | RETURN - | RETURNS | REVOKE | RLIKE | ROLE @@ -2028,7 +1792,6 @@ nonReserved | SCHEMAS | SECOND | SECONDS - | SECURITY | SELECT | SEPARATED | SERDE @@ -2038,15 +1801,12 @@ nonReserved | SETS | SHORT | SHOW - | SINGLE | SKEWED | SMALLINT | SOME | SORT | SORTED | SOURCE - | SPECIFIC - | SQL | START | STATISTICS | STORED @@ -2067,7 +1827,6 @@ nonReserved | TERMINATED | THEN | TIME - | TIMEDIFF | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ @@ -2098,16 +1857,12 @@ nonReserved | USER | VALUES | VARCHAR - | VAR - | VARIABLE - | VARIANT | VERSION | VIEW | VIEWS | VOID | WEEK | WEEKS - | WHILE | WHEN | WHERE | WINDOW From 51e402f4cb402bea391a028e18a5a12849f268d8 Mon Sep 17 00:00:00 2001 From: Tomoyuki Morita Date: Thu, 19 Sep 2024 14:03:04 -0700 Subject: [PATCH 3/4] Disable auto download Signed-off-by: Tomoyuki Morita --- async-query-core/README.md | 7 +++++++ async-query-core/build.gradle | 8 +------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/async-query-core/README.md b/async-query-core/README.md index 815088bce6..1910e19063 100644 --- a/async-query-core/README.md +++ b/async-query-core/README.md @@ -32,3 +32,10 @@ Following is the list of extension points where the consumer of the library need - [DataSourceSparkParameterComposer](src/main/java/org/opensearch/sql/spark/parameter/DataSourceSparkParameterComposer.java) - [GeneralSparkParameterComposer](src/main/java/org/opensearch/sql/spark/parameter/GeneralSparkParameterComposer.java) - [SparkSubmitParameterModifier](src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java) To be deprecated in favor of GeneralSparkParameterComposer + +## Update Grammar files +This package uses ANTLR grammar files from `opensearch-spark` and `Spark` repositories. +To update the grammar files, update `build.gradle` file (in `downloadG4Files` task) as needed and run: +``` +./gradlew async-query-core:downloadG4Files +``` diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index e126ee0580..6a507a4a9e 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -21,7 +21,7 @@ tasks.register('downloadG4Files', Exec) { executable 'curl' - def opensearchSparkBranch = "0.5" + def opensearchSparkBranch = "0.6" def apacheSparkVersionTag = "v3.5.1" args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4" args '-o', 'src/main/antlr/SparkSqlBase.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4" @@ -40,12 +40,6 @@ configurations { } } -// skip download in case of offline build -if (!gradle.startParameter.offline) { - // Make sure the downloadG4File task runs before the generateGrammarSource task - generateGrammarSource.dependsOn downloadG4Files -} - dependencies { antlr "org.antlr:antlr4:4.7.1" From 828b4d2a87af920fd87e08e631032cee423d0e7a Mon Sep 17 00:00:00 2001 From: Tomoyuki Morita Date: Thu, 19 Sep 2024 14:07:03 -0700 Subject: [PATCH 4/4] Disable auto download Signed-off-by: Tomoyuki Morita --- async-query-core/README.md | 1 + async-query-core/build.gradle | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/async-query-core/README.md b/async-query-core/README.md index 1910e19063..08301c024d 100644 --- a/async-query-core/README.md +++ b/async-query-core/README.md @@ -39,3 +39,4 @@ To update the grammar files, update `build.gradle` file (in `downloadG4Files` ta ``` ./gradlew async-query-core:downloadG4Files ``` +This will overwrite the files under `src/main/antlr`. \ No newline at end of file diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index 6a507a4a9e..bc5fcd4b24 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -21,7 +21,7 @@ tasks.register('downloadG4Files', Exec) { executable 'curl' - def opensearchSparkBranch = "0.6" + def opensearchSparkBranch = "0.5" def apacheSparkVersionTag = "v3.5.1" args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4" args '-o', 'src/main/antlr/SparkSqlBase.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4"