From e8e344bbea896460ef9524d98582bc0206b409cf Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 28 Nov 2023 14:30:17 -0700 Subject: [PATCH] chore(flink): quote every identifier for Flink SQL --- ibis/backends/base/sql/registry/helpers.py | 6 +- ibis/backends/flink/compiler/core.py | 3 +- ibis/backends/flink/identifiers.py | 560 --------------------- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 567 deletions(-) delete mode 100644 ibis/backends/flink/identifiers.py diff --git a/ibis/backends/base/sql/registry/helpers.py b/ibis/backends/base/sql/registry/helpers.py index c3d56f9e20d4d..16de96728d19d 100644 --- a/ibis/backends/base/sql/registry/helpers.py +++ b/ibis/backends/base/sql/registry/helpers.py @@ -14,11 +14,9 @@ def format_call(translator, func, *args): return "{}({})".format(func, ", ".join(formatted_args)) -def quote_identifier( - name, quotechar="`", force=False, base_identifiers=identifiers.base_identifiers -): +def quote_identifier(name, quotechar="`", force=False): """Add quotes to the `name` identifier if needed.""" - if force or name.count(" ") or name in base_identifiers: + if force or name.count(" ") or name in identifiers.base_identifiers: return f"{quotechar}{name}{quotechar}" else: return name diff --git a/ibis/backends/flink/compiler/core.py b/ibis/backends/flink/compiler/core.py index dc3d78d964d35..f5d4c37748000 100644 --- a/ibis/backends/flink/compiler/core.py +++ b/ibis/backends/flink/compiler/core.py @@ -14,13 +14,12 @@ TableSetFormatter, ) from ibis.backends.base.sql.registry import quote_identifier -from ibis.backends.flink import identifiers from ibis.backends.flink.translator import FlinkExprTranslator class FlinkTableSetFormatter(TableSetFormatter): def _quote_identifier(self, name): - return quote_identifier(name, base_identifiers=identifiers.base_identifiers) + return quote_identifier(name, force=True) def _format_in_memory_table(self, op): names = op.schema.names diff --git a/ibis/backends/flink/identifiers.py b/ibis/backends/flink/identifiers.py deleted file mode 100644 index 0535fb445b3d5..0000000000000 --- a/ibis/backends/flink/identifiers.py +++ /dev/null @@ -1,560 +0,0 @@ -from __future__ import annotations - -# https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/dev/table/sql/overview/#reserved-keywords -base_identifiers = [ - "a", - "abs", - "absolute", - "action", - "ada", - "add", - "admin", - "after", - "all", - "allocate", - "allow", - "alter", - "always", - "and", - "analyze", - "any", - "are", - "array", - "as", - "asc", - "asensitive", - "assertion", - "assignment", - "asymmetric", - "at", - "atomic", - "attribute", - "attributes", - "authorization", - "avg", - "before", - "begin", - "bernoulli", - "between", - "bigint", - "binary", - "bit", - "blob", - "boolean", - "both", - "breadth", - "by", - "bytes", - "c", - "call", - "called", - "cardinality", - "cascade", - "cascaded", - "case", - "cast", - "catalog", - "catalog_name", - "ceil", - "ceiling", - "century", - "chain", - "char", - "character", - "characteristics", - "characters", - "character_length", - "character_set_catalog", - "character_set_name", - "character_set_schema", - "char_length", - "check", - "class_origin", - "clob", - "close", - "coalesce", - "cobol", - "collate", - "collation", - "collation_catalog", - "collation_name", - "collation_schema", - "collect", - "column", - "columns", - "column_name", - "command_function", - "command_function_code", - "commit", - "committed", - "condition", - "condition_number", - "connect", - "connection", - "connection_name", - "constraint", - "constraints", - "constraint_catalog", - "constraint_name", - "constraint_schema", - "constructor", - "contains", - "continue", - "convert", - "corr", - "corresponding", - "count", - "covar_pop", - "covar_samp", - "create", - "cross", - "cube", - "cume_dist", - "current", - "current_catalog", - "current_date", - "current_default_transform_group", - "current_path", - "current_role", - "current_schema", - "current_time", - "current_timestamp", - "current_transform_group_for_type", - "current_user", - "cursor", - "cursor_name", - "cycle", - "data", - "database", - "date", - "datetime_interval_code", - "datetime_interval_precision", - "day", - "deallocate", - "dec", - "decade", - "decimal", - "declare", - "default", - "defaults", - "deferrable", - "deferred", - "defined", - "definer", - "degree", - "delete", - "dense_rank", - "depth", - "deref", - "derived", - "desc", - "describe", - "description", - "descriptor", - "deterministic", - "diagnostics", - "disallow", - "disconnect", - "dispatch", - "distinct", - "domain", - "double", - "dow", - "doy", - "drop", - "dynamic", - "dynamic_function", - "dynamic_function_code", - "each", - "element", - "else", - "end", - "end-exec", - "epoch", - "equals", - "escape", - "every", - "except", - "exception", - "exclude", - "excluding", - "exec", - "execute", - "exists", - "exp", - "explain", - "extend", - "external", - "extract", - "false", - "fetch", - "filter", - "final", - "first", - "first_value", - "float", - "floor", - "following", - "for", - "foreign", - "fortran", - "found", - "frac_second", - "free", - "from", - "full", - "function", - "fusion", - "g", - "general", - "generated", - "get", - "global", - "go", - "goto", - "grant", - "granted", - "group", - "grouping", - "having", - "hierarchy", - "hold", - "hour", - "identity", - "immediate", - "implementation", - "import", - "in", - "including", - "increment", - "indicator", - "initially", - "inner", - "inout", - "input", - "insensitive", - "insert", - "instance", - "instantiable", - "int", - "integer", - "intersect", - "intersection", - "interval", - "into", - "invoker", - "is", - "isolation", - "java", - "join", - "k", - "key", - "key_member", - "key_type", - "label", - "language", - "large", - "last", - "last_value", - "lateral", - "leading", - "left", - "length", - "level", - "library", - "like", - "limit", - "ln", - "local", - "localtime", - "localtimestamp", - "locator", - "lower", - "m", - "map", - "match", - "matched", - "max", - "maxvalue", - "member", - "merge", - "message_length", - "message_octet_length", - "message_text", - "method", - "microsecond", - "millennium", - "min", - "minute", - "minvalue", - "mod", - "modifies", - "module", - "modules", - "month", - "more", - "multiset", - "mumps", - "name", - "names", - "national", - "natural", - "nchar", - "nclob", - "nesting", - "new", - "next", - "no", - "none", - "normalize", - "normalized", - "not", - "null", - "nullable", - "nullif", - "nulls", - "number", - "numeric", - "object", - "octets", - "octet_length", - "of", - "offset", - "old", - "on", - "only", - "open", - "option", - "options", - "or", - "order", - "ordering", - "ordinality", - "others", - "out", - "outer", - "output", - "over", - "overlaps", - "overlay", - "overriding", - "pad", - "parameter", - "parameter_mode", - "parameter_name", - "parameter_ordinal_position", - "parameter_specific_catalog", - "parameter_specific_name", - "parameter_specific_schema", - "partial", - "partition", - "pascal", - "passthrough", - "path", - "percentile_cont", - "percentile_disc", - "percent_rank", - "placing", - "plan", - "pli", - "position", - "power", - "preceding", - "precision", - "prepare", - "preserve", - "primary", - "prior", - "privileges", - "procedure", - "public", - "quarter", - "range", - "rank", - "raw", - "read", - "reads", - "real", - "recursive", - "ref", - "references", - "referencing", - "regr_avgx", - "regr_avgy", - "regr_count", - "regr_intercept", - "regr_r2", - "regr_slope", - "regr_sxx", - "regr_sxy", - "regr_syy", - "relative", - "release", - "repeatable", - "reset", - "restart", - "restrict", - "result", - "return", - "returned_cardinality", - "returned_length", - "returned_octet_length", - "returned_sqlstate", - "returns", - "revoke", - "right", - "role", - "rollback", - "rollup", - "routine", - "routine_catalog", - "routine_name", - "routine_schema", - "row", - "rows", - "row_count", - "row_number", - "savepoint", - "scale", - "schema", - "schema_name", - "scope", - "scope_catalogs", - "scope_name", - "scope_schema", - "scroll", - "search", - "second", - "section", - "security", - "select", - "self", - "sensitive", - "sequence", - "serializable", - "server", - "server_name", - "session", - "session_user", - "set", - "sets", - "similar", - "simple", - "size", - "smallint", - "some", - "source", - "space", - "specific", - "specifictype", - "specific_name", - "sql", - "sqlexception", - "sqlstate", - "sqlwarning", - "sql_tsi_day", - "sql_tsi_frac_second", - "sql_tsi_hour", - "sql_tsi_microsecond", - "sql_tsi_minute", - "sql_tsi_month", - "sql_tsi_quarter", - "sql_tsi_second", - "sql_tsi_week", - "sql_tsi_year", - "sqrt", - "start", - "state", - "statement", - "static", - "statistics", - "stddev_pop", - "stddev_samp", - "stream", - "string", - "structure", - "style", - "subclass_origin", - "submultiset", - "substitute", - "substring", - "sum", - "symmetric", - "system", - "system_user", - "table", - "tablesample", - "table_name", - "temporary", - "then", - "ties", - "time", - "timestamp", - "timestampadd", - "timestampdiff", - "timezone_hour", - "timezone_minute", - "tinyint", - "to", - "top_level_count", - "trailing", - "transaction", - "transactions_active", - "transactions_committed", - "transactions_rolled_back", - "transform", - "transforms", - "translate", - "translation", - "treat", - "trigger", - "trigger_catalog", - "trigger_name", - "trigger_schema", - "trim", - "true", - "type", - "uescape", - "unbounded", - "uncommitted", - "under", - "union", - "unique", - "unknown", - "unnamed", - "unnest", - "update", - "upper", - "upsert", - "usage", - "user", - "user_defined_type_catalog", - "user_defined_type_code", - "user_defined_type_name", - "user_defined_type_schema", - "using", - "value", - "values", - "varbinary", - "varchar", - "varying", - "var_pop", - "var_samp", - "version", - "view", - "week", - "when", - "whenever", - "where", - "width_bucket", - "window", - "with", - "within", - "without", - "work", - "wrapper", - "write", - "xml", - "year", - "zone", -] diff --git a/pyproject.toml b/pyproject.toml index 347350ff367b1..a4ac63034d3de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -376,7 +376,7 @@ show_deps = true [tool.codespell] # local codespell matches `./docs`, pre-commit codespell matches `docs` skip = "*.lock,.direnv,.git,./docs/_freeze,docs/_freeze/**,*.svg,*.css,*.html,*.js" -ignore-regex = '\b(i[if]f|I[IF]F|AFE|inout)\b' +ignore-regex = '\b(i[if]f|I[IF]F|AFE)\b' builtin = "clear,rare,names" [tool.ruff]