sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
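As a quick end-to-end check, the dialect can be exercised through sqlglot's top-level API. A minimal sketch (the rendered output is indicative, not guaranteed verbatim):

import sqlglot

# COUNTIF is BigQuery-specific; sqlglot parses it into exp.CountIf and lets the
# target dialect decide how to render it.
print(sqlglot.transpile("SELECT COUNTIF(x > 0) FROM t", read="bigquery", write="duckdb")[0])
# roughly: SELECT COUNT_IF(x > 0) FROM t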
class BigQuery(Dialect)

WEEK_OFFSET = -1
    First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
LOG_BASE_FIRST = False
    Whether the base comes first in the LOG function.
    Possible values: True, False, None (two arguments are not supported by LOG).
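Because LOG_BASE_FIRST is False, BigQuery's LOG(value, base) argument order is swapped when transpiling to a dialect whose LOG takes the base first. A minimal sketch (output indicative):

import sqlglot

# BigQuery reads LOG(100, 10) as log base 10 of 100; Hive expects the base first.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="hive")[0])
# roughly: SELECT LOG(10, 100)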
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
    Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False
    Determines how function names are going to be normalized. Possible values:
        "upper" or True: Convert names to uppercase.
        "lower": Convert names to lowercase.
        False: Disables function name normalization.
TIME_MAPPING = {"%D": "%m/%d/%y", "%E*S": "%S.%f", "%E6S": "%S.%f"}
    Associates this dialect's time formats with their equivalent Python strftime formats.
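These entries drive format-string translation during transpilation. A minimal sketch (output indicative):

import sqlglot

# BigQuery's %D element is rewritten to its strftime equivalent, %m/%d/%y.
print(sqlglot.transpile("SELECT FORMAT_DATE('%D', d)", read="bigquery", write="duckdb")[0])
# roughly: SELECT STRFTIME(CAST(d AS DATE), '%m/%d/%y')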
FORMAT_MAPPING = {"DD": "%d", "MM": "%m", "MON": "%b", "MONTH": "%B", "YYYY": "%Y", "YY": "%y", "HH": "%I", "HH12": "%I", "HH24": "%H", "MI": "%M", "SS": "%S", "SSSSS": "%f", "TZH": "%z"}
    Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
    If empty, the corresponding trie will be constructed off of TIME_MAPPING.
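A minimal sketch of the FORMAT cast syntax this mapping supports (output indicative):

import sqlglot

# The YYYY-MM-DD format elements are resolved through FORMAT_MAPPING, so the cast
# should round-trip as a PARSE_DATE call using strftime codes.
print(sqlglot.transpile("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="bigquery", write="bigquery")[0])
# roughly: SELECT PARSE_DATE('%Y-%m-%d', x)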
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
    Columns that are auto-generated by the engine corresponding to this dialect.
    For example, such columns may be excluded from SELECT * queries.
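A minimal sketch of the intended effect on star expansion, assuming a hypothetical table t that carries an ingestion-time partition pseudo-column (output indicative):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

schema = {"t": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}
# Expanding SELECT * should skip _PARTITIONTIME, mirroring BigQuery's behavior.
expr = qualify(parse_one("SELECT * FROM t", read="bigquery"), schema=schema, dialect="bigquery")
print(expr.sql("bigquery"))
# roughly: SELECT t.x AS x FROM t AS t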
def normalize_identifier(self, expression: E) -> E:
    Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

    For example, an identifier like FoO would be resolved as foo in Postgres, because it
    lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
    it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
    and so any normalization would be prohibited in order to avoid "breaking" the identifier.

    There are also dialects like Spark, which are case-insensitive even when quotes are
    present, and dialects like MySQL, whose resolution rules match those employed by the
    underlying operating system; for example, they may always be case-sensitive in Linux.

    Finally, the normalization behavior of some engines can even be controlled through
    flags, like in Redshift's case, where users can explicitly set
    enable_case_sensitive_identifier.

    SQLGlot aims to understand and handle all of these different behaviors gracefully, so
    that it can analyze queries in the optimizer and successfully capture their semantics.
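In practice this hook is usually exercised through the normalize_identifiers optimizer rule rather than called directly. A minimal sketch (output indicative):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# The unquoted column is case-insensitive in BigQuery and gets lowercased, while
# the quoted table parts keep their casing because table names are case-sensitive.
expr = parse_one("SELECT Col FROM `My-Project`.`Dataset`.`Tbl`", read="bigquery")
print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))
# roughly: SELECT col FROM `My-Project`.`Dataset`.`Tbl`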
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
297 class Tokenizer(tokens.Tokenizer): 298 QUOTES = ["'", '"', '"""', "'''"] 299 COMMENTS = ["--", "#", ("/*", "*/")] 300 IDENTIFIERS = ["`"] 301 STRING_ESCAPES = ["\\"] 302 303 HEX_STRINGS = [("0x", ""), ("0X", "")] 304 305 BYTE_STRINGS = [ 306 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 307 ] 308 309 RAW_STRINGS = [ 310 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 311 ] 312 313 KEYWORDS = { 314 **tokens.Tokenizer.KEYWORDS, 315 "ANY TYPE": TokenType.VARIANT, 316 "BEGIN": TokenType.COMMAND, 317 "BEGIN TRANSACTION": TokenType.BEGIN, 318 "BYTES": TokenType.BINARY, 319 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 320 "DATETIME": TokenType.TIMESTAMP, 321 "DECLARE": TokenType.COMMAND, 322 "ELSEIF": TokenType.COMMAND, 323 "EXCEPTION": TokenType.COMMAND, 324 "FLOAT64": TokenType.DOUBLE, 325 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 326 "MODEL": TokenType.MODEL, 327 "NOT DETERMINISTIC": TokenType.VOLATILE, 328 "RECORD": TokenType.STRUCT, 329 "TIMESTAMP": TokenType.TIMESTAMPTZ, 330 } 331 KEYWORDS.pop("DIV") 332 KEYWORDS.pop("VALUES")
Inherited Members
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(this=this, db=db, catalog=catalog)
            table.meta["quoted_table"] = True

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
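To see one of these parser overrides in action, the sketch below (using sqlglot's public API; t and col are placeholder names) parses a BigQuery query and inspects the resulting tree. Per the FUNCTIONS entry above, SPLIT's delimiter defaults to "," when omitted:

import sqlglot
from sqlglot import exp

# SPLIT(col) gets an implicit ',' delimiter per the "SPLIT" override above.
ast = sqlglot.parse_one("SELECT SPLIT(col) FROM t", read="bigquery")

split = ast.find(exp.Split)
assert split is not None
print(split.args["expression"])  # the implicit ',' delimiter literal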
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
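These options can be exercised through the top-level API, which forwards extra keyword arguments to the parser (a sketch under that assumption; the malformed query is just an illustration):

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # Collect up to `max_errors` messages before raising, instead of
    # raising immediately on the first problem.
    sqlglot.parse_one(
        "SELECT * FROM", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=3
    )
except ParseError as e:
    print(e.errors)  # structured details: message, line, column, context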
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.Mod: rename_func("MOD"),
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
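As a quick illustration of TYPE_MAPPING at work (a sketch; duckdb is just an arbitrary source dialect and t/x are placeholder names):

import sqlglot

# DOUBLE is rendered as FLOAT64 per the TYPE_MAPPING above.
print(sqlglot.transpile("SELECT CAST(x AS DOUBLE) FROM t", read="duckdb", write="bigquery")[0])
# SELECT CAST(x AS FLOAT64) FROM t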
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
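Generation options like these are forwarded by the top-level helpers, so a sketch of pretty printing looks like this (the query is a throwaway example):

import sqlglot

# `pretty` is a plain keyword argument on transpile (and on Expression.sql).
print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 1", read="bigquery", write="bigquery", pretty=True
    )[0]
)
# prints a formatted, multi-line rendering of the query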
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
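For example, a fully quoted multi-part name is marked "quoted_table" at parse time and re-emitted as a single backquoted identifier (a sketch; my-project, ds and tbl are placeholder names):

import sqlglot

e = sqlglot.parse_one("SELECT * FROM `my-project.ds.tbl`", read="bigquery")
print(e.sql(dialect="bigquery"))  # SELECT * FROM `my-project.ds.tbl`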
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
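Outside the CAST ... FORMAT form, exp.AtTimeZone has no direct BigQuery syntax, so it is rewritten into nested DATETIME/TIMESTAMP calls. A sketch (ts is a placeholder column; postgres is just an example source dialect):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT ts AT TIME ZONE 'America/Los_Angeles'", read="postgres", write="bigquery"
    )[0]
)
# e.g. SELECT TIMESTAMP(DATETIME(ts, 'America/Los_Angeles'))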
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
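Together with the parser's BRACKET_OFFSETS, this round-trips BigQuery's subscript operators. A sketch (arr and t are placeholder names):

import sqlglot
from sqlglot import exp

# SAFE_OFFSET parses into offset/safe args on exp.Bracket and is rebuilt on the way out.
ast = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery")
bracket = ast.find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True
print(ast.sql(dialect="bigquery"))  # SELECT arr[SAFE_OFFSET(0)] FROM t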
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- OUTER_UNION_MODIFIERS
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql