sqlglot.dialects.prql
from __future__ import annotations

import typing as t

from sqlglot import exp, parser, tokens
from sqlglot.dialects.dialect import Dialect
from sqlglot.tokens import TokenType


def _select_all(table: exp.Expression) -> t.Optional[exp.Select]:
    """Wrap *table* in a ``SELECT * FROM table`` query, or return None for a falsy table."""
    return exp.select("*").from_(table, copy=False) if table else None


class PRQL(Dialect):
    """Dialect that parses PRQL pipelines into sqlglot's SQL expression tree.

    PRQL is a pipeline language: a query starts with ``from <table>`` and is
    followed by transforms (``derive``, ``select``, ``take``, ``filter``,
    ``append``, ``remove``, ``intersect``, ``sort``), each of which refines
    the query built so far (see ``Parser.TRANSFORM_PARSERS``).
    """

    # In PRQL `||` is logical OR (mapped in Parser.CONJUNCTION), not string concat.
    DPIPE_IS_STRING_CONCAT = False

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer for PRQL source text."""

        # Backticks quote identifiers; both quote characters delimit strings.
        IDENTIFIERS = ["`"]
        QUOTES = ["'", '"']

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "=": TokenType.ALIAS,  # PRQL's `name = expr` aliasing form
            "'": TokenType.QUOTE,
            '"': TokenType.QUOTE,
            "`": TokenType.IDENTIFIER,
            "#": TokenType.COMMENT,  # '#' starts a line comment in PRQL
        }

        # No extra keywords beyond the base tokenizer's.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
        }

    class Parser(parser.Parser):
        """Parser that turns a PRQL token stream into a sqlglot Query tree."""

        # PRQL spells AND/OR as `&&` / `||`.
        CONJUNCTION = {
            **parser.Parser.CONJUNCTION,
            TokenType.DAMP: exp.And,
            TokenType.DPIPE: exp.Or,
        }

        # Maps a PRQL transform keyword to a callable that applies the
        # transform to the query built so far and returns the updated query.
        TRANSFORM_PARSERS = {
            "DERIVE": lambda self, query: self._parse_selection(query),
            "SELECT": lambda self, query: self._parse_selection(query, append=False),
            "TAKE": lambda self, query: self._parse_take(query),
            "FILTER": lambda self, query: query.where(self._parse_conjunction()),
            "APPEND": lambda self, query: query.union(
                _select_all(self._parse_table()), distinct=False, copy=False
            ),
            "REMOVE": lambda self, query: query.except_(
                _select_all(self._parse_table()), distinct=False, copy=False
            ),
            "INTERSECT": lambda self, query: query.intersect(
                _select_all(self._parse_table()), distinct=False, copy=False
            ),
            "SORT": lambda self, query: self._parse_order_by(query),
        }

        def _parse_statement(self) -> t.Optional[exp.Expression]:
            """Parse a bare expression, falling back to a full PRQL query."""
            # Simplified from assign-then-reassign: truthiness fallback is the intent.
            return self._parse_expression() or self._parse_query()

        def _parse_query(self) -> t.Optional[exp.Query]:
            """Parse ``from <table>`` plus any number of transforms.

            Returns None when the input does not start with FROM.
            """
            from_ = self._parse_from()

            if not from_:
                return None

            # Every PRQL pipeline starts as SELECT * FROM <table>; each
            # recognized transform keyword then refines the query in place.
            query = exp.select("*").from_(from_, copy=False)

            while self._match_texts(self.TRANSFORM_PARSERS):
                query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)

            return query

        def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
            """Parse a ``derive``/``select`` tuple and add its columns to *query*.

            Args:
                query: The query built so far.
                append: True for ``derive`` (keep existing projections),
                    False for ``select`` (replace them).

            Raises:
                ParseError: If an opening ``{`` has no matching ``}``.
            """
            if self._match(TokenType.L_BRACE):
                selects = self._parse_csv(self._parse_expression)

                if not self._match(TokenType.R_BRACE, expression=query):
                    self.raise_error("Expecting }")
            else:
                expression = self._parse_expression()
                selects = [expression] if expression else []

            # Aliases produced by earlier transforms, keyed by name, so that a
            # later column reference to an alias is inlined as its definition.
            projections = {
                select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
                for select in query.selects
            }

            selects = [
                select.transform(
                    lambda s: (projections[s.name].copy() if s.name in projections else s)
                    if isinstance(s, exp.Column)
                    else s,
                    copy=False,
                )
                for select in selects
            ]

            return query.select(*selects, append=append, copy=False)

        def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
            """Parse ``take <n>`` into a LIMIT, or None when no number follows."""
            num = self._parse_number()  # TODO: TAKE for ranges a..b
            return query.limit(num) if num else None

        def _parse_ordered(
            self, parse_method: t.Optional[t.Callable] = None
        ) -> t.Optional[exp.Ordered]:
            """Parse one sort key with an optional ``+`` (asc, default) or ``-`` (desc) prefix."""
            # '+' merely restates the ascending default, so it is consumed and
            # ignored.  (Previously `desc` also OR-ed in `asc and False`, which
            # is always falsy, and `term` was assigned twice — both removed.)
            self._match(TokenType.PLUS)
            desc = self._match(TokenType.DASH)
            term = super()._parse_ordered(parse_method=parse_method)
            if term and desc:
                term.set("desc", True)
                term.set("nulls_first", False)
            return term

        def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]:
            """Parse ``sort {a, -b, ...}`` (braces optional for one key) into ORDER BY."""
            l_brace = self._match(TokenType.L_BRACE)
            expressions = self._parse_csv(self._parse_ordered)
            if l_brace and not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")
            return query.order_by(self.expression(exp.Order, expressions=expressions), copy=False)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            """Parse a conjunction, honoring PRQL's ``alias = expr`` aliasing form."""
            if self._next and self._next.token_type == TokenType.ALIAS:
                alias = self._parse_id_var(True)
                self._match(TokenType.ALIAS)
                return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
            return self._parse_conjunction()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a (possibly dotted) table name; the base-class options are ignored."""
            return self._parse_table_parts()

        def _parse_from(
            self, joins: bool = False, skip_from_token: bool = False
        ) -> t.Optional[exp.From]:
            """Parse a FROM clause, returning None when the FROM keyword is absent."""
            if not skip_from_token and not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
            )
class PRQL(Dialect):
    """Dialect that parses PRQL pipelines into sqlglot's SQL expression tree.

    A PRQL query starts with ``from <table>`` and is followed by transforms
    (``derive``, ``select``, ``take``, ``filter``, ``append``, ``remove``,
    ``intersect``, ``sort``), each of which refines the query built so far
    (see ``Parser.TRANSFORM_PARSERS``).
    """

    # In PRQL `||` is logical OR (mapped below in Parser.CONJUNCTION),
    # not string concatenation.
    DPIPE_IS_STRING_CONCAT = False

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer for PRQL source text."""

        # Backticks quote identifiers; both quote characters delimit strings.
        IDENTIFIERS = ["`"]
        QUOTES = ["'", '"']

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "=": TokenType.ALIAS,  # PRQL's `name = expr` aliasing form
            "'": TokenType.QUOTE,
            '"': TokenType.QUOTE,
            "`": TokenType.IDENTIFIER,
            "#": TokenType.COMMENT,  # '#' starts a line comment in PRQL
        }

        # No extra keywords beyond the base tokenizer's.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
        }

    class Parser(parser.Parser):
        """Parser that turns a PRQL token stream into a sqlglot Query tree."""

        # PRQL spells AND/OR as `&&` / `||`.
        CONJUNCTION = {
            **parser.Parser.CONJUNCTION,
            TokenType.DAMP: exp.And,
            TokenType.DPIPE: exp.Or,
        }

        # Maps a PRQL transform keyword to a callable that applies the
        # transform to the query built so far and returns the updated query.
        TRANSFORM_PARSERS = {
            "DERIVE": lambda self, query: self._parse_selection(query),
            "SELECT": lambda self, query: self._parse_selection(query, append=False),
            "TAKE": lambda self, query: self._parse_take(query),
            "FILTER": lambda self, query: query.where(self._parse_conjunction()),
            "APPEND": lambda self, query: query.union(
                _select_all(self._parse_table()), distinct=False, copy=False
            ),
            "REMOVE": lambda self, query: query.except_(
                _select_all(self._parse_table()), distinct=False, copy=False
            ),
            "INTERSECT": lambda self, query: query.intersect(
                _select_all(self._parse_table()), distinct=False, copy=False
            ),
            "SORT": lambda self, query: self._parse_order_by(query),
        }

        def _parse_statement(self) -> t.Optional[exp.Expression]:
            """Parse a bare expression, falling back to a full PRQL query."""
            expression = self._parse_expression()
            expression = expression if expression else self._parse_query()
            return expression

        def _parse_query(self) -> t.Optional[exp.Query]:
            """Parse ``from <table>`` plus any number of transforms; None without FROM."""
            from_ = self._parse_from()

            if not from_:
                return None

            # Every PRQL pipeline starts as SELECT * FROM <table>; each
            # recognized transform keyword then refines the query.
            query = exp.select("*").from_(from_, copy=False)

            while self._match_texts(self.TRANSFORM_PARSERS):
                query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)

            return query

        def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
            """Parse a ``derive``/``select`` tuple and add its columns to *query*.

            Args:
                query: The query built so far.
                append: True for ``derive`` (keep existing projections),
                    False for ``select`` (replace them).
            """
            if self._match(TokenType.L_BRACE):
                selects = self._parse_csv(self._parse_expression)

                if not self._match(TokenType.R_BRACE, expression=query):
                    self.raise_error("Expecting }")
            else:
                expression = self._parse_expression()
                selects = [expression] if expression else []

            # Aliases produced by earlier transforms, keyed by name, so that a
            # later column reference to an alias is inlined as its definition.
            projections = {
                select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
                for select in query.selects
            }

            selects = [
                select.transform(
                    lambda s: (projections[s.name].copy() if s.name in projections else s)
                    if isinstance(s, exp.Column)
                    else s,
                    copy=False,
                )
                for select in selects
            ]

            return query.select(*selects, append=append, copy=False)

        def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
            """Parse ``take <n>`` into a LIMIT, or None when no number follows."""
            num = self._parse_number()  # TODO: TAKE for ranges a..b
            return query.limit(num) if num else None

        def _parse_ordered(
            self, parse_method: t.Optional[t.Callable] = None
        ) -> t.Optional[exp.Ordered]:
            """Parse one sort key with an optional ``+`` (asc, default) or ``-`` (desc) prefix."""
            asc = self._match(TokenType.PLUS)
            # NOTE(review): `(asc and False)` is always falsy, so `desc` is
            # just whether '-' was matched; '+' is consumed and ignored.
            desc = self._match(TokenType.DASH) or (asc and False)
            # NOTE(review): duplicated `term = term =` assignment is harmless
            # but should be collapsed to a single assignment.
            term = term = super()._parse_ordered(parse_method=parse_method)
            if term and desc:
                term.set("desc", True)
                term.set("nulls_first", False)
            return term

        def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]:
            """Parse ``sort {a, -b, ...}`` (braces optional for one key) into ORDER BY."""
            l_brace = self._match(TokenType.L_BRACE)
            expressions = self._parse_csv(self._parse_ordered)
            if l_brace and not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")
            return query.order_by(self.expression(exp.Order, expressions=expressions), copy=False)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            """Parse a conjunction, honoring PRQL's ``alias = expr`` aliasing form."""
            if self._next and self._next.token_type == TokenType.ALIAS:
                alias = self._parse_id_var(True)
                self._match(TokenType.ALIAS)
                return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
            return self._parse_conjunction()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a (possibly dotted) table name; the base-class options are ignored."""
            return self._parse_table_parts()

        def _parse_from(
            self, joins: bool = False, skip_from_token: bool = False
        ) -> t.Optional[exp.From]:
            """Parse a FROM clause, returning None when the FROM keyword is absent."""
            if not skip_from_token and not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
            )
tokenizer_class =
<class 'PRQL.Tokenizer'>
parser_class =
<class 'PRQL.Parser'>
generator_class =
<class 'sqlglot.generator.Generator'>
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- STRICT_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Tokenizer for PRQL source text."""

    # Backticks quote identifiers; both quote characters delimit strings.
    IDENTIFIERS = ["`"]
    QUOTES = ["'", '"']

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "=": TokenType.ALIAS,  # PRQL's `name = expr` aliasing form
        "'": TokenType.QUOTE,
        '"': TokenType.QUOTE,
        "`": TokenType.IDENTIFIER,
        "#": TokenType.COMMENT,  # '#' starts a line comment in PRQL
    }

    # No extra keywords beyond the base tokenizer's.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
    }
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.ALIAS: 'ALIAS'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.QUOTE: 'QUOTE'>, '#': <TokenType.COMMENT: 'COMMENT'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': 
<TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ENUM': <TokenType.ENUM: 'ENUM'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 
'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 
'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'TRUNCATE': <TokenType.TRUNCATE: 'TRUNCATE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 
'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'UINT': <TokenType.UINT: 'UINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 
'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'SEQUENCE': <TokenType.SEQUENCE: 'SEQUENCE'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser that turns a PRQL token stream into a sqlglot Query tree."""

    # PRQL spells AND/OR as `&&` / `||`.
    CONJUNCTION = {
        **parser.Parser.CONJUNCTION,
        TokenType.DAMP: exp.And,
        TokenType.DPIPE: exp.Or,
    }

    # Maps a PRQL transform keyword to a callable that applies the
    # transform to the query built so far and returns the updated query.
    TRANSFORM_PARSERS = {
        "DERIVE": lambda self, query: self._parse_selection(query),
        "SELECT": lambda self, query: self._parse_selection(query, append=False),
        "TAKE": lambda self, query: self._parse_take(query),
        "FILTER": lambda self, query: query.where(self._parse_conjunction()),
        "APPEND": lambda self, query: query.union(
            _select_all(self._parse_table()), distinct=False, copy=False
        ),
        "REMOVE": lambda self, query: query.except_(
            _select_all(self._parse_table()), distinct=False, copy=False
        ),
        "INTERSECT": lambda self, query: query.intersect(
            _select_all(self._parse_table()), distinct=False, copy=False
        ),
        "SORT": lambda self, query: self._parse_order_by(query),
    }

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a bare expression, falling back to a full PRQL query."""
        expression = self._parse_expression()
        expression = expression if expression else self._parse_query()
        return expression

    def _parse_query(self) -> t.Optional[exp.Query]:
        """Parse ``from <table>`` plus any number of transforms; None without FROM."""
        from_ = self._parse_from()

        if not from_:
            return None

        # Every PRQL pipeline starts as SELECT * FROM <table>; each
        # recognized transform keyword then refines the query.
        query = exp.select("*").from_(from_, copy=False)

        while self._match_texts(self.TRANSFORM_PARSERS):
            query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)

        return query

    def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
        """Parse a ``derive``/``select`` tuple and add its columns to *query*.

        Args:
            query: The query built so far.
            append: True for ``derive`` (keep existing projections),
                False for ``select`` (replace them).
        """
        if self._match(TokenType.L_BRACE):
            selects = self._parse_csv(self._parse_expression)

            if not self._match(TokenType.R_BRACE, expression=query):
                self.raise_error("Expecting }")
        else:
            expression = self._parse_expression()
            selects = [expression] if expression else []

        # Aliases produced by earlier transforms, keyed by name, so that a
        # later column reference to an alias is inlined as its definition.
        projections = {
            select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
            for select in query.selects
        }

        selects = [
            select.transform(
                lambda s: (projections[s.name].copy() if s.name in projections else s)
                if isinstance(s, exp.Column)
                else s,
                copy=False,
            )
            for select in selects
        ]

        return query.select(*selects, append=append, copy=False)

    def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Parse ``take <n>`` into a LIMIT, or None when no number follows."""
        num = self._parse_number()  # TODO: TAKE for ranges a..b
        return query.limit(num) if num else None

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one sort key with an optional ``+`` (asc, default) or ``-`` (desc) prefix."""
        asc = self._match(TokenType.PLUS)
        # NOTE(review): `(asc and False)` is always falsy, so `desc` is just
        # whether '-' was matched; '+' is consumed and ignored.
        desc = self._match(TokenType.DASH) or (asc and False)
        # NOTE(review): duplicated `term = term =` assignment is harmless but
        # should be collapsed to a single assignment.
        term = term = super()._parse_ordered(parse_method=parse_method)
        if term and desc:
            term.set("desc", True)
            term.set("nulls_first", False)
        return term

    def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]:
        """Parse ``sort {a, -b, ...}`` (braces optional for one key) into ORDER BY."""
        l_brace = self._match(TokenType.L_BRACE)
        expressions = self._parse_csv(self._parse_ordered)
        if l_brace and not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")
        return query.order_by(self.expression(exp.Order, expressions=expressions), copy=False)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction, honoring PRQL's ``alias = expr`` aliasing form."""
        if self._next and self._next.token_type == TokenType.ALIAS:
            alias = self._parse_id_var(True)
            self._match(TokenType.ALIAS)
            return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
        return self._parse_conjunction()

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) table name; the base-class options are ignored."""
        return self._parse_table_parts()

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause, returning None when the FROM keyword is absent."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
CONJUNCTION =
{<TokenType.AND: 'AND'>: <class 'sqlglot.expressions.And'>, <TokenType.OR: 'OR'>: <class 'sqlglot.expressions.Or'>, <TokenType.DAMP: 'DAMP'>: <class 'sqlglot.expressions.And'>, <TokenType.DPIPE: 'DPIPE'>: <class 'sqlglot.expressions.Or'>}
TRANSFORM_PARSERS =
{'DERIVE': <function PRQL.Parser.<lambda>>, 'SELECT': <function PRQL.Parser.<lambda>>, 'TAKE': <function PRQL.Parser.<lambda>>, 'FILTER': <function PRQL.Parser.<lambda>>, 'APPEND': <function PRQL.Parser.<lambda>>, 'REMOVE': <function PRQL.Parser.<lambda>>, 'INTERSECT': <function PRQL.Parser.<lambda>>, 'SORT': <function PRQL.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- FUNCTIONS
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql