mirror of
https://github.com/maxkratz/edgedb.git
synced 2024-09-16 18:59:05 +00:00
Parser error recovery (#5693)
Co-authored-by: Michael J. Sullivan <sully@msully.net>
This commit is contained in:
parent
84d7875481
commit
6f6b4cd117
43 changed files with 2604 additions and 2087 deletions
142
Cargo.lock
generated
142
Cargo.lock
generated
|
@ -11,6 +11,12 @@ dependencies = [
|
|||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "append-only-vec"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5608767d94038891df4c7bb82f6b1beb55fe3d204735985e20de329bc35d5fee"
|
||||
|
||||
[[package]]
|
||||
name = "ascii"
|
||||
version = "0.9.3"
|
||||
|
@ -38,6 +44,7 @@ dependencies = [
|
|||
"num-bigint 0.4.3",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -66,9 +73,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.12.2"
|
||||
version = "3.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
|
||||
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
|
@ -203,13 +210,18 @@ dependencies = [
|
|||
name = "edgeql-parser"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"append-only-vec",
|
||||
"base32",
|
||||
"bigdecimal",
|
||||
"bumpalo",
|
||||
"cpython",
|
||||
"edgeql-parser-derive",
|
||||
"indexmap",
|
||||
"memchr",
|
||||
"num-bigint 0.3.3",
|
||||
"phf",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"snafu",
|
||||
"thiserror",
|
||||
|
@ -236,7 +248,11 @@ dependencies = [
|
|||
"cpython",
|
||||
"edgedb-protocol",
|
||||
"edgeql-parser",
|
||||
"indexmap",
|
||||
"num-bigint 0.4.3",
|
||||
"rmp-serde",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -298,9 +314,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.144"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
|
@ -328,6 +344,18 @@ dependencies = [
|
|||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f6f7833f2cbf2360a6cfd58cd41a53aa7a90bd4c202f5b1c7dd2ed73c57b2c3"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
|
@ -379,6 +407,48 @@ version = "1.0.12"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_assertions"
|
||||
version = "1.3.0"
|
||||
|
@ -419,6 +489,21 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.8.1"
|
||||
|
@ -436,6 +521,28 @@ version = "0.7.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
|
||||
|
||||
[[package]]
|
||||
name = "rmp"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44519172358fd6d58656c86ab8e7fbc9e1490c3e8f14d35ed78ca0dd07403c9f"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"num-traits",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rmp-serde"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5b13be192e0220b8afb7222aa5813cb62cc269ebb5cac346ca6487681d2913e"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"rmp",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.13"
|
||||
|
@ -468,6 +575,7 @@ version = "1.0.96"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
|
@ -484,6 +592,12 @@ dependencies = [
|
|||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||
|
||||
[[package]]
|
||||
name = "snafu"
|
||||
version = "0.7.4"
|
||||
|
@ -601,9 +715,9 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.86"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
|
||||
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"serde",
|
||||
|
@ -613,9 +727,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.86"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
|
||||
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
|
@ -628,9 +742,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.86"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
|
||||
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
|
@ -638,9 +752,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.86"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
|
||||
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -651,9 +765,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.86"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
|
||||
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
|
|
|
@ -25,23 +25,14 @@ import logging
|
|||
import os
|
||||
import sys
|
||||
import types
|
||||
import re
|
||||
|
||||
import parsing
|
||||
|
||||
from edb.common.exceptions import add_context, get_context
|
||||
from edb.common import context as pctx
|
||||
from edb.edgeql import tokenizer
|
||||
from edb.errors import EdgeQLSyntaxError
|
||||
from edb import _edgeql_parser as ql_parser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from edb.edgeql.parser.grammar import rust_lexer
|
||||
from edb.common import context as pctx, debug
|
||||
|
||||
ParserContext = pctx.ParserContext
|
||||
|
||||
logger = logging.getLogger('edb.common.parsing')
|
||||
TRAILING_WS_IN_CONTINUATION = re.compile(r'\\ \s+\n')
|
||||
|
||||
|
||||
class ParserSpecIncompatibleError(Exception):
|
||||
|
@ -131,17 +122,6 @@ def inline(argument_index: int):
|
|||
return decorator
|
||||
|
||||
|
||||
def make_inlining_func(arg_index: int):
|
||||
"""Makes a parser production handler which simply inlines an argument."""
|
||||
# TODO: remove this when Rust parser is merged
|
||||
|
||||
def wrapper(obj, *args, **kwargs):
|
||||
obj.val = args[arg_index].val
|
||||
return obj
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class NontermMeta(type):
|
||||
def __new__(mcls, name, bases, dct):
|
||||
result = super().__new__(mcls, name, bases, dct)
|
||||
|
@ -171,13 +151,7 @@ class NontermMeta(type):
|
|||
attr = lambda self, *args, meth=attr: meth(self, *args)
|
||||
attr.__doc__ = doc
|
||||
|
||||
if inline_index is not None:
|
||||
# TODO: remove this when Rust parser is merged
|
||||
a = make_inlining_func(inline_index)
|
||||
else:
|
||||
a = attr
|
||||
|
||||
a = pctx.has_context(a)
|
||||
a = pctx.has_context(attr)
|
||||
|
||||
a.__doc__ = attr.__doc__
|
||||
a.inline_index = inline_index
|
||||
|
@ -308,70 +282,14 @@ class Precedence(parsing.Precedence, assoc='fail', metaclass=PrecedenceMeta):
|
|||
pass
|
||||
|
||||
|
||||
class ParserError(Exception):
|
||||
def __init__(
|
||||
self, msg=None, *, hint=None, details=None, token=None, line=None,
|
||||
col=None, expr=None, context=None):
|
||||
if msg is None:
|
||||
msg = 'syntax error at or near "%s"' % token
|
||||
super().__init__(msg, hint=hint, details=details)
|
||||
|
||||
self.token = token
|
||||
if line is not None:
|
||||
self.line = line
|
||||
if col is not None:
|
||||
self.col = col
|
||||
self.expr = expr
|
||||
if context:
|
||||
add_context(self, context)
|
||||
if line is None and col is None:
|
||||
self.line = context.start.line
|
||||
self.col = context.start.column
|
||||
|
||||
@property
|
||||
def context(self):
|
||||
try:
|
||||
return get_context(self, pctx.ParserContext)
|
||||
except LookupError:
|
||||
return None
|
||||
|
||||
|
||||
def _derive_hint(
|
||||
input: str,
|
||||
message: str,
|
||||
position: Tuple[int, int, int],
|
||||
) -> Optional[str]:
|
||||
_, _, off = position
|
||||
if message == r"invalid string literal: invalid escape sequence '\ '":
|
||||
if TRAILING_WS_IN_CONTINUATION.search(input[off:]):
|
||||
return "consider removing trailing whitespace"
|
||||
return None
|
||||
|
||||
|
||||
class Parser:
|
||||
class ParserSpec:
|
||||
parser_spec: ClassVar[parsing.Spec | None]
|
||||
lexer: Optional[rust_lexer.EdgeQLLexer]
|
||||
|
||||
def __init__(self, **parser_data):
|
||||
self.lexer = None
|
||||
self.parser = None
|
||||
self.parser_data = parser_data
|
||||
|
||||
def cleanup(self):
|
||||
self.__class__.parser_spec = None
|
||||
self.__class__.lexer_spec = None
|
||||
self.lexer = None
|
||||
self.parser = None
|
||||
|
||||
def get_debug(self):
|
||||
return False
|
||||
|
||||
def get_exception(self, native_err, context, token=None):
|
||||
if not isinstance(native_err, ParserError):
|
||||
return ParserError(native_err.args[0],
|
||||
context=context, token=token)
|
||||
else:
|
||||
return native_err
|
||||
return debug.flags.edgeql_parser
|
||||
|
||||
def get_parser_spec_module(self) -> types.ModuleType:
|
||||
raise NotImplementedError
|
||||
|
@ -421,96 +339,3 @@ class Parser:
|
|||
return os.path.join(
|
||||
os.path.dirname(mod.__file__),
|
||||
mod.__name__.rpartition('.')[2] + '.' + type)
|
||||
|
||||
def get_lexer(self):
|
||||
"""Return an initialized lexer.
|
||||
|
||||
The lexer must implement 'setinputstr' and 'token' methods.
|
||||
A lexer derived from edb.common.lexer.Lexer will satisfy these
|
||||
criteria.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def reset_parser(
|
||||
self,
|
||||
input: Union[str, tokenizer.Source],
|
||||
filename: Optional[str]=None
|
||||
):
|
||||
if not self.parser:
|
||||
self.lexer = self.get_lexer()
|
||||
self.parser = parsing.Lr(self.get_parser_spec())
|
||||
self.parser.parser_data = self.parser_data
|
||||
self.parser.verbose = self.get_debug()
|
||||
|
||||
self.parser.reset()
|
||||
assert self.lexer
|
||||
self.lexer.setinputstr(input, filename=filename)
|
||||
|
||||
def convert_lex_token(self, mod: Any, tok: ql_parser.Token) -> Token:
|
||||
token_cls = mod.TokenMeta.for_lex_token(tok.kind())
|
||||
return token_cls(tok.text(), tok.value(), self.context(tok))
|
||||
|
||||
def parse(
|
||||
self,
|
||||
input: Union[str, tokenizer.Source],
|
||||
filename: Optional[str] = None
|
||||
):
|
||||
try:
|
||||
self.reset_parser(input, filename=filename)
|
||||
assert self.lexer
|
||||
mod = self.get_parser_spec_module()
|
||||
|
||||
while tok := self.lexer.token():
|
||||
token = self.convert_lex_token(mod, tok)
|
||||
if token is None:
|
||||
continue
|
||||
|
||||
self.parser.token(token)
|
||||
|
||||
self.parser.eoi()
|
||||
|
||||
except ql_parser.TokenizerError as e:
|
||||
message, position = e.args
|
||||
|
||||
assert self.lexer
|
||||
hint = _derive_hint(self.lexer.inputstr, message, position)
|
||||
|
||||
raise EdgeQLSyntaxError(
|
||||
message, context=self.context(pos=position), hint=hint
|
||||
) from e
|
||||
|
||||
except parsing.UnexpectedToken as e:
|
||||
raise self.get_exception(
|
||||
e, context=self.context(tok), token=tok
|
||||
) from e
|
||||
|
||||
except ParserError as e:
|
||||
raise self.get_exception(e, context=e.context) from e
|
||||
|
||||
return self.parser.start[0].val
|
||||
|
||||
def context(self, tok=None, pos: Optional[Tuple[int, int, int]] = None):
|
||||
lex = self.lexer
|
||||
assert lex
|
||||
name = lex.filename if lex.filename else '<string>'
|
||||
|
||||
if tok is None:
|
||||
if pos is None:
|
||||
pos = lex.end_of_input
|
||||
context = pctx.ParserContext(
|
||||
name=name, buffer=lex.inputstr,
|
||||
start=pos[2], end=pos[2])
|
||||
else:
|
||||
context = pctx.ParserContext(
|
||||
name=name, buffer=lex.inputstr,
|
||||
start=tok.start()[2],
|
||||
end=tok.end()[2])
|
||||
|
||||
return context
|
||||
|
||||
|
||||
def line_col_from_char_offset(source, position):
|
||||
line = source[:position].count('\n') + 1
|
||||
col = source.rfind('\n', 0, position)
|
||||
col = position if col == -1 else position - col
|
||||
return line, col
|
||||
|
|
|
@ -8,7 +8,8 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
base32 = "0.4.0"
|
||||
bigdecimal = "0.3.0"
|
||||
bigdecimal = { version = "0.3.0", features = ["serde"] }
|
||||
num-bigint = { version = "0.3.0", features = ["serde"] }
|
||||
sha2 = "0.10.2"
|
||||
snafu = "0.7.0"
|
||||
memchr = "2.5.0"
|
||||
|
@ -21,10 +22,14 @@ unicode-width = "0.1.8"
|
|||
edgeql-parser-derive = { path = "edgeql-parser-derive", optional = true }
|
||||
cpython = { version = "0.7.0", optional = true }
|
||||
indexmap = "1.9.3"
|
||||
serde_json = {version="1.0", features=["preserve_order"]}
|
||||
bumpalo = {version="3.13.0", features=["collections"]}
|
||||
phf = { version = "0.11.1", features = ["macros"] }
|
||||
append-only-vec = "0.1.2"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
wasm-lexer = ["wasm-bindgen", "serde"]
|
||||
python = ["cpython", "edgeql-parser-derive"]
|
||||
python = ["cpython", "serde", "edgeql-parser-derive"]
|
||||
|
||||
[lib]
|
||||
|
|
|
@ -7,11 +7,15 @@ rust-version = "1.59"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
edgeql-parser = {path = ".."}
|
||||
edgeql-parser = {path = "..", features=["serde"]}
|
||||
bytes = "1.0.1"
|
||||
num-bigint = "0.4.3"
|
||||
bigdecimal = "0.3.0"
|
||||
blake2 = "0.10.4"
|
||||
serde = {version="1.0", features=["derive"]}
|
||||
serde_json = "1.0"
|
||||
rmp-serde = "1.1.1"
|
||||
indexmap = "1.9.3"
|
||||
|
||||
[dependencies.edgedb-protocol]
|
||||
git = "https://github.com/edgedb/edgedb-rust"
|
||||
|
|
|
@ -1,65 +1,92 @@
|
|||
use cpython::{PyObject, ToPyObject, Python, PyErr, PythonObject, PyType};
|
||||
use cpython::exc::Exception;
|
||||
use crate::cpython::PythonObjectWithTypeObject;
|
||||
|
||||
use cpython::exc::Exception;
|
||||
use cpython::{
|
||||
PyClone, PyErr, PyList, PyObject, PyResult, PyType, Python, PythonObject, ToPyObject,
|
||||
};
|
||||
use edgeql_parser::tokenizer::Error;
|
||||
|
||||
// can't use py_exception macro because that fails on dotted module name
|
||||
pub struct TokenizerError(PyObject);
|
||||
pub struct SyntaxError(PyObject);
|
||||
|
||||
pyobject_newtype!(TokenizerError);
|
||||
pyobject_newtype!(SyntaxError);
|
||||
|
||||
impl TokenizerError {
|
||||
impl SyntaxError {
|
||||
pub fn new<T: ToPyObject>(py: Python, args: T) -> PyErr {
|
||||
PyErr::new::<TokenizerError, T>(py, args)
|
||||
PyErr::new::<SyntaxError, T>(py, args)
|
||||
}
|
||||
}
|
||||
|
||||
impl cpython::PythonObjectWithCheckedDowncast for TokenizerError {
|
||||
impl cpython::PythonObjectWithCheckedDowncast for SyntaxError {
|
||||
#[inline]
|
||||
fn downcast_from(py: Python, obj: PyObject)
|
||||
-> Result<TokenizerError, cpython::PythonObjectDowncastError>
|
||||
{
|
||||
if TokenizerError::type_object(py).is_instance(py, &obj) {
|
||||
fn downcast_from(
|
||||
py: Python,
|
||||
obj: PyObject,
|
||||
) -> Result<SyntaxError, cpython::PythonObjectDowncastError> {
|
||||
if SyntaxError::type_object(py).is_instance(py, &obj) {
|
||||
Ok(unsafe { PythonObject::unchecked_downcast_from(obj) })
|
||||
} else {
|
||||
Err(cpython::PythonObjectDowncastError::new(py,
|
||||
"TokenizerError",
|
||||
TokenizerError::type_object(py),
|
||||
Err(cpython::PythonObjectDowncastError::new(
|
||||
py,
|
||||
"SyntaxError",
|
||||
SyntaxError::type_object(py),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn downcast_borrow_from<'a, 'p>(py: Python<'p>, obj: &'a PyObject)
|
||||
-> Result<&'a TokenizerError, cpython::PythonObjectDowncastError<'p>>
|
||||
{
|
||||
if TokenizerError::type_object(py).is_instance(py, obj) {
|
||||
fn downcast_borrow_from<'a, 'p>(
|
||||
py: Python<'p>,
|
||||
obj: &'a PyObject,
|
||||
) -> Result<&'a SyntaxError, cpython::PythonObjectDowncastError<'p>> {
|
||||
if SyntaxError::type_object(py).is_instance(py, obj) {
|
||||
Ok(unsafe { PythonObject::unchecked_downcast_borrow_from(obj) })
|
||||
} else {
|
||||
Err(cpython::PythonObjectDowncastError::new(py,
|
||||
"TokenizerError",
|
||||
TokenizerError::type_object(py),
|
||||
Err(cpython::PythonObjectDowncastError::new(
|
||||
py,
|
||||
"SyntaxError",
|
||||
SyntaxError::type_object(py),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl cpython::PythonObjectWithTypeObject for TokenizerError {
|
||||
impl cpython::PythonObjectWithTypeObject for SyntaxError {
|
||||
#[inline]
|
||||
fn type_object(py: Python) -> PyType {
|
||||
unsafe {
|
||||
static mut TYPE_OBJECT: *mut cpython::_detail::ffi::PyTypeObject
|
||||
= 0 as *mut cpython::_detail::ffi::PyTypeObject;
|
||||
static mut TYPE_OBJECT: *mut cpython::_detail::ffi::PyTypeObject =
|
||||
0 as *mut cpython::_detail::ffi::PyTypeObject;
|
||||
|
||||
if TYPE_OBJECT.is_null() {
|
||||
TYPE_OBJECT = PyErr::new_type(
|
||||
py,
|
||||
"edb._edgeql_parser.TokenizerError",
|
||||
"edb._edgeql_parser.SyntaxError",
|
||||
Some(PythonObject::into_object(py.get_type::<Exception>())),
|
||||
None).as_type_ptr();
|
||||
None,
|
||||
)
|
||||
.as_type_ptr();
|
||||
}
|
||||
|
||||
PyType::from_type_ptr(py, TYPE_OBJECT)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
py_class!(pub class ParserResult |py| {
|
||||
data _out: PyObject;
|
||||
data _errors: PyList;
|
||||
|
||||
def out(&self) -> PyResult<PyObject> {
|
||||
Ok(self._out(py).clone_ref(py))
|
||||
}
|
||||
def errors(&self) -> PyResult<PyList> {
|
||||
Ok(self._errors(py).clone_ref(py))
|
||||
}
|
||||
});
|
||||
|
||||
pub fn parser_error_into_tuple(py: Python, error: Error) -> PyObject {
|
||||
(error.message, (error.span.start, error.span.end))
|
||||
.into_py_object(py)
|
||||
.into_object()
|
||||
}
|
||||
|
|
|
@ -5,8 +5,7 @@ use cpython::exc::RuntimeError;
|
|||
|
||||
use edgeql_parser::hash;
|
||||
|
||||
use crate::errors::TokenizerError;
|
||||
use crate::pynormalize::py_pos;
|
||||
use crate::errors::SyntaxError;
|
||||
|
||||
|
||||
py_class!(pub class Hasher |py| {
|
||||
|
@ -26,7 +25,7 @@ py_class!(pub class Hasher |py| {
|
|||
hasher.add_source(&text)
|
||||
.map_err(|e| match e {
|
||||
hash::Error::Tokenizer(msg, pos) => {
|
||||
TokenizerError::new(py, (msg, py_pos(py, &pos)))
|
||||
SyntaxError::new(py, (msg, (pos.offset, py.None())))
|
||||
}
|
||||
})?;
|
||||
Ok(py.None())
|
||||
|
|
|
@ -1,20 +1,22 @@
|
|||
#[macro_use]
|
||||
extern crate cpython;
|
||||
|
||||
use cpython::PyString;
|
||||
use cpython::{PyObject, PyString};
|
||||
|
||||
mod errors;
|
||||
mod hash;
|
||||
mod keywords;
|
||||
pub mod normalize;
|
||||
mod parser;
|
||||
mod position;
|
||||
mod pynormalize;
|
||||
mod tokenizer;
|
||||
|
||||
use errors::TokenizerError;
|
||||
use errors::{SyntaxError, ParserResult};
|
||||
use parser::{parse, CSTNode, Production};
|
||||
use position::{offset_of_line, SourcePoint};
|
||||
use pynormalize::normalize;
|
||||
use tokenizer::{get_unpickle_fn, tokenize, Token};
|
||||
use tokenizer::{get_fn_unpickle_token, tokenize, OpaqueToken};
|
||||
|
||||
py_module_initializer!(
|
||||
_edgeql_parser,
|
||||
|
@ -22,6 +24,7 @@ py_module_initializer!(
|
|||
PyInit__edgeql_parser,
|
||||
|py, m| {
|
||||
tokenizer::init_module(py);
|
||||
parser::init_module();
|
||||
let keywords = keywords::get_keywords(py)?;
|
||||
m.add(
|
||||
py,
|
||||
|
@ -30,9 +33,10 @@ py_module_initializer!(
|
|||
)?;
|
||||
|
||||
m.add(py, "tokenize", py_fn!(py, tokenize(data: &PyString)))?;
|
||||
m.add(py, "_unpickle_token", get_unpickle_fn(py))?;
|
||||
m.add(py, "Token", py.get_type::<Token>())?;
|
||||
m.add(py, "TokenizerError", py.get_type::<TokenizerError>())?;
|
||||
m.add(py, "_unpickle_token", get_fn_unpickle_token(py))?;
|
||||
m.add(py, "Token", py.get_type::<OpaqueToken>())?;
|
||||
m.add(py, "SyntaxError", py.get_type::<SyntaxError>())?;
|
||||
m.add(py, "ParserResult", py.get_type::<ParserResult>())?;
|
||||
m.add(py, "Entry", py.get_type::<pynormalize::Entry>())?;
|
||||
m.add(py, "SourcePoint", py.get_type::<SourcePoint>())?;
|
||||
m.add(py, "normalize", py_fn!(py, normalize(query: &PyString)))?;
|
||||
|
@ -46,6 +50,13 @@ py_module_initializer!(
|
|||
m.add(py, "partial_reserved_keywords", keywords.partial)?;
|
||||
m.add(py, "future_reserved_keywords", keywords.future)?;
|
||||
m.add(py, "current_reserved_keywords", keywords.current)?;
|
||||
m.add(
|
||||
py,
|
||||
"parse",
|
||||
py_fn!(py, parse(parser_name: &PyString, data: PyObject)),
|
||||
)?;
|
||||
m.add(py, "CSTNode", py.get_type::<CSTNode>())?;
|
||||
m.add(py, "Production", py.get_type::<Production>())?;
|
||||
Ok(())
|
||||
}
|
||||
);
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use edgeql_parser::tokenizer::{Kind, Tokenizer, Token, Value};
|
||||
use edgeql_parser::keywords::Keyword;
|
||||
use edgeql_parser::position::{Pos, Span};
|
||||
use edgeql_parser::tokenizer::{Kind, Token, Tokenizer, Value};
|
||||
|
||||
use blake2::{Blake2b512, Digest};
|
||||
|
||||
|
@ -10,76 +12,29 @@ pub struct Variable {
|
|||
pub value: Value,
|
||||
}
|
||||
|
||||
pub struct Entry<'a> {
|
||||
pub struct Entry {
|
||||
pub processed_source: String,
|
||||
pub hash: [u8; 64],
|
||||
pub tokens: Vec<Token<'a>>,
|
||||
pub tokens: Vec<Token<'static>>,
|
||||
pub variables: Vec<Vec<Variable>>,
|
||||
pub end_pos: Pos,
|
||||
pub named_args: bool,
|
||||
pub first_arg: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Tokenizer(String, Pos),
|
||||
Tokenizer(String, u64),
|
||||
Assertion(String, Pos),
|
||||
}
|
||||
|
||||
fn push_var<'x>(res: &mut Vec<Token<'x>>, module: &'x str, typ: &'x str,
|
||||
var: String, span: Span)
|
||||
{
|
||||
res.push(Token {kind: Kind::OpenParen, text: "(".into(), span, value: None});
|
||||
res.push(Token {kind: Kind::Less, text: "<".into(), span, value: None});
|
||||
res.push(Token {kind: Kind::Ident, text: module.into(), span, value: None});
|
||||
res.push(Token {kind: Kind::Namespace, text: "::".into(), span, value: None});
|
||||
res.push(Token {kind: Kind::Ident, text: typ.into(), span,
|
||||
value: Some(Value::String(typ.to_string())),
|
||||
});
|
||||
res.push(Token {kind: Kind::Greater, text: ">".into(), span, value: None});
|
||||
res.push(Token {kind: Kind::Argument, text: var.into(), span, value: None});
|
||||
res.push(Token {kind: Kind::CloseParen, text: ")".into(), span, value: None});
|
||||
}
|
||||
|
||||
fn scan_vars<'x, 'y: 'x, I>(tokens: I) -> Option<(bool, usize)>
|
||||
where I: IntoIterator<Item=&'x Token<'y>>,
|
||||
{
|
||||
let mut max_visited = None::<usize>;
|
||||
let mut names = BTreeSet::new();
|
||||
for t in tokens {
|
||||
if t.kind == Kind::Argument {
|
||||
if let Ok(v) = t.text[1..].parse() {
|
||||
if max_visited.map(|old| v > old).unwrap_or(true) {
|
||||
max_visited = Some(v);
|
||||
}
|
||||
} else {
|
||||
names.insert(&t.text[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if names.is_empty() {
|
||||
let next = max_visited.map(|x| x.checked_add(1)).unwrap_or(Some(0))?;
|
||||
Some((false, next))
|
||||
} else if max_visited.is_some() {
|
||||
return None // mixed arguments
|
||||
} else {
|
||||
Some((true, names.len()))
|
||||
}
|
||||
}
|
||||
|
||||
fn hash(text: &str) -> [u8; 64] {
|
||||
let mut result = [0u8; 64];
|
||||
result.copy_from_slice(&Blake2b512::new_with_prefix(text.as_bytes())
|
||||
.finalize());
|
||||
return result;
|
||||
}
|
||||
|
||||
pub fn normalize(text: &str) -> Result<Entry, Error> {
|
||||
let mut token_stream = Tokenizer::new(text).validated_values();
|
||||
let tokens = (&mut token_stream)
|
||||
let tokens = Tokenizer::new(text)
|
||||
.validated_values()
|
||||
.with_eof()
|
||||
.map(|x| x.map(|t| t.cloned()))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(|e| Error::Tokenizer(e.message, e.span.start))?;
|
||||
let end_pos = token_stream.current_pos();
|
||||
|
||||
let (named_args, var_idx) = match scan_vars(&tokens) {
|
||||
Some(pair) => pair,
|
||||
None => {
|
||||
|
@ -90,7 +45,6 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
|
|||
processed_source,
|
||||
tokens,
|
||||
variables: Vec::new(),
|
||||
end_pos,
|
||||
named_args: false,
|
||||
first_arg: None,
|
||||
});
|
||||
|
@ -110,7 +64,7 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
|
|||
}
|
||||
};
|
||||
let mut last_was_set = false;
|
||||
for (idx, tok) in tokens.iter().enumerate() {
|
||||
for tok in &tokens {
|
||||
let mut is_set = false;
|
||||
match tok.kind {
|
||||
Kind::IntConst
|
||||
|
@ -120,69 +74,64 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
|
|||
// Don't replace 'LIMIT 1' as a special case
|
||||
&& (tok.text != "1"
|
||||
|| !matches!(rewritten_tokens.last(),
|
||||
Some(Token { kind: Kind::Keyword, ref text, .. })
|
||||
if text.eq_ignore_ascii_case("LIMIT")))
|
||||
Some(Token { kind: Kind::Keyword(Keyword("limit")), .. })))
|
||||
&& tok.text != "9223372036854775808"
|
||||
=> {
|
||||
push_var(&mut rewritten_tokens, "__std__", "int64",
|
||||
rewritten_tokens.extend(arg_type_cast( "__std__", "int64",
|
||||
next_var(),
|
||||
tok.span);
|
||||
tok.span));
|
||||
variables.push(Variable {
|
||||
value: tok.value.clone().unwrap(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
Kind::FloatConst => {
|
||||
push_var(&mut rewritten_tokens, "__std__", "float64",
|
||||
rewritten_tokens.extend(arg_type_cast( "__std__", "float64",
|
||||
next_var(),
|
||||
tok.span);
|
||||
tok.span));
|
||||
variables.push(Variable {
|
||||
value: tok.value.clone().unwrap(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
Kind::BigIntConst => {
|
||||
push_var(&mut rewritten_tokens, "__std__", "bigint",
|
||||
rewritten_tokens.extend(arg_type_cast( "__std__", "bigint",
|
||||
next_var(),
|
||||
tok.span);
|
||||
tok.span));
|
||||
variables.push(Variable {
|
||||
value: tok.value.clone().unwrap(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
Kind::DecimalConst => {
|
||||
push_var(&mut rewritten_tokens, "__std__", "decimal",
|
||||
rewritten_tokens.extend(arg_type_cast( "__std__", "decimal",
|
||||
next_var(),
|
||||
tok.span);
|
||||
tok.span));
|
||||
variables.push(Variable {
|
||||
value: tok.value.clone().unwrap(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
Kind::Str => {
|
||||
push_var(&mut rewritten_tokens, "__std__", "str",
|
||||
rewritten_tokens.extend(arg_type_cast( "__std__", "str",
|
||||
next_var(),
|
||||
tok.span);
|
||||
tok.span));
|
||||
variables.push(Variable {
|
||||
value: tok.value.clone().unwrap(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
Kind::Keyword
|
||||
if (matches!(&(&tok.text[..].to_uppercase())[..],
|
||||
"CONFIGURE"|"CREATE"|"ALTER"|"DROP"|"START"|"ANALYZE")
|
||||
|| (last_was_set &&
|
||||
matches!(&(&tok.text[..].to_uppercase())[..],
|
||||
"GLOBAL"))
|
||||
)
|
||||
=> {
|
||||
Kind::Keyword(Keyword(kw))
|
||||
if (
|
||||
matches!(kw, "configure"|"create"|"alter"|"drop"|"start"|"analyze")
|
||||
|| (last_was_set && kw == "global")
|
||||
) => {
|
||||
let processed_source = serialize_tokens(&tokens);
|
||||
return Ok(Entry {
|
||||
hash: hash(&processed_source),
|
||||
processed_source,
|
||||
tokens,
|
||||
variables: Vec::new(),
|
||||
end_pos,
|
||||
named_args: false,
|
||||
first_arg: None,
|
||||
});
|
||||
|
@ -192,14 +141,11 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
|
|||
// because the only statements with internal semis are DDL
|
||||
// statements, which we don't support anyway.
|
||||
Kind::Semicolon => {
|
||||
if idx + 1 < tokens.len() {
|
||||
all_variables.push(variables);
|
||||
variables = Vec::new();
|
||||
}
|
||||
all_variables.push(variables);
|
||||
variables = Vec::new();
|
||||
rewritten_tokens.push(tok.clone());
|
||||
}
|
||||
Kind::Keyword
|
||||
if (matches!(&(&tok.text[..].to_uppercase())[..], "SET")) => {
|
||||
Kind::Keyword(Keyword("set")) => {
|
||||
is_set = true;
|
||||
rewritten_tokens.push(tok.clone());
|
||||
}
|
||||
|
@ -214,75 +160,39 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
|
|||
hash: hash(&processed_source),
|
||||
processed_source,
|
||||
named_args,
|
||||
first_arg: if counter <= var_idx { None } else { Some(var_idx) },
|
||||
first_arg: if counter <= var_idx {
|
||||
None
|
||||
} else {
|
||||
Some(var_idx)
|
||||
},
|
||||
tokens: rewritten_tokens,
|
||||
variables: all_variables,
|
||||
end_pos,
|
||||
});
|
||||
}
|
||||
|
||||
fn is_operator(token: &Token) -> bool {
|
||||
use edgeql_parser::tokenizer::Kind::*;
|
||||
match token.kind {
|
||||
| Assign
|
||||
| SubAssign
|
||||
| AddAssign
|
||||
| Arrow
|
||||
| Coalesce
|
||||
| Namespace
|
||||
| DoubleSplat
|
||||
| BackwardLink
|
||||
| FloorDiv
|
||||
| Concat
|
||||
| GreaterEq
|
||||
| LessEq
|
||||
| NotEq
|
||||
| NotDistinctFrom
|
||||
| DistinctFrom
|
||||
| Comma
|
||||
| OpenParen
|
||||
| CloseParen
|
||||
| OpenBracket
|
||||
| CloseBracket
|
||||
| OpenBrace
|
||||
| CloseBrace
|
||||
| Dot
|
||||
| Semicolon
|
||||
| Colon
|
||||
| Add
|
||||
| Sub
|
||||
| Mul
|
||||
| Div
|
||||
| Modulo
|
||||
| Pow
|
||||
| Less
|
||||
| Greater
|
||||
| Eq
|
||||
| Ampersand
|
||||
| Pipe
|
||||
| At
|
||||
=> true,
|
||||
| DecimalConst
|
||||
| FloatConst
|
||||
| IntConst
|
||||
| BigIntConst
|
||||
| BinStr
|
||||
| Argument
|
||||
| Str
|
||||
| BacktickName
|
||||
| Keyword
|
||||
| Ident
|
||||
| Substitution
|
||||
=> false,
|
||||
Assign | SubAssign | AddAssign | Arrow | Coalesce | Namespace | DoubleSplat
|
||||
| BackwardLink | FloorDiv | Concat | GreaterEq | LessEq | NotEq | NotDistinctFrom
|
||||
| DistinctFrom | Comma | OpenParen | CloseParen | OpenBracket | CloseBracket
|
||||
| OpenBrace | CloseBrace | Dot | Semicolon | Colon | Add | Sub | Mul | Div | Modulo
|
||||
| Pow | Less | Greater | Eq | Ampersand | Pipe | At => true,
|
||||
DecimalConst | FloatConst | IntConst | BigIntConst | BinStr | Argument | Str
|
||||
| BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_tokens(tokens: &[Token<'_>]) -> String {
|
||||
fn serialize_tokens(tokens: &[Token]) -> String {
|
||||
use edgeql_parser::tokenizer::Kind::Argument;
|
||||
|
||||
let mut buf = String::new();
|
||||
let mut needs_space = false;
|
||||
for token in tokens {
|
||||
if matches!(token.kind, Kind::EOF | Kind::EOI) {
|
||||
break;
|
||||
}
|
||||
|
||||
if needs_space && !is_operator(token) && token.kind != Argument {
|
||||
buf.push(' ');
|
||||
}
|
||||
|
@ -292,12 +202,78 @@ fn serialize_tokens(tokens: &[Token<'_>]) -> String {
|
|||
return buf;
|
||||
}
|
||||
|
||||
fn scan_vars<'x, 'y: 'x, I>(tokens: I) -> Option<(bool, usize)>
|
||||
where
|
||||
I: IntoIterator<Item = &'x Token<'x>>,
|
||||
{
|
||||
let mut max_visited = None::<usize>;
|
||||
let mut names = BTreeSet::new();
|
||||
for t in tokens {
|
||||
if t.kind == Kind::Argument {
|
||||
if let Ok(v) = t.text[1..].parse() {
|
||||
if max_visited.map(|old| v > old).unwrap_or(true) {
|
||||
max_visited = Some(v);
|
||||
}
|
||||
} else {
|
||||
names.insert(&t.text[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if names.is_empty() {
|
||||
let next = max_visited.map(|x| x.checked_add(1)).unwrap_or(Some(0))?;
|
||||
Some((false, next))
|
||||
} else if max_visited.is_some() {
|
||||
return None; // mixed arguments
|
||||
} else {
|
||||
Some((true, names.len()))
|
||||
}
|
||||
}
|
||||
|
||||
fn hash(text: &str) -> [u8; 64] {
|
||||
let mut result = [0u8; 64];
|
||||
result.copy_from_slice(&Blake2b512::new_with_prefix(text.as_bytes()).finalize());
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Produces tokens corresponding to (<module::typ>$var)
|
||||
fn arg_type_cast(
|
||||
module: &'static str,
|
||||
typ: &'static str,
|
||||
var: String,
|
||||
span: Span,
|
||||
) -> [Token<'static>; 8] {
|
||||
fn tk(kind: Kind, text: Cow<'_, str>, span: Span) -> Token {
|
||||
let value = if kind == Kind::Ident {
|
||||
Some(Value::String(text.to_string()))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Token {
|
||||
kind,
|
||||
text,
|
||||
value,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
[
|
||||
tk(Kind::OpenParen, "(".into(), span),
|
||||
tk(Kind::Less, "<".into(), span),
|
||||
tk(Kind::Ident, module.into(), span),
|
||||
tk(Kind::Namespace, "::".into(), span),
|
||||
tk(Kind::Ident, typ.into(), span),
|
||||
tk(Kind::Greater, ">".into(), span),
|
||||
tk(Kind::Argument, var.into(), span),
|
||||
tk(Kind::CloseParen, ")".into(), span),
|
||||
]
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::scan_vars;
|
||||
use edgeql_parser::tokenizer::{Token, Tokenizer};
|
||||
|
||||
fn tokenize<'x>(s: &'x str) -> Vec<Token<'x>> {
|
||||
fn tokenize<'x>(s: &'x str) -> Vec<Token> {
|
||||
let mut r = Vec::new();
|
||||
let mut s = Tokenizer::new(s);
|
||||
loop {
|
||||
|
@ -328,8 +304,10 @@ mod test {
|
|||
assert_eq!(scan_vars(&tokenize("$a")).unwrap(), (true, 1));
|
||||
assert_eq!(scan_vars(&tokenize("$b $c $d")).unwrap(), (true, 3));
|
||||
assert_eq!(scan_vars(&tokenize("$b $c $b")).unwrap(), (true, 2));
|
||||
assert_eq!(scan_vars(&tokenize("$a $b $b $a $c $xx")).unwrap(),
|
||||
(true, 4));
|
||||
assert_eq!(
|
||||
scan_vars(&tokenize("$a $b $b $a $c $xx")).unwrap(),
|
||||
(true, 4)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -339,5 +317,4 @@ mod test {
|
|||
assert_eq!(scan_vars(&tokenize("$b $c $100")), None);
|
||||
assert_eq!(scan_vars(&tokenize("$10 $xx $yy")), None);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
163
edb/edgeql-parser/edgeql-parser-python/src/parser.rs
Normal file
163
edb/edgeql-parser/edgeql-parser-python/src/parser.rs
Normal file
|
@ -0,0 +1,163 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use cpython::{
|
||||
ObjectProtocol, PyClone, PyInt, PyList, PyObject, PyResult, PyString, PyTuple, Python,
|
||||
PythonObject, PythonObjectWithCheckedDowncast, ToPyObject,
|
||||
};
|
||||
|
||||
use edgeql_parser::parser;
|
||||
|
||||
use crate::errors::{parser_error_into_tuple, ParserResult};
|
||||
use crate::pynormalize::value_to_py_object;
|
||||
use crate::tokenizer::OpaqueToken;
|
||||
|
||||
pub fn parse(py: Python, parser_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
|
||||
let (spec, productions) = load_spec(py, parser_name.to_string(py)?.as_ref())?;
|
||||
|
||||
let tokens = downcast_tokens(py, tokens)?;
|
||||
|
||||
let context = parser::Context::new(spec);
|
||||
let (cst, errors) = parser::parse(&tokens, &context);
|
||||
|
||||
let cst = cst.map(|c| to_py_cst(c, py)).transpose()?;
|
||||
|
||||
let errors = errors
|
||||
.into_iter()
|
||||
.map(|e| parser_error_into_tuple(py, e))
|
||||
.collect::<Vec<_>>();
|
||||
let errors = PyList::new(py, &errors);
|
||||
|
||||
let res = ParserResult::create_instance(py, cst.into_py_object(py), errors)?;
|
||||
|
||||
Ok((res, productions).into_py_object(py))
|
||||
}
|
||||
|
||||
// Python-visible node of the concrete syntax tree.
// `to_py_cst` fills exactly one of the two slots (the other is `None`),
// or neither for an empty node.
py_class!(pub class CSTNode |py| {
    data _production: PyObject;
    data _terminal: PyObject;

    // New reference to the stored production object.
    def production(&self) -> PyResult<PyObject> {
        let production = self._production(py);
        Ok(production.clone_ref(py))
    }

    // New reference to the stored terminal object.
    def terminal(&self) -> PyResult<PyObject> {
        let terminal = self._terminal(py);
        Ok(terminal.clone_ref(py))
    }
});
|
||||
|
||||
// Python-visible production: a production id plus the list of child nodes.
py_class!(pub class Production |py| {
    data _id: PyInt;
    data _args: PyList;

    // New reference to the production id.
    def id(&self) -> PyResult<PyInt> {
        let id = self._id(py);
        Ok(id.clone_ref(py))
    }

    // New reference to the list of child nodes.
    def args(&self) -> PyResult<PyList> {
        let args = self._args(py);
        Ok(args.clone_ref(py))
    }
});
|
||||
|
||||
// Python-visible terminal: token text, its optional parsed value and the
// start/end of its span.
py_class!(pub class Terminal |py| {
    data _text: PyString;
    data _value: PyObject;
    data _start: u64;
    data _end: u64;

    // New reference to the token text.
    def text(&self) -> PyResult<PyString> {
        let text = self._text(py);
        Ok(text.clone_ref(py))
    }

    // New reference to the token value (`None` when the token had no value).
    def value(&self) -> PyResult<PyObject> {
        let value = self._value(py);
        Ok(value.clone_ref(py))
    }

    // Span start as stored at construction time.
    def start(&self) -> PyResult<u64> {
        let start: u64 = *self._start(py);
        Ok(start)
    }

    // Span end as stored at construction time.
    def end(&self) -> PyResult<u64> {
        let end: u64 = *self._end(py);
        Ok(end)
    }
});
|
||||
|
||||
static mut PARSER_SPECS: Option<HashMap<String, (parser::Spec, PyObject)>> = None;
|
||||
|
||||
pub fn init_module() {
|
||||
unsafe {
|
||||
PARSER_SPECS = Some(HashMap::new());
|
||||
}
|
||||
}
|
||||
|
||||
fn downcast_tokens<'a>(py: Python, token_list: PyObject) -> PyResult<Vec<parser::Terminal>> {
|
||||
let tokens = PyList::downcast_from(py, token_list)?;
|
||||
|
||||
let mut buf = Vec::with_capacity(tokens.len(py));
|
||||
for token in tokens.iter(py) {
|
||||
let token = OpaqueToken::downcast_from(py, token)?;
|
||||
let token = token.inner(py);
|
||||
|
||||
buf.push(parser::Terminal::from_token(token));
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
fn load_spec(py: Python, parser_name: &str) -> PyResult<&'static (parser::Spec, PyObject)> {
|
||||
let parser_specs = unsafe { PARSER_SPECS.as_mut().unwrap() };
|
||||
if !parser_specs.contains_key(parser_name) {
|
||||
let parser_mod = py.import("edb.edgeql.parser.parser")?;
|
||||
|
||||
let process_spec = py.import("edb.edgeql.parser")?.get(py, "process_spec")?;
|
||||
|
||||
let parser_cls = parser_mod.get(py, parser_name)?;
|
||||
let parser = parser_cls.call(py, PyTuple::new(py, &[]), None)?;
|
||||
|
||||
let res = process_spec.call(py, (parser,), None)?;
|
||||
let res = PyTuple::downcast_from(py, res)?;
|
||||
|
||||
let spec_json = PyString::downcast_from(py, res.get_item(py, 0))?;
|
||||
let spec_json = spec_json.to_string(py).unwrap();
|
||||
|
||||
let productions = res.get_item(py, 1);
|
||||
|
||||
let spec = parser::Spec::from_json(&spec_json).unwrap();
|
||||
|
||||
parser_specs.insert(parser_name.to_string(), (spec, productions));
|
||||
}
|
||||
|
||||
Ok(unsafe { PARSER_SPECS.as_ref().unwrap().get(parser_name).unwrap() })
|
||||
}
|
||||
|
||||
fn to_py_cst<'a>(cst: &'a parser::CSTNode<'a>, py: Python) -> PyResult<CSTNode> {
|
||||
match cst {
|
||||
parser::CSTNode::Empty => CSTNode::create_instance(py, py.None(), py.None()),
|
||||
parser::CSTNode::Terminal(token) => CSTNode::create_instance(
|
||||
py,
|
||||
py.None(),
|
||||
Terminal::create_instance(
|
||||
py,
|
||||
token.text.to_py_object(py),
|
||||
if let Some(val) = &token.value {
|
||||
value_to_py_object(py, val)?
|
||||
} else {
|
||||
py.None()
|
||||
},
|
||||
token.span.start,
|
||||
token.span.end,
|
||||
)?
|
||||
.into_object(),
|
||||
),
|
||||
parser::CSTNode::Production(prod) => CSTNode::create_instance(
|
||||
py,
|
||||
Production::create_instance(
|
||||
py,
|
||||
prod.id.into_py_object(py),
|
||||
PyList::new(
|
||||
py,
|
||||
prod.args
|
||||
.iter()
|
||||
.map(|a| to_py_cst(a, py).map(|x| x.into_object()))
|
||||
.collect::<PyResult<Vec<_>>>()?
|
||||
.as_slice(),
|
||||
),
|
||||
)?
|
||||
.into_object(),
|
||||
py.None(),
|
||||
),
|
||||
}
|
||||
}
|
|
@ -1,19 +1,19 @@
|
|||
use std::convert::TryFrom;
|
||||
|
||||
use bigdecimal::Num;
|
||||
use cpython::exc::AssertionError;
|
||||
use cpython::{PyBytes, PyErr, PyInt, PyTuple, PythonObject, ToPyObject};
|
||||
use cpython::{PyBytes, PyErr, PyInt, PythonObject, ToPyObject};
|
||||
use cpython::{PyClone, PyDict, PyList, PyResult, PyString, Python};
|
||||
use cpython::{PyFloat, PyObject};
|
||||
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use edgedb_protocol::codec;
|
||||
use edgedb_protocol::model::{BigInt, Decimal};
|
||||
use edgeql_parser::position::Pos;
|
||||
use edgeql_parser::tokenizer::Value;
|
||||
|
||||
use crate::errors::TokenizerError;
|
||||
use crate::errors::SyntaxError;
|
||||
use crate::normalize::{normalize as _normalize, Error, Variable};
|
||||
use crate::tokenizer::convert_tokens;
|
||||
use crate::tokenizer::tokens_to_py;
|
||||
|
||||
py_class!(pub class Entry |py| {
|
||||
data _key: PyBytes;
|
||||
|
@ -60,10 +60,6 @@ py_class!(pub class Entry |py| {
|
|||
}
|
||||
});
|
||||
|
||||
pub fn py_pos(py: Python, pos: &Pos) -> PyTuple {
|
||||
(pos.line, pos.column, pos.offset).to_py_object(py)
|
||||
}
|
||||
|
||||
pub fn serialize_extra(variables: &[Variable]) -> Result<Bytes, String> {
|
||||
use edgedb_protocol::codec::Codec;
|
||||
use edgedb_protocol::value::Value as P;
|
||||
|
@ -91,8 +87,15 @@ pub fn serialize_extra(variables: &[Variable]) -> Result<Bytes, String> {
|
|||
.map_err(|e| format!("float cannot be encoded: {}", e))?;
|
||||
}
|
||||
Value::BigInt(ref v) => {
|
||||
let val = BigInt::try_from(v.clone())
|
||||
.map_err(|e| format!("bigint cannot be encoded: {}", e))?;
|
||||
// We have two different versions of BigInt implementations here.
|
||||
// We have to use bigdecimal::num_bigint::BigInt because it can parse with radix 16.
|
||||
|
||||
let val = bigdecimal::num_bigint::BigInt::from_str_radix(v, 16)
|
||||
.map_err(|e| format!("bigint cannot be encoded: {}", e))
|
||||
.and_then(|x| {
|
||||
BigInt::try_from(x).map_err(|e| format!("bigint cannot be encoded: {}", e))
|
||||
})?;
|
||||
|
||||
codec::BigInt
|
||||
.encode(&mut buf, &P::BigInt(val))
|
||||
.map_err(|e| format!("bigint cannot be encoded: {}", e))?;
|
||||
|
@ -145,7 +148,7 @@ pub fn normalize(py: Python<'_>, text: &PyString) -> PyResult<Entry> {
|
|||
py,
|
||||
/* key: */ PyBytes::new(py, &entry.hash[..]),
|
||||
/* processed_source: */ entry.processed_source,
|
||||
/* tokens: */ convert_tokens(py, entry.tokens, entry.end_pos)?,
|
||||
/* tokens: */ tokens_to_py(py, entry.tokens)?,
|
||||
/* extra_blobs: */ blobs,
|
||||
/* extra_named: */ entry.named_args,
|
||||
/* first_extra: */ entry.first_arg,
|
||||
|
@ -154,7 +157,7 @@ pub fn normalize(py: Python<'_>, text: &PyString) -> PyResult<Entry> {
|
|||
)?)
|
||||
}
|
||||
Err(Error::Tokenizer(msg, pos)) => {
|
||||
return Err(TokenizerError::new(py, (msg, py_pos(py, &pos))))
|
||||
return Err(SyntaxError::new(py, (msg, (pos, py.None()))))
|
||||
}
|
||||
Err(Error::Assertion(msg, pos)) => {
|
||||
return Err(PyErr::new::<AssertionError, _>(
|
||||
|
@ -170,10 +173,9 @@ pub fn value_to_py_object(py: Python, val: &Value) -> PyResult<PyObject> {
|
|||
Value::Int(v) => v.to_py_object(py).into_object(),
|
||||
Value::String(v) => v.to_py_object(py).into_object(),
|
||||
Value::Float(v) => v.to_py_object(py).into_object(),
|
||||
Value::BigInt(v) => {
|
||||
py.get_type::<PyInt>()
|
||||
.call(py, (v.to_str_radix(16), 16.to_py_object(py)), None)?
|
||||
}
|
||||
Value::BigInt(v) => py
|
||||
.get_type::<PyInt>()
|
||||
.call(py, (v, 16.to_py_object(py)), None)?,
|
||||
Value::Decimal(v) => py.get_type::<PyFloat>().call(py, (v.to_string(),), None)?,
|
||||
Value::Bytes(v) => PyBytes::new(py, v).into_object(),
|
||||
})
|
||||
|
|
|
@ -1,572 +1,94 @@
|
|||
use std::collections::HashMap;
|
||||
use cpython::{PyBytes, PyClone, PyResult, PyString, Python, PythonObject};
|
||||
use cpython::{PyList, PyObject, PyTuple, ToPyObject};
|
||||
|
||||
use cpython::{PyString, PyResult, Python, PyClone, PythonObject};
|
||||
use cpython::{PyTuple, PyList, PyObject, ToPyObject, ObjectProtocol};
|
||||
use cpython::{FromPyObject};
|
||||
use edgeql_parser::tokenizer::{Token, Tokenizer};
|
||||
|
||||
use edgeql_parser::tokenizer::{Kind, is_keyword, Tokenizer, Token as PToken};
|
||||
use edgeql_parser::tokenizer::{MAX_KEYWORD_LENGTH};
|
||||
use edgeql_parser::position::Pos;
|
||||
use edgeql_parser::keywords::{PARTIAL_RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
|
||||
use edgeql_parser::keywords::{CURRENT_RESERVED_KEYWORDS};
|
||||
use edgeql_parser::keywords::{FUTURE_RESERVED_KEYWORDS};
|
||||
use crate::errors::{parser_error_into_tuple, ParserResult};
|
||||
|
||||
use crate::errors::TokenizerError;
|
||||
use crate::pynormalize::{py_pos, value_to_py_object};
|
||||
pub fn tokenize(py: Python, s: &PyString) -> PyResult<ParserResult> {
|
||||
let data = s.to_string(py)?;
|
||||
|
||||
static mut TOKENS: Option<Tokens> = None;
|
||||
let mut token_stream = Tokenizer::new(&data[..]).validated_values().with_eof();
|
||||
|
||||
let mut tokens: Vec<_> = Vec::new();
|
||||
let mut errors: Vec<_> = Vec::new();
|
||||
|
||||
fn rs_pos(py: Python, value: &PyObject) -> PyResult<Pos> {
|
||||
let (line, column, offset) = FromPyObject::extract(py, value)?;
|
||||
Ok(Pos { line, column, offset })
|
||||
for res in &mut token_stream {
|
||||
match res {
|
||||
Ok(token) => tokens.push(token),
|
||||
Err(e) => {
|
||||
errors.push(parser_error_into_tuple(py, e));
|
||||
|
||||
// TODO: fix tokenizer to skip bad tokens and continue
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let tokens = tokens_to_py(py, tokens)?;
|
||||
|
||||
let errors = PyList::new(py, errors.as_slice()).to_py_object(py);
|
||||
|
||||
ParserResult::create_instance(py, tokens.into_object(), errors)
|
||||
}
|
||||
|
||||
py_class!(pub class Token |py| {
|
||||
data _kind: PyString;
|
||||
data _text: PyString;
|
||||
data _value: PyObject;
|
||||
data _start: Pos;
|
||||
data _end: Pos;
|
||||
def kind(&self) -> PyResult<PyString> {
|
||||
Ok(self._kind(py).clone_ref(py))
|
||||
}
|
||||
def text(&self) -> PyResult<PyString> {
|
||||
Ok(self._text(py).clone_ref(py))
|
||||
}
|
||||
def value(&self) -> PyResult<PyObject> {
|
||||
Ok(self._value(py).clone_ref(py))
|
||||
}
|
||||
def start(&self) -> PyResult<PyTuple> {
|
||||
Ok(py_pos(py, self._start(py)))
|
||||
}
|
||||
def end(&self) -> PyResult<PyTuple> {
|
||||
Ok(py_pos(py, self._end(py)))
|
||||
}
|
||||
// An opaque wrapper around [edgeql_parser::tokenizer::Token].
|
||||
// Supports Python pickle serialization.
|
||||
py_class!(pub class OpaqueToken |py| {
|
||||
data _inner: Token<'static>;
|
||||
|
||||
def __repr__(&self) -> PyResult<PyString> {
|
||||
let val = self._value(py);
|
||||
let s = if *val == py.None() {
|
||||
format!("<Token {}>", self._kind(py).to_string(py)?)
|
||||
} else {
|
||||
format!("<Token {} {}>",
|
||||
self._kind(py).to_string(py)?,
|
||||
val.repr(py)?.to_string(py)?)
|
||||
};
|
||||
Ok(PyString::new(py, &s))
|
||||
Ok(PyString::new(py, &self._inner(py).to_string()))
|
||||
}
|
||||
def __reduce__(&self) -> PyResult<PyTuple> {
|
||||
let data: Vec<u8> = rmp_serde::to_vec(self._inner(py)).unwrap().to_vec();
|
||||
|
||||
return Ok((
|
||||
get_unpickle_fn(py),
|
||||
get_fn_unpickle_token(py),
|
||||
(
|
||||
self._kind(py),
|
||||
self._text(py),
|
||||
self._value(py),
|
||||
py_pos(py, self._start(py)),
|
||||
py_pos(py, self._end(py)),
|
||||
PyBytes::new(py, &data),
|
||||
),
|
||||
).to_py_object(py))
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
pub struct Tokens {
|
||||
ident: PyString,
|
||||
argument: PyString,
|
||||
eof: PyString,
|
||||
empty: PyString,
|
||||
substitution: PyString,
|
||||
|
||||
named_only: PyString,
|
||||
named_only_val: PyString,
|
||||
set_annotation: PyString,
|
||||
set_annotation_val: PyString,
|
||||
set_type: PyString,
|
||||
set_type_val: PyString,
|
||||
extension_package: PyString,
|
||||
extension_package_val: PyString,
|
||||
order_by: PyString,
|
||||
order_by_val: PyString,
|
||||
|
||||
dot: PyString,
|
||||
backward_link: PyString,
|
||||
open_bracket: PyString,
|
||||
close_bracket: PyString,
|
||||
open_paren: PyString,
|
||||
close_paren: PyString,
|
||||
open_brace: PyString,
|
||||
close_brace: PyString,
|
||||
namespace: PyString,
|
||||
double_splat: PyString,
|
||||
coalesce: PyString,
|
||||
colon: PyString,
|
||||
semicolon: PyString,
|
||||
comma: PyString,
|
||||
add: PyString,
|
||||
concat: PyString,
|
||||
sub: PyString,
|
||||
mul: PyString,
|
||||
div: PyString,
|
||||
floor_div: PyString,
|
||||
modulo: PyString,
|
||||
pow: PyString,
|
||||
less: PyString,
|
||||
greater: PyString,
|
||||
eq: PyString,
|
||||
ampersand: PyString,
|
||||
pipe: PyString,
|
||||
at: PyString,
|
||||
|
||||
iconst: PyString,
|
||||
niconst: PyString,
|
||||
fconst: PyString,
|
||||
nfconst: PyString,
|
||||
bconst: PyString,
|
||||
sconst: PyString,
|
||||
|
||||
greater_eq: PyString,
|
||||
less_eq: PyString,
|
||||
not_eq: PyString,
|
||||
distinct_from: PyString,
|
||||
not_distinct_from: PyString,
|
||||
|
||||
assign: PyString,
|
||||
add_assign: PyString,
|
||||
sub_assign: PyString,
|
||||
arrow: PyString,
|
||||
|
||||
keywords: HashMap<String, TokenInfo>,
|
||||
unpickle_token: PyObject,
|
||||
}
|
||||
|
||||
struct Cache {
|
||||
keyword_buf: String,
|
||||
}
|
||||
|
||||
pub struct TokenInfo {
|
||||
pub kind: Kind,
|
||||
pub name: PyString,
|
||||
pub value: Option<PyString>,
|
||||
}
|
||||
|
||||
pub fn init_module(py: Python) {
|
||||
unsafe {
|
||||
TOKENS = Some(Tokens::new(py))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn _unpickle_token(py: Python,
|
||||
kind: &PyString, text: &PyString, value: &PyObject,
|
||||
start: &PyObject, end: &PyObject)
|
||||
-> PyResult<Token>
|
||||
{
|
||||
// TODO(tailhook) We might some strings from Tokens structure
|
||||
// (i.e. internning them).
|
||||
// But if we're storing a collection of tokens
|
||||
// they will store the tokens only once, so it
|
||||
// doesn't seem to help that much.
|
||||
Token::create_instance(py,
|
||||
kind.clone_ref(py),
|
||||
text.clone_ref(py),
|
||||
value.clone_ref(py),
|
||||
rs_pos(py, start)?,
|
||||
rs_pos(py, end)?)
|
||||
}
|
||||
|
||||
pub fn tokenize(py: Python, s: &PyString) -> PyResult<PyList> {
|
||||
let data = s.to_string(py)?;
|
||||
|
||||
let mut token_stream = Tokenizer::new(&data[..]).validated_values();
|
||||
let rust_tokens: Vec<_> = py.allow_threads(|| {
|
||||
(&mut token_stream).collect::<Result<_, _>>()
|
||||
}).map_err(|e| {
|
||||
TokenizerError::new(py, (e.message, py_pos(py, &e.span.start)))
|
||||
})?;
|
||||
return convert_tokens(py, rust_tokens, token_stream.current_pos());
|
||||
}
|
||||
|
||||
pub fn convert_tokens(py: Python, rust_tokens: Vec<PToken<'_>>,
|
||||
end_pos: Pos)
|
||||
-> PyResult<PyList>
|
||||
{
|
||||
let tokens = unsafe { TOKENS.as_ref().expect("module initialized") };
|
||||
let mut cache = Cache {
|
||||
keyword_buf: String::with_capacity(MAX_KEYWORD_LENGTH),
|
||||
};
|
||||
pub fn tokens_to_py(py: Python, rust_tokens: Vec<Token>) -> PyResult<PyList> {
|
||||
let mut buf = Vec::with_capacity(rust_tokens.len());
|
||||
for tok in rust_tokens {
|
||||
let (kind, text) = get_token_kind_and_name(py, tokens, &mut cache, &tok);
|
||||
let py_tok = OpaqueToken::create_instance(py, tok.cloned())?.into_object();
|
||||
|
||||
let value = tok.value.as_ref()
|
||||
.map(|v| value_to_py_object(py, v)).transpose()?
|
||||
.unwrap_or_else(|| py.None());
|
||||
|
||||
let py_tok = Token::create_instance(
|
||||
py, kind, text, value, tok.span.start, tok.span.end
|
||||
)?;
|
||||
|
||||
buf.push(py_tok.into_object());
|
||||
buf.push(py_tok);
|
||||
}
|
||||
buf.push(Token::create_instance(
|
||||
py,
|
||||
tokens.eof.clone_ref(py),
|
||||
tokens.empty.clone_ref(py),
|
||||
py.None(),
|
||||
end_pos,
|
||||
end_pos
|
||||
)?.into_object()
|
||||
);
|
||||
Ok(PyList::new(py, &buf[..]))
|
||||
}
|
||||
|
||||
impl Tokens {
|
||||
pub fn new(py: Python) -> Tokens {
|
||||
let mut res = Tokens {
|
||||
ident: PyString::new(py, "IDENT"),
|
||||
argument: PyString::new(py, "ARGUMENT"),
|
||||
eof: PyString::new(py, "EOF"),
|
||||
empty: PyString::new(py, ""),
|
||||
substitution: PyString::new(py, "SUBSTITUTION"),
|
||||
named_only: PyString::new(py, "NAMEDONLY"),
|
||||
named_only_val: PyString::new(py, "NAMED ONLY"),
|
||||
set_annotation: PyString::new(py, "SETANNOTATION"),
|
||||
set_annotation_val: PyString::new(py, "SET ANNOTATION"),
|
||||
set_type: PyString::new(py, "SETTYPE"),
|
||||
set_type_val: PyString::new(py, "SET TYPE"),
|
||||
extension_package: PyString::new(py, "EXTENSIONPACKAGE"),
|
||||
extension_package_val: PyString::new(py, "EXTENSION PACKAGE"),
|
||||
order_by: PyString::new(py, "ORDERBY"),
|
||||
order_by_val: PyString::new(py, "ORDER BY"),
|
||||
/// To support pickle serialization of OpaqueTokens, we need to provide a
|
||||
/// deserialization function in __reduce__ methods.
|
||||
/// This function must not be inlined and must be globally accessible.
|
||||
/// To achieve this, we expose it as part of the module definition
|
||||
/// (`_unpickle_token`) and save a reference to it in `FN_UNPICKLE_TOKEN`.
|
||||
///
|
||||
/// A bit hacky, but it works.
|
||||
static mut FN_UNPICKLE_TOKEN: Option<PyObject> = None;
|
||||
|
||||
dot: PyString::new(py, "."),
|
||||
backward_link: PyString::new(py, ".<"),
|
||||
open_bracket: PyString::new(py, "["),
|
||||
close_bracket: PyString::new(py, "]"),
|
||||
open_paren: PyString::new(py, "("),
|
||||
close_paren: PyString::new(py, ")"),
|
||||
open_brace: PyString::new(py, "{"),
|
||||
close_brace: PyString::new(py, "}"),
|
||||
namespace: PyString::new(py, "::"),
|
||||
double_splat: PyString::new(py, "**"),
|
||||
coalesce: PyString::new(py, "??"),
|
||||
colon: PyString::new(py, ":"),
|
||||
semicolon: PyString::new(py, ";"),
|
||||
comma: PyString::new(py, ","),
|
||||
add: PyString::new(py, "+"),
|
||||
concat: PyString::new(py, "++"),
|
||||
sub: PyString::new(py, "-"),
|
||||
mul: PyString::new(py, "*"),
|
||||
div: PyString::new(py, "/"),
|
||||
floor_div: PyString::new(py, "//"),
|
||||
modulo: PyString::new(py, "%"),
|
||||
pow: PyString::new(py, "^"),
|
||||
less: PyString::new(py, "<"),
|
||||
greater: PyString::new(py, ">"),
|
||||
eq: PyString::new(py, "="),
|
||||
ampersand: PyString::new(py, "&"),
|
||||
pipe: PyString::new(py, "|"),
|
||||
at: PyString::new(py, "@"),
|
||||
|
||||
iconst: PyString::new(py, "ICONST"),
|
||||
niconst: PyString::new(py, "NICONST"),
|
||||
fconst: PyString::new(py, "FCONST"),
|
||||
nfconst: PyString::new(py, "NFCONST"),
|
||||
bconst: PyString::new(py, "BCONST"),
|
||||
sconst: PyString::new(py, "SCONST"),
|
||||
|
||||
// as OP
|
||||
greater_eq: PyString::new(py, ">="),
|
||||
less_eq: PyString::new(py, "<="),
|
||||
not_eq: PyString::new(py, "!="),
|
||||
distinct_from: PyString::new(py, "?!="),
|
||||
not_distinct_from: PyString::new(py, "?="),
|
||||
|
||||
assign: PyString::new(py, ":="),
|
||||
add_assign: PyString::new(py, "+="),
|
||||
sub_assign: PyString::new(py, "-="),
|
||||
arrow: PyString::new(py, "->"),
|
||||
|
||||
keywords: HashMap::new(),
|
||||
unpickle_token: py_fn!(py, _unpickle_token(
|
||||
kind: &PyString, text: &PyString, value: &PyObject,
|
||||
start: &PyObject, end: &PyObject)),
|
||||
};
|
||||
// 'EOF'
|
||||
for kw in UNRESERVED_KEYWORDS.iter() {
|
||||
res.add_kw(py, kw);
|
||||
}
|
||||
for kw in PARTIAL_RESERVED_KEYWORDS.iter() {
|
||||
res.add_kw(py, kw);
|
||||
}
|
||||
for kw in CURRENT_RESERVED_KEYWORDS.iter() {
|
||||
res.add_kw(py, kw);
|
||||
}
|
||||
for kw in FUTURE_RESERVED_KEYWORDS.iter() {
|
||||
res.add_kw(py, kw);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
fn add_kw(&mut self, py: Python, name: &str) {
|
||||
let py_name = PyString::new(py, &name.to_ascii_uppercase());
|
||||
let tok_name = if name.starts_with("__") && name.ends_with("__") {
|
||||
format!("DUNDER{}", name[2..name.len()-2].to_ascii_uppercase())
|
||||
.to_py_object(py)
|
||||
} else {
|
||||
py_name.clone_ref(py)
|
||||
};
|
||||
self.keywords.insert(name.into(), TokenInfo {
|
||||
kind: if is_keyword(name) { Kind::Keyword } else { Kind::Ident },
|
||||
name: tok_name,
|
||||
value: None,
|
||||
});
|
||||
pub fn init_module(py: Python) {
|
||||
unsafe {
|
||||
FN_UNPICKLE_TOKEN = Some(py_fn!(py, _unpickle_token(bytes: &PyBytes)));
|
||||
}
|
||||
}
|
||||
|
||||
fn get_token_kind_and_name(
|
||||
py: Python,
|
||||
tokens: &Tokens,
|
||||
cache: &mut Cache,
|
||||
token: &PToken,
|
||||
) -> (PyString, PyString) {
|
||||
use Kind::*;
|
||||
let text = &token.text[..];
|
||||
match token.kind {
|
||||
Assign => (
|
||||
tokens.assign.clone_ref(py),
|
||||
tokens.assign.clone_ref(py),
|
||||
),
|
||||
SubAssign => (
|
||||
tokens.sub_assign.clone_ref(py),
|
||||
tokens.sub_assign.clone_ref(py),
|
||||
),
|
||||
AddAssign => (
|
||||
tokens.add_assign.clone_ref(py),
|
||||
tokens.add_assign.clone_ref(py),
|
||||
),
|
||||
Arrow => (
|
||||
tokens.arrow.clone_ref(py),
|
||||
tokens.arrow.clone_ref(py),
|
||||
),
|
||||
Coalesce => (
|
||||
tokens.coalesce.clone_ref(py),
|
||||
tokens.coalesce.clone_ref(py),
|
||||
),
|
||||
Namespace => (
|
||||
tokens.namespace.clone_ref(py),
|
||||
tokens.namespace.clone_ref(py),
|
||||
),
|
||||
DoubleSplat => (
|
||||
tokens.double_splat.clone_ref(py),
|
||||
tokens.double_splat.clone_ref(py),
|
||||
),
|
||||
BackwardLink => (
|
||||
tokens.backward_link.clone_ref(py),
|
||||
tokens.backward_link.clone_ref(py),
|
||||
),
|
||||
FloorDiv => (
|
||||
tokens.floor_div.clone_ref(py),
|
||||
tokens.floor_div.clone_ref(py),
|
||||
),
|
||||
Concat => (
|
||||
tokens.concat.clone_ref(py),
|
||||
tokens.concat.clone_ref(py),
|
||||
),
|
||||
GreaterEq => (
|
||||
tokens.greater_eq.clone_ref(py),
|
||||
tokens.greater_eq.clone_ref(py),
|
||||
),
|
||||
LessEq => (
|
||||
tokens.less_eq.clone_ref(py),
|
||||
tokens.less_eq.clone_ref(py),
|
||||
),
|
||||
NotEq => (
|
||||
tokens.not_eq.clone_ref(py),
|
||||
tokens.not_eq.clone_ref(py),
|
||||
),
|
||||
NotDistinctFrom => (
|
||||
tokens.not_distinct_from.clone_ref(py),
|
||||
tokens.not_distinct_from.clone_ref(py),
|
||||
),
|
||||
DistinctFrom => (
|
||||
tokens.distinct_from.clone_ref(py),
|
||||
tokens.distinct_from.clone_ref(py),
|
||||
),
|
||||
Comma => (
|
||||
tokens.comma.clone_ref(py),
|
||||
tokens.comma.clone_ref(py),
|
||||
),
|
||||
OpenParen => (
|
||||
tokens.open_paren.clone_ref(py),
|
||||
tokens.open_paren.clone_ref(py),
|
||||
),
|
||||
CloseParen => (
|
||||
tokens.close_paren.clone_ref(py),
|
||||
tokens.close_paren.clone_ref(py),
|
||||
),
|
||||
OpenBracket => (
|
||||
tokens.open_bracket.clone_ref(py),
|
||||
tokens.open_bracket.clone_ref(py),
|
||||
),
|
||||
CloseBracket => (
|
||||
tokens.close_bracket.clone_ref(py),
|
||||
tokens.close_bracket.clone_ref(py),
|
||||
),
|
||||
OpenBrace => (
|
||||
tokens.open_brace.clone_ref(py),
|
||||
tokens.open_brace.clone_ref(py),
|
||||
),
|
||||
CloseBrace => (
|
||||
tokens.close_brace.clone_ref(py),
|
||||
tokens.close_brace.clone_ref(py),
|
||||
),
|
||||
Dot => (
|
||||
tokens.dot.clone_ref(py),
|
||||
tokens.dot.clone_ref(py),
|
||||
),
|
||||
Semicolon => (
|
||||
tokens.semicolon.clone_ref(py),
|
||||
tokens.semicolon.clone_ref(py),
|
||||
),
|
||||
Colon => (
|
||||
tokens.colon.clone_ref(py),
|
||||
tokens.colon.clone_ref(py),
|
||||
),
|
||||
Add => (
|
||||
tokens.add.clone_ref(py),
|
||||
tokens.add.clone_ref(py),
|
||||
),
|
||||
Sub => (
|
||||
tokens.sub.clone_ref(py),
|
||||
tokens.sub.clone_ref(py),
|
||||
),
|
||||
Mul => (
|
||||
tokens.mul.clone_ref(py),
|
||||
tokens.mul.clone_ref(py),
|
||||
),
|
||||
Div => (
|
||||
tokens.div.clone_ref(py),
|
||||
tokens.div.clone_ref(py),
|
||||
),
|
||||
Modulo => (
|
||||
tokens.modulo.clone_ref(py),
|
||||
tokens.modulo.clone_ref(py),
|
||||
),
|
||||
Pow => (
|
||||
tokens.pow.clone_ref(py),
|
||||
tokens.pow.clone_ref(py),
|
||||
),
|
||||
Less => (
|
||||
tokens.less.clone_ref(py),
|
||||
tokens.less.clone_ref(py),
|
||||
),
|
||||
Greater => (
|
||||
tokens.greater.clone_ref(py),
|
||||
tokens.greater.clone_ref(py),
|
||||
),
|
||||
Eq => (
|
||||
tokens.eq.clone_ref(py),
|
||||
tokens.eq.clone_ref(py),
|
||||
),
|
||||
Ampersand => (
|
||||
tokens.ampersand.clone_ref(py),
|
||||
tokens.ampersand.clone_ref(py),
|
||||
),
|
||||
Pipe => (
|
||||
tokens.pipe.clone_ref(py),
|
||||
tokens.pipe.clone_ref(py),
|
||||
),
|
||||
At => (
|
||||
tokens.at.clone_ref(py),
|
||||
tokens.at.clone_ref(py),
|
||||
),
|
||||
Argument => (
|
||||
tokens.argument.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
DecimalConst => (
|
||||
tokens.nfconst.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
FloatConst => (
|
||||
tokens.fconst.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
IntConst => (
|
||||
tokens.iconst.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
BigIntConst => (
|
||||
tokens.niconst.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
BinStr => (
|
||||
tokens.bconst.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
Str => (
|
||||
tokens.sconst.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
BacktickName => (
|
||||
tokens.ident.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
Ident | Keyword => match text {
|
||||
"named only" => (
|
||||
tokens.named_only.clone_ref(py),
|
||||
tokens.named_only_val.clone_ref(py),
|
||||
),
|
||||
"set annotation" => (
|
||||
tokens.set_annotation.clone_ref(py),
|
||||
tokens.set_annotation_val.clone_ref(py),
|
||||
),
|
||||
"set type" => (
|
||||
tokens.set_type.clone_ref(py),
|
||||
tokens.set_type_val.clone_ref(py),
|
||||
),
|
||||
"extension package" => {
|
||||
(
|
||||
tokens.extension_package.clone_ref(py),
|
||||
tokens.extension_package_val.clone_ref(py),
|
||||
)},
|
||||
"order by" => (
|
||||
tokens.order_by.clone_ref(py),
|
||||
tokens.order_by_val.clone_ref(py),
|
||||
),
|
||||
pub fn _unpickle_token(py: Python, bytes: &PyBytes) -> PyResult<OpaqueToken> {
|
||||
let token = rmp_serde::from_slice(bytes.data(py)).unwrap();
|
||||
OpaqueToken::create_instance(py, token)
|
||||
}
|
||||
|
||||
_ => {
|
||||
if text.len() > MAX_KEYWORD_LENGTH {
|
||||
(
|
||||
tokens.ident.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
)
|
||||
} else {
|
||||
cache.keyword_buf.clear();
|
||||
cache.keyword_buf.push_str(text);
|
||||
cache.keyword_buf.make_ascii_lowercase();
|
||||
pub fn get_fn_unpickle_token(py: Python) -> PyObject {
|
||||
let py_function = unsafe { FN_UNPICKLE_TOKEN.as_ref().expect("module initialized") };
|
||||
return py_function.clone_ref(py);
|
||||
}
|
||||
|
||||
let kind = match tokens.keywords.get(&cache.keyword_buf) {
|
||||
Some(keyword) => {
|
||||
debug_assert_eq!(keyword.kind, token.kind);
|
||||
|
||||
keyword.name.clone_ref(py)
|
||||
}
|
||||
None => {
|
||||
debug_assert_eq!(Kind::Ident, token.kind);
|
||||
tokens.ident.clone_ref(py)
|
||||
}
|
||||
};
|
||||
(kind, PyString::new(py, text))
|
||||
}
|
||||
},
|
||||
}
|
||||
Substitution => (
|
||||
tokens.substitution.clone_ref(py),
|
||||
PyString::new(py, text),
|
||||
),
|
||||
impl OpaqueToken {
|
||||
pub(super) fn inner(&self, py: Python) -> Token {
|
||||
self._inner(py).clone()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_unpickle_fn(py: Python) -> PyObject {
|
||||
let tokens = unsafe { TOKENS.as_ref().expect("module initialized") };
|
||||
return tokens.unpickle_token.clone_ref(py);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use edgeql_rust::normalize::{normalize, Variable};
|
||||
use edgeql_parser::tokenizer::{Value as Value};
|
||||
use edgeql_parser::tokenizer::Value;
|
||||
use num_bigint::BigInt;
|
||||
|
||||
|
||||
#[test]
|
||||
|
@ -80,10 +81,10 @@ fn test_bigint() {
|
|||
"SELECT(<__std__::bigint>$0)+(<__std__::bigint>$1)");
|
||||
assert_eq!(entry.variables, vec![vec![
|
||||
Variable {
|
||||
value: Value::BigInt(1.into()),
|
||||
value: Value::BigInt("1".into()),
|
||||
},
|
||||
Variable {
|
||||
value: Value::BigInt(23.into()),
|
||||
value: Value::BigInt(BigInt::from(23).to_str_radix(16)),
|
||||
}
|
||||
]]);
|
||||
}
|
||||
|
@ -97,10 +98,10 @@ fn test_bigint_exponent() {
|
|||
"SELECT(<__std__::bigint>$0)+(<__std__::bigint>$1)");
|
||||
assert_eq!(entry.variables, vec![vec![
|
||||
Variable {
|
||||
value: Value::BigInt(10000000000u64.into()),
|
||||
value: Value::BigInt(BigInt::from(10000000000u64).to_str_radix(16)),
|
||||
},
|
||||
Variable {
|
||||
value: Value::BigInt(230000000000000u64.into()),
|
||||
value: Value::BigInt(BigInt::from(230000000000000u64).to_str_radix(16)),
|
||||
}
|
||||
]]);
|
||||
}
|
||||
|
@ -203,6 +204,7 @@ fn test_script() {
|
|||
value: Value::Int(2),
|
||||
}
|
||||
],
|
||||
vec![]
|
||||
]);
|
||||
}
|
||||
|
||||
|
@ -227,5 +229,6 @@ fn test_script_with_args() {
|
|||
value: Value::Int(2),
|
||||
}
|
||||
],
|
||||
vec![]
|
||||
]);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::position::Pos;
|
||||
use crate::position::{Pos, InflatedPos};
|
||||
use crate::tokenizer::{Kind, self};
|
||||
|
||||
/// Error of expression checking
|
||||
|
@ -79,10 +79,12 @@ pub fn check(text: &str) -> Result<(), Error> {
|
|||
}
|
||||
};
|
||||
let pos = token.span.start;
|
||||
let pos = InflatedPos::from_offset(text.as_bytes(), pos).unwrap().deflate();
|
||||
|
||||
empty = false;
|
||||
match token.kind {
|
||||
Comma | Semicolon if brackets.is_empty() => {
|
||||
return Err(UnexpectedToken(token.text.to_string(), pos));
|
||||
return Err(UnexpectedToken(token.text.into(), pos));
|
||||
}
|
||||
OpenParen | OpenBracket | OpenBrace => {
|
||||
brackets.push((token.kind, pos));
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::fmt::{self, Write};
|
|||
use std::error::Error;
|
||||
use std::char;
|
||||
|
||||
use crate::tokenizer::is_keyword;
|
||||
use crate::keywords;
|
||||
|
||||
/// Error returned from `unquote_string` function
|
||||
///
|
||||
|
@ -23,7 +23,7 @@ pub struct UnquoteError(String);
|
|||
pub fn quote_name(s: &str) -> Cow<str> {
|
||||
if s.chars().all(|c| c.is_alphanumeric() || c == '_') {
|
||||
let lower = s.to_ascii_lowercase();
|
||||
if !is_keyword(&lower) {
|
||||
if keywords::lookup(&lower).is_none() {
|
||||
return s.into();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
pub const UNRESERVED_KEYWORDS: &[&str] = &[
|
||||
use phf::phf_set;
|
||||
|
||||
pub const UNRESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
|
||||
"abort",
|
||||
"abstract",
|
||||
"access",
|
||||
|
@ -103,20 +105,11 @@ pub const UNRESERVED_KEYWORDS: &[&str] = &[
|
|||
"version",
|
||||
"view",
|
||||
"write",
|
||||
];
|
||||
);
|
||||
|
||||
pub const PARTIAL_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!("except", "intersect", "union",);
|
||||
|
||||
pub const PARTIAL_RESERVED_KEYWORDS: &[&str] = &[
|
||||
// Keep in sync with `tokenizer::is_keyword`
|
||||
"except",
|
||||
"intersect",
|
||||
"union",
|
||||
// Keep in sync with `tokenizer::is_keyword`
|
||||
];
|
||||
|
||||
|
||||
pub const FUTURE_RESERVED_KEYWORDS: &[&str] = &[
|
||||
// Keep in sync with `tokenizer::is_keyword`
|
||||
pub const FUTURE_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
|
||||
"anyarray",
|
||||
"begin",
|
||||
"case",
|
||||
|
@ -147,18 +140,15 @@ pub const FUTURE_RESERVED_KEYWORDS: &[&str] = &[
|
|||
"when",
|
||||
"window",
|
||||
"never",
|
||||
// Keep in sync with `tokenizer::is_keyword`
|
||||
];
|
||||
);
|
||||
|
||||
pub const CURRENT_RESERVED_KEYWORDS: &[&str] = &[
|
||||
// Keep in sync with `tokenizer::is_keyword`
|
||||
pub const CURRENT_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
|
||||
"__source__",
|
||||
"__subject__",
|
||||
"__type__",
|
||||
"__std__",
|
||||
"__edgedbsys__",
|
||||
"__edgedbtpl__",
|
||||
"__std__",
|
||||
"__new__",
|
||||
"__old__",
|
||||
"__specified__",
|
||||
|
@ -207,5 +197,38 @@ pub const CURRENT_RESERVED_KEYWORDS: &[&str] = &[
|
|||
"update",
|
||||
"variadic",
|
||||
"with",
|
||||
// Keep in sync with `tokenizer::is_keyword`
|
||||
];
|
||||
);
|
||||
|
||||
pub const COMBINED_KEYWORDS: phf::Set<&str> = phf_set!(
|
||||
"named only",
|
||||
"set annotation",
|
||||
"set type",
|
||||
"extension package",
|
||||
"order by",
|
||||
);
|
||||
|
||||
pub fn lookup(s: &str) -> Option<Keyword> {
|
||||
None.or_else(|| PARTIAL_RESERVED_KEYWORDS.get_key(s))
|
||||
.or_else(|| FUTURE_RESERVED_KEYWORDS.get_key(s))
|
||||
.or_else(|| CURRENT_RESERVED_KEYWORDS.get_key(s))
|
||||
.map(|x| Keyword(x))
|
||||
}
|
||||
|
||||
pub fn lookup_all(s: &str) -> Option<Keyword> {
|
||||
lookup(s).or_else(|| {
|
||||
None.or_else(|| COMBINED_KEYWORDS.get_key(s))
|
||||
.or_else(|| UNRESERVED_KEYWORDS.get_key(s))
|
||||
.map(|x| Keyword(x))
|
||||
})
|
||||
}
|
||||
|
||||
/// A keyword, stored as its `'static` spelling from one of the keyword sets.
///
/// This wrapper type is required for the serde deserializer for Token to
/// work correctly.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Keyword(pub &'static str);
|
||||
|
||||
impl From<Keyword> for &'static str {
|
||||
fn from(value: Keyword) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,9 @@ pub mod helpers;
|
|||
#[cfg(feature = "python")]
|
||||
pub mod into_python;
|
||||
pub mod keywords;
|
||||
pub mod parser;
|
||||
pub mod position;
|
||||
pub mod preparser;
|
||||
pub mod schema_file;
|
||||
pub mod tokenizer;
|
||||
pub mod validation;
|
||||
pub mod validation;
|
590
edb/edgeql-parser/src/parser.rs
Normal file
590
edb/edgeql-parser/src/parser.rs
Normal file
|
@ -0,0 +1,590 @@
|
|||
use append_only_vec::AppendOnlyVec;
|
||||
use indexmap::IndexMap;
|
||||
|
||||
use crate::helpers::quote_name;
|
||||
use crate::keywords::Keyword;
|
||||
use crate::position::Span;
|
||||
use crate::tokenizer::{Error, Kind, Token, Value};
|
||||
|
||||
pub struct Context<'s> {
|
||||
spec: &'s Spec,
|
||||
arena: bumpalo::Bump,
|
||||
terminal_arena: AppendOnlyVec<Terminal>,
|
||||
}
|
||||
|
||||
impl<'s> Context<'s> {
|
||||
pub fn new(spec: &'s Spec) -> Self {
|
||||
Context {
|
||||
spec,
|
||||
arena: bumpalo::Bump::new(),
|
||||
terminal_arena: AppendOnlyVec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse<'a>(input: &'a [Terminal], ctx: &'a Context) -> (Option<&'a CSTNode<'a>>, Vec<Error>) {
|
||||
let stack_top = ctx.arena.alloc(StackNode {
|
||||
parent: None,
|
||||
state: 0,
|
||||
value: CSTNode::Empty,
|
||||
});
|
||||
let initial_track = Parser {
|
||||
stack_top,
|
||||
error_cost: 0,
|
||||
node_count: 0,
|
||||
can_recover: true,
|
||||
errors: Vec::new(),
|
||||
};
|
||||
|
||||
// append EIO
|
||||
let end = input.last().map(|t| t.span.end).unwrap_or_default();
|
||||
let eoi = ctx.alloc_terminal(Terminal {
|
||||
kind: Kind::EOI,
|
||||
span: Span { start: end, end },
|
||||
text: "".to_string(),
|
||||
value: None,
|
||||
});
|
||||
let input = input.iter().chain(Some(eoi));
|
||||
|
||||
let mut parsers = vec![initial_track];
|
||||
let mut prev_span: Option<Span> = None;
|
||||
let mut new_parsers = Vec::with_capacity(parsers.len() + 5);
|
||||
|
||||
for token in input {
|
||||
while let Some(mut parser) = parsers.pop() {
|
||||
let res = parser.act(ctx, token);
|
||||
|
||||
if res.is_ok() {
|
||||
// base case: ok
|
||||
parser.node_successful();
|
||||
new_parsers.push(parser);
|
||||
} else {
|
||||
// error: try to recover
|
||||
|
||||
let gap_span = {
|
||||
let prev_end = prev_span.map(|p| p.end).unwrap_or(token.span.start);
|
||||
|
||||
Span {
|
||||
start: prev_end,
|
||||
end: token.span.start,
|
||||
}
|
||||
};
|
||||
|
||||
// option 1: inject a token
|
||||
let possible_actions = &ctx.spec.actions[parser.stack_top.state];
|
||||
for token_kind in possible_actions.keys() {
|
||||
let mut inject = parser.clone();
|
||||
|
||||
let injection = new_token_for_injection(token_kind, ctx);
|
||||
|
||||
let cost = error_cost(token_kind);
|
||||
let error = Error::new(format!("Missing {injection}")).with_span(gap_span);
|
||||
inject.push_error(error, cost);
|
||||
|
||||
if inject.error_cost <= ERROR_COST_INJECT_MAX {
|
||||
// println!(" --> [inject {injection}]");
|
||||
|
||||
if inject.act(ctx, injection).is_ok() {
|
||||
// insert into parsers, to retry the original token
|
||||
parsers.push(inject);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// option 2: skip the token
|
||||
|
||||
let mut skip = parser;
|
||||
let error = Error::new(format!("Unexpected {token}")).with_span(token.span);
|
||||
skip.push_error(error, ERROR_COST_SKIP);
|
||||
if token.kind == Kind::EOF {
|
||||
// extra penalty
|
||||
skip.error_cost += ERROR_COST_INJECT_MAX;
|
||||
skip.can_recover = false;
|
||||
};
|
||||
|
||||
// println!(" --> [skip]");
|
||||
|
||||
// insert into new_parsers, so the token is skipped
|
||||
new_parsers.push(skip);
|
||||
}
|
||||
}
|
||||
|
||||
// has any parser recovered?
|
||||
if new_parsers.len() > 1 {
|
||||
let recovered = new_parsers.iter().position(Parser::has_recovered);
|
||||
|
||||
if let Some(recovered) = recovered {
|
||||
let mut recovered = new_parsers.swap_remove(recovered);
|
||||
recovered.error_cost = 0;
|
||||
|
||||
new_parsers.clear();
|
||||
new_parsers.push(recovered);
|
||||
}
|
||||
}
|
||||
|
||||
// prune: pick only X best parsers
|
||||
if new_parsers.len() > PARSER_COUNT_MAX {
|
||||
new_parsers.sort_by_key(Parser::adjusted_cost);
|
||||
new_parsers.drain(PARSER_COUNT_MAX..);
|
||||
}
|
||||
|
||||
assert!(parsers.is_empty());
|
||||
std::mem::swap(&mut parsers, &mut new_parsers);
|
||||
prev_span = Some(token.span);
|
||||
}
|
||||
|
||||
// there will always be a parser left,
|
||||
// since we always allow a token to be skipped
|
||||
let mut parser = parsers.into_iter().min_by_key(|p| p.error_cost).unwrap();
|
||||
parser.finish();
|
||||
|
||||
let node = if parser.can_recover {
|
||||
Some(&parser.stack_top.value)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
(node, parser.errors)
|
||||
}
|
||||
|
||||
impl<'s> Context<'s> {
|
||||
fn alloc_terminal(&self, t: Terminal) -> &'_ Terminal {
|
||||
let idx = self.terminal_arena.push(t);
|
||||
&self.terminal_arena[idx]
|
||||
}
|
||||
}
|
||||
|
||||
fn new_token_for_injection<'a>(kind: &Kind, ctx: &'a Context) -> &'a Terminal {
|
||||
ctx.alloc_terminal(Terminal {
|
||||
kind: kind.clone(),
|
||||
text: kind.text().unwrap_or_default().to_string(),
|
||||
value: match kind {
|
||||
Kind::Keyword(Keyword(kw)) => Some(Value::String(kw.to_string())),
|
||||
Kind::Ident => Some(Value::String("my_name".to_string())),
|
||||
_ => None,
|
||||
},
|
||||
span: Span::default(),
|
||||
})
|
||||
}
|
||||
|
||||
pub struct Spec {
|
||||
pub actions: Vec<IndexMap<Kind, Action>>,
|
||||
pub goto: Vec<IndexMap<String, usize>>,
|
||||
pub start: String,
|
||||
pub inlines: IndexMap<usize, u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
#[cfg_attr(feature = "serde", serde(untagged))]
|
||||
pub enum Action {
|
||||
Shift(usize),
|
||||
Reduce(Reduce),
|
||||
}
|
||||
|
||||
/// Description of a reduce action.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Reduce {
    /// Index of the production in the associated production array
    pub production_id: usize,

    /// Name of the non-terminal produced; used as the key into the goto
    /// table.
    pub non_term: String,

    /// Number of arguments, i.e. how many stack entries are popped
    pub cnt: usize,
}
|
||||
|
||||
/// A node of the CST tree.
|
||||
///
|
||||
/// Warning: allocated in the bumpalo arena, which does not Drop.
|
||||
/// Any types that do allocation with global allocator (such as String or Vec),
|
||||
/// must manually drop. This is why Terminal has a special vec arena that does
|
||||
/// Drop.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum CSTNode<'a> {
|
||||
Empty,
|
||||
Terminal(&'a Terminal),
|
||||
Production(Production<'a>),
|
||||
}
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Terminal {
|
||||
pub kind: Kind,
|
||||
pub text: String,
|
||||
pub value: Option<Value>,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Production<'a> {
|
||||
pub id: usize,
|
||||
pub args: &'a [CSTNode<'a>],
|
||||
}
|
||||
|
||||
struct StackNode<'p> {
|
||||
parent: Option<&'p StackNode<'p>>,
|
||||
|
||||
state: usize,
|
||||
value: CSTNode<'p>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Parser<'s> {
|
||||
stack_top: &'s StackNode<'s>,
|
||||
|
||||
/// sum of cost of every error recovery action
|
||||
error_cost: u16,
|
||||
|
||||
/// number of nodes pushed to stack since last error
|
||||
node_count: u16,
|
||||
|
||||
can_recover: bool,
|
||||
|
||||
errors: Vec<Error>,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
fn act(&mut self, ctx: &'s Context, token: &'s Terminal) -> Result<(), ()> {
|
||||
// self.print_stack();
|
||||
// println!("INPUT: {}", token.text);
|
||||
|
||||
loop {
|
||||
// find next action
|
||||
let Some(action) = ctx.spec.actions[self.stack_top.state].get(&token.kind) else {
|
||||
return Err(());
|
||||
};
|
||||
|
||||
match action {
|
||||
Action::Shift(next) => {
|
||||
// println!(" --> [shift {next}]");
|
||||
|
||||
// push on stack
|
||||
self.push_on_stack(ctx, *next, CSTNode::Terminal(token));
|
||||
return Ok(());
|
||||
}
|
||||
Action::Reduce(reduce) => {
|
||||
self.reduce(ctx, reduce);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reduce(&mut self, ctx: &'s Context, reduce: &'s Reduce) {
|
||||
let args = ctx.arena.alloc_slice_fill_with(reduce.cnt, |_| {
|
||||
let v = self.stack_top.value;
|
||||
self.stack_top = self.stack_top.parent.unwrap();
|
||||
v
|
||||
});
|
||||
args.reverse();
|
||||
|
||||
let value = CSTNode::Production(Production {
|
||||
id: reduce.production_id,
|
||||
args,
|
||||
});
|
||||
|
||||
let nstate = self.stack_top.state;
|
||||
|
||||
let next = *ctx.spec.goto[nstate].get(&reduce.non_term).unwrap();
|
||||
|
||||
// inline (if there is an inlining rule)
|
||||
let mut value = value;
|
||||
if let CSTNode::Production(production) = value {
|
||||
if let Some(inline_position) = ctx.spec.inlines.get(&production.id) {
|
||||
// inline rule found
|
||||
let args = production.args;
|
||||
let span = get_span_of_nodes(&args);
|
||||
|
||||
value = args[*inline_position as usize];
|
||||
|
||||
extend_span(&mut value, span, ctx);
|
||||
} else {
|
||||
// place back
|
||||
value = CSTNode::Production(production);
|
||||
}
|
||||
}
|
||||
|
||||
self.push_on_stack(ctx, next, value);
|
||||
|
||||
// println!(
|
||||
// " --> [reduce {} ::= ({} popped) at {}/{}]",
|
||||
// production, cnt, state, nstate
|
||||
// );
|
||||
// self.print_stack();
|
||||
}
|
||||
|
||||
pub fn push_on_stack(&mut self, ctx: &'s Context, state: usize, value: CSTNode<'s>) {
|
||||
let node = StackNode {
|
||||
parent: Some(self.stack_top),
|
||||
state,
|
||||
value,
|
||||
};
|
||||
self.stack_top = ctx.arena.alloc(node);
|
||||
}
|
||||
|
||||
pub fn finish(&mut self) {
|
||||
debug_assert!(matches!(
|
||||
&self.stack_top.value,
|
||||
CSTNode::Terminal(Terminal {
|
||||
kind: Kind::EOI,
|
||||
..
|
||||
})
|
||||
));
|
||||
self.stack_top = self.stack_top.parent.unwrap();
|
||||
|
||||
// self.print_stack();
|
||||
// println!(" --> accept");
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
let first = self.stack_top.parent.unwrap();
|
||||
assert!(
|
||||
matches!(&first.value, CSTNode::Empty),
|
||||
"expected 'Empty' found {:?}",
|
||||
first.value
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(never)]
|
||||
fn print_stack(&self) {
|
||||
let prefix = "STACK: ";
|
||||
|
||||
let mut stack = Vec::new();
|
||||
let mut node = Some(self.stack_top);
|
||||
while let Some(n) = node {
|
||||
stack.push(n);
|
||||
node = n.parent.clone();
|
||||
}
|
||||
stack.reverse();
|
||||
|
||||
let names = stack
|
||||
.iter()
|
||||
.map(|s| format!("{:?}", s.value))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut states = format!("{:6}", ' ');
|
||||
for (index, node) in stack.iter().enumerate() {
|
||||
let name_width = names[index].chars().count();
|
||||
states += &format!(" {:<width$}", node.state, width = name_width);
|
||||
}
|
||||
|
||||
println!("{}{}", prefix, names.join(" "));
|
||||
println!("{}", states);
|
||||
}
|
||||
|
||||
fn push_error(&mut self, error: Error, cost: u16) {
|
||||
self.errors.push(error);
|
||||
self.error_cost += cost;
|
||||
self.node_count = 0;
|
||||
}
|
||||
|
||||
fn node_successful(&mut self) {
|
||||
self.node_count += 1;
|
||||
}
|
||||
|
||||
/// Error cost, subtracted by a function of successfully parsed nodes.
|
||||
fn adjusted_cost(&self) -> u16 {
|
||||
let x = self.node_count.saturating_sub(3);
|
||||
self.error_cost.saturating_sub(x * x)
|
||||
}
|
||||
|
||||
fn has_recovered(&self) -> bool {
|
||||
self.can_recover && self.adjusted_cost() == 0
|
||||
}
|
||||
}
|
||||
|
||||
fn get_span_of_nodes(args: &[CSTNode]) -> Option<Span> {
|
||||
let start = args.iter().find_map(|x| match x {
|
||||
CSTNode::Terminal(t) => Some(t.span.start),
|
||||
_ => None,
|
||||
})?;
|
||||
let end = args.iter().rev().find_map(|x| match x {
|
||||
CSTNode::Terminal(t) => Some(t.span.end),
|
||||
_ => None,
|
||||
})?;
|
||||
Some(Span { start, end })
|
||||
}
|
||||
|
||||
fn extend_span<'a>(value: &mut CSTNode<'a>, span: Option<Span>, ctx: &'a Context) {
|
||||
let Some(span) = span else {
|
||||
return;
|
||||
};
|
||||
|
||||
let CSTNode::Terminal(terminal) = value else {
|
||||
return
|
||||
};
|
||||
|
||||
let mut new_term = terminal.clone();
|
||||
|
||||
if span.start < new_term.span.start {
|
||||
new_term.span.start = span.start;
|
||||
}
|
||||
if span.end > new_term.span.end {
|
||||
new_term.span.end = span.end;
|
||||
}
|
||||
*terminal = ctx.alloc_terminal(new_term);
|
||||
}
|
||||
|
||||
/// Maximum number of candidate parsers kept after each token.
const PARSER_COUNT_MAX: usize = 10;

/// Highest accumulated error cost at which a token may still be injected.
const ERROR_COST_INJECT_MAX: u16 = 15;

/// Cost of skipping an unexpected token.
const ERROR_COST_SKIP: u16 = 3;
|
||||
|
||||
fn error_cost(kind: &Kind) -> u16 {
|
||||
use Kind::*;
|
||||
|
||||
match kind {
|
||||
Ident => 9,
|
||||
Substitution => 8,
|
||||
Keyword(_) => 10,
|
||||
|
||||
Dot => 5,
|
||||
OpenBrace | OpenBracket | OpenParen => 5,
|
||||
|
||||
CloseBrace | CloseBracket | CloseParen => 1,
|
||||
|
||||
Namespace => 10,
|
||||
Semicolon | Comma | Colon => 2,
|
||||
Eq => 5,
|
||||
|
||||
At => 6,
|
||||
IntConst => 8,
|
||||
|
||||
Assign | Arrow => 5,
|
||||
|
||||
_ => 100, // forbidden
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Terminal {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.text.is_empty() {
|
||||
return write!(f, "{}", self.kind.user_friendly_text().unwrap_or_default());
|
||||
}
|
||||
|
||||
match self.kind {
|
||||
Kind::Ident => write!(f, "'{}'", "e_name(&self.text)),
|
||||
Kind::Keyword(Keyword(kw)) => write!(f, "keyword '{}'", kw.to_ascii_uppercase()),
|
||||
_ => write!(f, "'{}'", self.text),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Default for CSTNode<'a> {
|
||||
fn default() -> Self {
|
||||
CSTNode::Empty
|
||||
}
|
||||
}
|
||||
|
||||
impl Terminal {
|
||||
pub fn from_token(token: Token) -> Self {
|
||||
Terminal {
|
||||
kind: token.kind,
|
||||
text: token.text.into(),
|
||||
value: token.value,
|
||||
span: token.span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "serde")]
impl Spec {
    /// Builds a `Spec` from its JSON representation.
    ///
    /// In the JSON form the maps are lists of key/value pairs, and action
    /// keys are token names that are translated with `get_token_kind`.
    /// Returns the deserializer's message when the JSON is invalid.
    pub fn from_json(j_spec: &str) -> Result<Spec, String> {
        #[derive(Debug, serde::Serialize, serde::Deserialize)]
        struct SpecJson {
            pub actions: Vec<Vec<(String, Action)>>,
            pub goto: Vec<Vec<(String, usize)>>,
            pub start: String,
            pub inlines: Vec<(usize, u8)>,
        }

        let parsed: SpecJson = serde_json::from_str(j_spec).map_err(|e| e.to_string())?;
        let SpecJson {
            actions,
            goto,
            start,
            inlines,
        } = parsed;

        let actions = actions
            .into_iter()
            .map(|row| {
                row.into_iter()
                    .map(|(name, action)| (get_token_kind(&name), action))
                    .collect::<IndexMap<_, _>>()
            })
            .collect();
        let goto = goto
            .into_iter()
            .map(|row| row.into_iter().collect::<IndexMap<_, _>>())
            .collect();
        let inlines = inlines.into_iter().collect();

        Ok(Spec {
            actions,
            goto,
            start,
            inlines,
        })
    }
}
|
||||
|
||||
/// Translates a token name used in the JSON spec into a token `Kind`.
///
/// Names not listed explicitly are treated as keywords: they are
/// lower-cased, a `dunder` prefix turns `dunderNAME` into `__NAME__`, and
/// the result is looked up among all keywords.
///
/// Panics if the name is not a known keyword.
#[cfg(feature = "serde")]
fn get_token_kind(token_name: &str) -> Kind {
    use Kind::*;

    match token_name {
        // brackets
        "{" => OpenBrace,
        "}" => CloseBrace,
        "[" => OpenBracket,
        "]" => CloseBracket,
        "(" => OpenParen,
        ")" => CloseParen,

        // punctuation
        "." => Dot,
        ".<" => BackwardLink,
        "," => Comma,
        ":" => Colon,
        "::" => Namespace,
        ";" => Semicolon,
        "@" => At,
        "&" => Ampersand,
        "|" => Pipe,

        // arithmetic and other operators
        "+" => Add,
        "-" => Sub,
        "*" => Mul,
        "/" => Div,
        "//" => FloorDiv,
        "%" => Modulo,
        "^" => Pow,
        "++" => Concat,
        "**" => DoubleSplat,
        "??" => Coalesce,

        // comparison
        "=" => Eq,
        "!=" => NotEq,
        "<" => Less,
        ">" => Greater,
        "<=" => LessEq,
        ">=" => GreaterEq,
        "?=" => NotDistinctFrom,
        "?!=" => DistinctFrom,

        // assignment and arrows
        ":=" => Assign,
        "+=" => AddAssign,
        "-=" => SubAssign,
        "->" => Arrow,

        // constants
        "BCONST" => BinStr,
        "FCONST" => FloatConst,
        "ICONST" => IntConst,
        "NFCONST" => DecimalConst,
        "NICONST" => BigIntConst,
        "SCONST" => Str,

        // identifiers, parameters and special markers
        "IDENT" => Ident,
        "ARGUMENT" => Argument,
        "SUBSTITUTION" => Substitution,
        "EOF" => EOF,
        "<$>" => EOI,
        "<e>" => Epsilon,

        other => {
            let lowered = other.to_lowercase();
            let dunder = lowered
                .strip_prefix("dunder")
                .map(|rem| format!("__{rem}__"));
            let name = dunder.unwrap_or(lowered);

            let kw = crate::keywords::lookup_all(&name)
                .unwrap_or_else(|| panic!("unknown keyword {name}"));
            Keyword(kw)
        }
    }
}
|
|
@ -4,13 +4,24 @@ use std::str::{from_utf8, Utf8Error};
|
|||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
/// Span of an element in source code
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
pub struct Span {
|
||||
pub start: Pos,
|
||||
pub end: Pos,
|
||||
/// Byte offset in the original file
|
||||
///
|
||||
/// Technically you can read > 4Gb file on 32bit machine so it may
|
||||
/// not fit in usize
|
||||
pub start: u64,
|
||||
|
||||
/// Byte offset in the original file
|
||||
///
|
||||
/// Technically you can read > 4Gb file on 32bit machine so it may
|
||||
/// not fit in usize
|
||||
pub end: u64,
|
||||
}
|
||||
/// Original position of element in source code
|
||||
#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Copy, Default, Hash)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
|
||||
pub struct Pos {
|
||||
/// One-based line number
|
||||
pub line: usize,
|
||||
|
@ -88,6 +99,11 @@ fn new_lines_in_fragment(data: &[u8]) -> u64 {
|
|||
|
||||
impl InflatedPos {
|
||||
|
||||
pub fn from_offset(data: &[u8], offset: u64) -> Result<InflatedPos, InflatingError> {
|
||||
let res = Self::from_offsets(data, &[offset as usize])?;
|
||||
Ok(res.into_iter().next().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_offsets(data: &[u8], offsets: &[usize])
|
||||
-> Result<Vec<InflatedPos>, InflatingError>
|
||||
{
|
||||
|
@ -125,6 +141,14 @@ impl InflatedPos {
|
|||
}
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
pub fn deflate(self) -> Pos {
|
||||
Pos {
|
||||
line: self.line as usize + 1,
|
||||
column: self.column as usize + 1,
|
||||
offset: self.offset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -4,11 +4,13 @@ use bigdecimal::num_bigint::ToBigInt;
|
|||
use bigdecimal::BigDecimal;
|
||||
|
||||
use crate::helpers::{unquote_bytes, unquote_string};
|
||||
use crate::position::Pos;
|
||||
use crate::keywords::Keyword;
|
||||
use crate::position::{Pos, Span};
|
||||
use crate::tokenizer::{Error, Kind, Token, Tokenizer, Value, MAX_KEYWORD_LENGTH};
|
||||
|
||||
/// Applies additional validation to the tokens.
|
||||
/// Combines multi-word keywords into single tokens.
|
||||
/// Remaps a few token kinds.
|
||||
pub struct Validator<'a> {
|
||||
pub inner: Tokenizer<'a>,
|
||||
|
||||
|
@ -30,12 +32,14 @@ impl<'a> Iterator for Validator<'a> {
|
|||
Err(e) => return Some(Err(Error::new(e).with_span(token.span))),
|
||||
};
|
||||
|
||||
if let Some(text) = self.combine_multi_word_keywords(&token) {
|
||||
token.kind = Kind::Keyword;
|
||||
token.text = text.into();
|
||||
if let Some(keyword) = self.combine_multi_word_keywords(&token) {
|
||||
token.text = keyword.into();
|
||||
token.kind = Kind::Keyword(Keyword(keyword));
|
||||
self.peeked = None;
|
||||
}
|
||||
|
||||
token.kind = remap_kind(token.kind);
|
||||
|
||||
Some(Ok(token))
|
||||
}
|
||||
}
|
||||
|
@ -49,6 +53,13 @@ impl<'a> Validator<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn with_eof(self) -> WithEof<'a> {
|
||||
WithEof {
|
||||
inner: self,
|
||||
emitted: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Mimics behavior of [std::iter::Peekable]. We could use that, but it
|
||||
/// hides access to underlying iterator.
|
||||
fn next_inner(&mut self) -> Option<Result<Token<'a>, Error>> {
|
||||
|
@ -61,7 +72,7 @@ impl<'a> Validator<'a> {
|
|||
|
||||
/// Mimics behavior of [std::iter::Peekable]. We could use that, but it
|
||||
/// hides access to underlying iterator.
|
||||
fn peek(&mut self) -> &Option<Result<Token<'a>, Error>> {
|
||||
fn peek(&mut self) -> &Option<Result<Token, Error>> {
|
||||
if self.peeked.is_none() {
|
||||
self.peeked = Some(self.inner.next());
|
||||
}
|
||||
|
@ -73,8 +84,8 @@ impl<'a> Validator<'a> {
|
|||
self.inner.current_pos()
|
||||
}
|
||||
|
||||
fn combine_multi_word_keywords(&mut self, token: &Token) -> Option<&'static str> {
|
||||
if !matches!(token.kind, Kind::Ident | Kind::Keyword) {
|
||||
fn combine_multi_word_keywords(&mut self, token: &Token<'a>) -> Option<&'static str> {
|
||||
if !matches!(token.kind, Kind::Ident | Kind::Keyword(_)) {
|
||||
return None;
|
||||
}
|
||||
let text = &token.text;
|
||||
|
@ -115,19 +126,19 @@ impl<'a> Validator<'a> {
|
|||
return None;
|
||||
}
|
||||
|
||||
fn peek_keyword(&mut self, kw: &str) -> bool {
|
||||
fn peek_keyword(&mut self, kw: &'static str) -> bool {
|
||||
self.peek()
|
||||
.as_ref()
|
||||
.and_then(|res| res.as_ref().ok())
|
||||
.map(|t| {
|
||||
(t.kind == Kind::Ident || t.kind == Kind::Keyword)
|
||||
&& t.text.eq_ignore_ascii_case(kw)
|
||||
t.kind == Kind::Keyword(Keyword(kw))
|
||||
|| (t.kind == Kind::Ident && t.text.eq_ignore_ascii_case(kw))
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_value(token: &Token<'_>) -> Result<Option<Value>, String> {
|
||||
pub fn parse_value(token: &Token) -> Result<Option<Value>, String> {
|
||||
use Kind::*;
|
||||
let text = &token.text;
|
||||
let string_value = match token.kind {
|
||||
|
@ -175,33 +186,69 @@ pub fn parse_value(token: &Token<'_>) -> Result<Option<Value>, String> {
|
|||
// Python has no problem of representing such a positive
|
||||
// value, though.
|
||||
return u64::from_str(&text.replace("_", ""))
|
||||
.map(|x| Value::Int(x as i64))
|
||||
.map(Some)
|
||||
.map(|x| Some(Value::Int(x as i64)))
|
||||
.map_err(|e| format!("error reading int: {}", e));
|
||||
}
|
||||
BigIntConst => {
|
||||
let dec = text[..text.len() - 1]
|
||||
return text[..text.len() - 1]
|
||||
.replace("_", "")
|
||||
.parse::<BigDecimal>()
|
||||
.map_err(|e| format!("error reading bigint: {}", e))?;
|
||||
// this conversion to decimal and back to string
|
||||
// fixes thing like `1e2n` which we support for bigints
|
||||
return Ok(Some(Value::BigInt(
|
||||
dec.to_bigint()
|
||||
.ok_or_else(|| "number is not integer".to_string())?,
|
||||
)));
|
||||
.map_err(|e| format!("error reading bigint: {}", e))
|
||||
// this conversion to decimal and back to string
|
||||
// fixes thing like `1e2n` which we support for bigints
|
||||
.and_then(|x| {
|
||||
x.to_bigint()
|
||||
.ok_or_else(|| "number is not integer".to_string())
|
||||
})
|
||||
.map(|x| Some(Value::BigInt(x.to_str_radix(16))));
|
||||
}
|
||||
BinStr => {
|
||||
return unquote_bytes(text).map(Value::Bytes).map(Some);
|
||||
}
|
||||
|
||||
Str => unquote_string(text)
|
||||
.map_err(|s| s.to_string())?
|
||||
.to_string(),
|
||||
Str => unquote_string(text).map_err(|s| s.to_string())?.to_string(),
|
||||
BacktickName => text[1..text.len() - 1].replace("``", "`"),
|
||||
Ident | Keyword => text.to_string(),
|
||||
Ident | Keyword(_) => text.to_string(),
|
||||
Substitution => text[2..text.len() - 1].to_string(),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
Ok(Some(Value::String(string_value)))
|
||||
}
|
||||
|
||||
fn remap_kind(kind: Kind) -> Kind {
|
||||
match kind {
|
||||
Kind::BacktickName => Kind::Ident,
|
||||
kind => kind,
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WithEof<'a> {
|
||||
inner: Validator<'a>,
|
||||
|
||||
emitted: bool,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for WithEof<'a> {
|
||||
type Item = Result<Token<'a>, Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(next) = self.inner.next() {
|
||||
Some(next)
|
||||
} else if !self.emitted {
|
||||
self.emitted = true;
|
||||
let pos = self.inner.current_pos().offset;
|
||||
|
||||
Some(Ok(Token {
|
||||
kind: Kind::EOF,
|
||||
text: "".into(),
|
||||
value: None,
|
||||
span: Span {
|
||||
start: pos,
|
||||
end: pos,
|
||||
},
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,10 @@ fn tok_err(s: &str) -> String {
|
|||
panic!("No error, where error expected");
|
||||
}
|
||||
|
||||
fn keyword(kw: &'static str) -> Kind {
|
||||
Keyword(edgeql_parser::keywords::Keyword(kw))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn whitespace_and_comments() {
|
||||
assert_eq!(tok_str("# hello { world }"), &[] as &[&str]);
|
||||
|
@ -64,9 +68,9 @@ fn idents() {
|
|||
#[test]
|
||||
fn keywords() {
|
||||
assert_eq!(tok_str("SELECT a"), ["SELECT", "a"]);
|
||||
assert_eq!(tok_typ("SELECT a"), [Keyword, Ident]);
|
||||
assert_eq!(tok_typ("SELECT a"), [keyword("select"), Ident]);
|
||||
assert_eq!(tok_str("with Select"), ["with", "Select"]);
|
||||
assert_eq!(tok_typ("with Select"), [Keyword, Keyword]);
|
||||
assert_eq!(tok_typ("with Select"), [keyword("with"), keyword("select")]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -375,92 +379,98 @@ fn decimal() {
|
|||
|
||||
#[test]
|
||||
fn numbers_from_py() {
|
||||
assert_eq!(tok_str("SELECT 3.5432;"), ["SELECT", "3.5432", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432;"), [Keyword, FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT +3.5432;"), ["SELECT", "+", "3.5432", ";"]);
|
||||
assert_eq!(tok_str("SELECT 3.5432;"),
|
||||
["SELECT", "3.5432", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432;"),
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT +3.5432;"),
|
||||
["SELECT", "+", "3.5432", ";"]);
|
||||
assert_eq!(tok_typ("SELECT +3.5432;"),
|
||||
[Keyword, Add, FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -3.5432;"), ["SELECT", "-", "3.5432", ";"]);
|
||||
[keyword("select"), Add, FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -3.5432;"),
|
||||
["SELECT", "-", "3.5432", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -3.5432;"),
|
||||
[Keyword, Sub, FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 354.32;"), ["SELECT", "354.32", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 354.32;"), [Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), Sub, FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 354.32;"),
|
||||
["SELECT", "354.32", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 354.32;"),
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 35400000000000.32;"),
|
||||
["SELECT", "35400000000000.32", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 35400000000000.32;"),
|
||||
[Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 35400000000000000000.32;"),
|
||||
["SELECT", "35400000000000000000.32", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 35400000000000000000.32;"),
|
||||
[Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 3.5432e20;"),
|
||||
["SELECT", "3.5432e20", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432e20;"),
|
||||
[Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 3.5432e+20;"),
|
||||
["SELECT", "3.5432e+20", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432e+20;"),
|
||||
[Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 3.5432e-20;"),
|
||||
["SELECT", "3.5432e-20", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432e-20;"),
|
||||
[Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 354.32e-20;"),
|
||||
["SELECT", "354.32e-20", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 354.32e-20;"),
|
||||
[Keyword, FloatConst, Semicolon]);
|
||||
[keyword("select"), FloatConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -0n;"),
|
||||
["SELECT", "-", "0n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -0n;"),
|
||||
[Keyword, Sub, BigIntConst, Semicolon]);
|
||||
[keyword("select"), Sub, BigIntConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 0n;"),
|
||||
["SELECT", "0n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 0n;"),
|
||||
[Keyword, BigIntConst, Semicolon]);
|
||||
[keyword("select"), BigIntConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 1n;"),
|
||||
["SELECT", "1n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 1n;"),
|
||||
[Keyword, BigIntConst, Semicolon]);
|
||||
[keyword("select"), BigIntConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -1n;"),
|
||||
["SELECT", "-", "1n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -1n;"),
|
||||
[Keyword, Sub, BigIntConst, Semicolon]);
|
||||
[keyword("select"), Sub, BigIntConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 100000n;"),
|
||||
["SELECT", "100000n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 100000n;"),
|
||||
[Keyword, BigIntConst, Semicolon]);
|
||||
[keyword("select"), BigIntConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -100000n;"),
|
||||
["SELECT", "-", "100000n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -100000n;"),
|
||||
[Keyword, Sub, BigIntConst, Semicolon]);
|
||||
[keyword("select"), Sub, BigIntConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -354.32n;"),
|
||||
["SELECT", "-", "354.32n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -354.32n;"),
|
||||
[Keyword, Sub, DecimalConst, Semicolon]);
|
||||
[keyword("select"), Sub, DecimalConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 35400000000000.32n;"),
|
||||
["SELECT", "35400000000000.32n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 35400000000000.32n;"),
|
||||
[Keyword, DecimalConst, Semicolon]);
|
||||
[keyword("select"), DecimalConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -35400000000000000000.32n;"),
|
||||
["SELECT", "-", "35400000000000000000.32n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -35400000000000000000.32n;"),
|
||||
[Keyword, Sub, DecimalConst, Semicolon]);
|
||||
[keyword("select"), Sub, DecimalConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 3.5432e20n;"),
|
||||
["SELECT", "3.5432e20n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432e20n;"),
|
||||
[Keyword, DecimalConst, Semicolon]);
|
||||
[keyword("select"), DecimalConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT -3.5432e+20n;"),
|
||||
["SELECT", "-", "3.5432e+20n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT -3.5432e+20n;"),
|
||||
[Keyword, Sub, DecimalConst, Semicolon]);
|
||||
[keyword("select"), Sub, DecimalConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 3.5432e-20n;"),
|
||||
["SELECT", "3.5432e-20n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 3.5432e-20n;"),
|
||||
[Keyword, DecimalConst, Semicolon]);
|
||||
[keyword("select"), DecimalConst, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT 354.32e-20n;"),
|
||||
["SELECT", "354.32e-20n", ";"]);
|
||||
assert_eq!(tok_typ("SELECT 354.32e-20n;"),
|
||||
[Keyword, DecimalConst, Semicolon]);
|
||||
[keyword("select"), DecimalConst, Semicolon]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -598,7 +608,7 @@ fn strings() {
|
|||
assert_eq!(tok_str(r#" rb'hello' "#), [r#"rb'hello'"#]);
|
||||
assert_eq!(tok_typ(r#" rb'hello' "#), [BinStr]);
|
||||
assert_eq!(tok_str(r#" `hello` "#), [r#"`hello`"#]);
|
||||
assert_eq!(tok_typ(r#" `hello` "#), [BacktickName]);
|
||||
assert_eq!(tok_typ(r#" `hello` "#), [Ident]);
|
||||
|
||||
assert_eq!(tok_str(r#" "hello""#), [r#""hello""#]);
|
||||
assert_eq!(tok_typ(r#" "hello""#), [Str]);
|
||||
|
@ -617,7 +627,7 @@ fn strings() {
|
|||
assert_eq!(tok_str(r#" rb'hello'"#), [r#"rb'hello'"#]);
|
||||
assert_eq!(tok_typ(r#" rb'hello'"#), [BinStr]);
|
||||
assert_eq!(tok_str(r#" `hello`"#), [r#"`hello`"#]);
|
||||
assert_eq!(tok_typ(r#" `hello`"#), [BacktickName]);
|
||||
assert_eq!(tok_typ(r#" `hello`"#), [Ident]);
|
||||
|
||||
assert_eq!(tok_str(r#" "h\"ello" "#), [r#""h\"ello""#]);
|
||||
assert_eq!(tok_typ(r#" "h\"ello" "#), [Str]);
|
||||
|
@ -636,9 +646,9 @@ fn strings() {
|
|||
assert_eq!(tok_str(r#" rb'hello\' "#), [r#"rb'hello\'"#]);
|
||||
assert_eq!(tok_typ(r#" rb'hello\' "#), [BinStr]);
|
||||
assert_eq!(tok_str(r#" `hello\` "#), [r#"`hello\`"#]);
|
||||
assert_eq!(tok_typ(r#" `hello\` "#), [BacktickName]);
|
||||
assert_eq!(tok_typ(r#" `hello\` "#), [Ident]);
|
||||
assert_eq!(tok_str(r#" `hel``lo` "#), [r#"`hel``lo`"#]);
|
||||
assert_eq!(tok_typ(r#" `hel``lo` "#), [BacktickName]);
|
||||
assert_eq!(tok_typ(r#" `hel``lo` "#), [Ident]);
|
||||
|
||||
assert_eq!(tok_str(r#" "h'el`lo" "#), [r#""h'el`lo""#]);
|
||||
assert_eq!(tok_typ(r#" "h'el`lo" "#), [Str]);
|
||||
|
@ -657,7 +667,7 @@ fn strings() {
|
|||
assert_eq!(tok_str(r#" rb'h"el`lo' "#), [r#"rb'h"el`lo'"#]);
|
||||
assert_eq!(tok_typ(r#" rb'h"el`lo' "#), [BinStr]);
|
||||
assert_eq!(tok_str(r#" `h'el"lo` "#), [r#"`h'el"lo`"#]);
|
||||
assert_eq!(tok_typ(r#" `h'el"lo\` "#), [BacktickName]);
|
||||
assert_eq!(tok_typ(r#" `h'el"lo\` "#), [Ident]);
|
||||
|
||||
assert_eq!(tok_str(" \"hel\nlo\" "), ["\"hel\nlo\""]);
|
||||
assert_eq!(tok_typ(" \"hel\nlo\" "), [Str]);
|
||||
|
@ -676,7 +686,7 @@ fn strings() {
|
|||
assert_eq!(tok_str(" rb'hel\nlo' "), ["rb'hel\nlo'"]);
|
||||
assert_eq!(tok_str(" br'hel\nlo' "), ["br'hel\nlo'"]);
|
||||
assert_eq!(tok_str(" `hel\nlo` "), ["`hel\nlo`"]);
|
||||
assert_eq!(tok_typ(" `hel\nlo` "), [BacktickName]);
|
||||
assert_eq!(tok_typ(" `hel\nlo` "), [Ident]);
|
||||
|
||||
assert_eq!(tok_err(r#""hello"#),
|
||||
"unterminated string, quoted by `\"`");
|
||||
|
@ -762,15 +772,15 @@ fn test_dollar() {
|
|||
assert_eq!(tok_str("select $$ something $$; x"),
|
||||
["select", "$$ something $$", ";", "x"]);
|
||||
assert_eq!(tok_typ("select $$ something $$; x"),
|
||||
[Keyword, Str, Semicolon, Ident]);
|
||||
[keyword("select"), Str, Semicolon, Ident]);
|
||||
assert_eq!(tok_str("select $a$ ; $b$ ; $b$ ; $a$; x"),
|
||||
["select", "$a$ ; $b$ ; $b$ ; $a$", ";", "x"]);
|
||||
assert_eq!(tok_typ("select $a$ ; $b$ ; $b$ ; $a$; x"),
|
||||
[Keyword, Str, Semicolon, Ident]);
|
||||
[keyword("select"), Str, Semicolon, Ident]);
|
||||
assert_eq!(tok_str("select $a$ ; $b$ ; $a$; x"),
|
||||
["select", "$a$ ; $b$ ; $a$", ";", "x"]);
|
||||
assert_eq!(tok_typ("select $a$ ; $b$ ; $a$; x"),
|
||||
[Keyword, Str, Semicolon, Ident]);
|
||||
[keyword("select"), Str, Semicolon, Ident]);
|
||||
assert_eq!(tok_err("select $$ ; $ab$ test;"),
|
||||
"unterminated string started with $$");
|
||||
assert_eq!(tok_err("select $a$ ; $$ test;"),
|
||||
|
@ -782,24 +792,24 @@ fn test_dollar() {
|
|||
assert_eq!(tok_str("select $a$a$ ; $a$ test;"),
|
||||
["select", "$a$a$ ; $a$", "test", ";"]);
|
||||
assert_eq!(tok_typ("select $a$a$ ; $a$ test;"),
|
||||
[Keyword, Str, Ident, Semicolon]);
|
||||
[keyword("select"), Str, Ident, Semicolon]);
|
||||
assert_eq!(tok_str("select $a+b; $b test; $a+b; $b ;"),
|
||||
["select", "$a", "+", "b", ";", "$b", "test",
|
||||
";", "$a", "+", "b", ";", "$b", ";"]);
|
||||
assert_eq!(tok_typ("select $a+b; $b test; $a+b; $b ;"),
|
||||
[Keyword, Argument, Add, Ident, Semicolon, Argument, Ident,
|
||||
[keyword("select"), Argument, Add, Ident, Semicolon, Argument, Ident,
|
||||
Semicolon, Argument, Add, Ident, Semicolon, Argument, Semicolon]);
|
||||
assert_eq!(tok_str("select $def x$y test; $def x$y"),
|
||||
["select", "$def", "x", "$y", "test",
|
||||
";", "$def", "x", "$y"]);
|
||||
assert_eq!(tok_typ("select $def x$y test; $def x$y"),
|
||||
[Keyword, Argument, Ident, Argument, Ident,
|
||||
[keyword("select"), Argument, Ident, Argument, Ident,
|
||||
Semicolon, Argument, Ident, Argument]);
|
||||
assert_eq!(tok_str("select $`x``y` + $0 + $`zz` + $1.2 + $фыва"),
|
||||
["select", "$`x``y`", "+", "$0", "+", "$`zz`", "+", "$1", ".", "2",
|
||||
"+", "$фыва"]);
|
||||
assert_eq!(tok_typ("select $`x``y` + $0 + $`zz` + $1.2 + $фыва"),
|
||||
[Keyword, Argument, Add, Argument, Add, Argument,
|
||||
[keyword("select"), Argument, Add, Argument, Add, Argument,
|
||||
Add, Argument, Dot, IntConst, Add, Argument]);
|
||||
assert_eq!(tok_err(r#"$-"#),
|
||||
"bare $ is not allowed");
|
||||
|
@ -831,11 +841,11 @@ fn test_substitution() {
|
|||
assert_eq!(tok_str("SELECT \\(expr);"),
|
||||
["SELECT", "\\(expr)", ";"]);
|
||||
assert_eq!(tok_typ("SELECT \\(expr);"),
|
||||
[Keyword, Substitution, Semicolon]);
|
||||
[keyword("select"), Substitution, Semicolon]);
|
||||
assert_eq!(tok_str("SELECT \\(other_Name1);"),
|
||||
["SELECT", "\\(other_Name1)", ";"]);
|
||||
assert_eq!(tok_typ("SELECT \\(other_Name1);"),
|
||||
[Keyword, Substitution, Semicolon]);
|
||||
[keyword("select"), Substitution, Semicolon]);
|
||||
assert_eq!(tok_err("SELECT \\(some-name);"),
|
||||
"only alphanumerics are allowed in \\(name) token");
|
||||
assert_eq!(tok_err("SELECT \\(some_name"),
|
||||
|
|
|
@ -21,6 +21,7 @@ from __future__ import annotations
|
|||
from typing import *
|
||||
|
||||
import multiprocessing
|
||||
import json
|
||||
|
||||
from edb import errors
|
||||
from edb.common import parsing
|
||||
|
@ -29,7 +30,7 @@ from . import parser as qlparser
|
|||
from .. import ast as qlast
|
||||
from .. import tokenizer as qltokenizer
|
||||
|
||||
EdgeQLParserBase = qlparser.EdgeQLParserBase
|
||||
EdgeQLParserBase = qlparser.EdgeQLParserSpec
|
||||
|
||||
|
||||
def append_module_aliases(tree, aliases):
|
||||
|
@ -48,11 +49,9 @@ def append_module_aliases(tree, aliases):
|
|||
|
||||
def parse_fragment(
|
||||
source: Union[qltokenizer.Source, str],
|
||||
filename: Optional[str]=None,
|
||||
filename: Optional[str] = None,
|
||||
) -> qlast.Expr:
|
||||
if isinstance(source, str):
|
||||
source = qltokenizer.Source.from_string(source)
|
||||
parser = qlparser.EdgeQLExpressionParser()
|
||||
parser = qlparser.EdgeQLExpressionSpec().get_parser()
|
||||
res = parser.parse(source, filename=filename)
|
||||
assert isinstance(res, qlast.Expr)
|
||||
return res
|
||||
|
@ -60,11 +59,9 @@ def parse_fragment(
|
|||
|
||||
def parse_single(
|
||||
source: Union[qltokenizer.Source, str],
|
||||
filename: Optional[str]=None,
|
||||
filename: Optional[str] = None,
|
||||
) -> qlast.Statement:
|
||||
if isinstance(source, str):
|
||||
source = qltokenizer.Source.from_string(source)
|
||||
parser = qlparser.EdgeQLSingleParser()
|
||||
parser = qlparser.EdgeQLSingleSpec().get_parser()
|
||||
res = parser.parse(source, filename=filename)
|
||||
assert isinstance(res, (qlast.Query | qlast.Command))
|
||||
return res
|
||||
|
@ -106,9 +103,7 @@ def parse_command(
|
|||
|
||||
|
||||
def parse_block(source: Union[qltokenizer.Source, str]) -> List[qlast.Base]:
|
||||
if isinstance(source, str):
|
||||
source = qltokenizer.Source.from_string(source)
|
||||
parser = qlparser.EdgeQLBlockParser()
|
||||
parser = qlparser.EdgeQLBlockSpec().get_parser()
|
||||
return parser.parse(source)
|
||||
|
||||
|
||||
|
@ -122,9 +117,8 @@ def parse_migration_body_block(
|
|||
# where the source contexts don't matter anyway.
|
||||
source = '{' + source + '}'
|
||||
|
||||
tsource = qltokenizer.Source.from_string(source)
|
||||
parser = qlparser.EdgeQLMigrationBodyParser()
|
||||
return parser.parse(tsource)
|
||||
parser = qlparser.EdgeQLMigrationBodySpec().get_parser()
|
||||
return parser.parse(source)
|
||||
|
||||
|
||||
def parse_extension_package_body_block(
|
||||
|
@ -137,31 +131,30 @@ def parse_extension_package_body_block(
|
|||
# where the source contexts don't matter anyway.
|
||||
source = '{' + source + '}'
|
||||
|
||||
tsource = qltokenizer.Source.from_string(source)
|
||||
parser = qlparser.EdgeQLExtensionPackageBodyParser()
|
||||
return parser.parse(tsource)
|
||||
parser = qlparser.EdgeQLExtensionPackageBodySpec().get_parser()
|
||||
return parser.parse(source)
|
||||
|
||||
|
||||
def parse_sdl(expr: str):
|
||||
parser = qlparser.EdgeSDLParser()
|
||||
parser = qlparser.EdgeSDLSpec().get_parser()
|
||||
return parser.parse(expr)
|
||||
|
||||
|
||||
def _load_parser(parser: qlparser.EdgeQLParserBase) -> None:
|
||||
def _load_parser(parser: qlparser.EdgeQLParserSpec) -> None:
|
||||
parser.get_parser_spec(allow_rebuild=True)
|
||||
|
||||
|
||||
def preload(
|
||||
allow_rebuild: bool = True,
|
||||
paralellize: bool = False,
|
||||
parsers: Optional[List[qlparser.EdgeQLParserBase]] = None,
|
||||
parsers: Optional[List[qlparser.EdgeQLParserSpec]] = None,
|
||||
) -> None:
|
||||
if parsers is None:
|
||||
parsers = [
|
||||
qlparser.EdgeQLBlockParser(),
|
||||
qlparser.EdgeQLSingleParser(),
|
||||
qlparser.EdgeQLExpressionParser(),
|
||||
qlparser.EdgeSDLParser(),
|
||||
qlparser.EdgeQLBlockSpec(),
|
||||
qlparser.EdgeQLSingleSpec(),
|
||||
qlparser.EdgeQLExpressionSpec(),
|
||||
qlparser.EdgeSDLSpec(),
|
||||
]
|
||||
|
||||
if not paralellize:
|
||||
|
@ -188,3 +181,73 @@ def preload(
|
|||
pool.map(_load_parser, parsers_to_rebuild)
|
||||
|
||||
preload(parsers=parsers, allow_rebuild=False)
|
||||
|
||||
|
||||
def process_spec(parser: parsing.ParserSpec) -> Tuple[str, List[Any]]:
    """Convert a ParserSpec into JSON. Called from edgeql-parser Rust crate.

    Returns a ``(json_text, productions)`` pair. ``json_text`` encodes an
    object with the keys ``actions`` (per state, a list of
    ``(token, action)`` pairs), ``goto`` (per state, a list of
    ``(non-terminal, value)`` pairs), ``start`` and ``inlines``
    (``(production_id, inline_index)`` pairs). ``productions`` lists the
    production objects; a production's id is its index in that list.
    """
    spec = parser.get_parser_spec()
    assert spec.pureLR

    token_map: Dict[str, str] = {
        token_cls._token: name
        for (_, name), token_cls in parsing.TokenMeta.token_map.items()
    }

    # Productions are numbered in the order they are first referenced by a
    # reduce action.
    productions: List[Any] = []
    production_ids: Dict[Any, int] = {}
    inlines: List[Tuple[int, int]] = []

    def get_production_id(prod: Any) -> int:
        known_id = production_ids.get(prod)
        if known_id is not None:
            return known_id

        new_id = len(productions)
        productions.append(prod)
        production_ids[prod] = new_id

        inline_index = getattr(prod.method, 'inline_index', None)
        if inline_index is not None:
            assert isinstance(inline_index, int)
            inlines.append((new_id, inline_index))

        return new_id

    actions = []
    for state_actions in spec.actions():
        row = []
        for tok, acts in state_actions.items():
            # Only the first action listed for a token is exported.
            act = cast(Any, acts[0])

            tok_name = str(tok)
            tok_name = token_map.get(tok_name, tok_name)

            if 'ShiftAction' not in str(type(act)):
                # Anything that is not a shift is exported as a reduce.
                prod = act.production
                action_obj: Any = {
                    'production_id': get_production_id(prod),
                    'non_term': str(prod.lhs),
                    'cnt': len(prod.rhs),
                }
            else:
                # A shift is exported as the bare target state number.
                action_obj = int(act.nextState)

            row.append((tok_name, action_obj))

        actions.append(row)

    goto = [
        [(str(nterm), action) for nterm, action in state_goto.items()]
        for state_goto in spec.goto()
    ]

    payload = {
        'actions': actions,
        'goto': goto,
        'start': str(spec.start_sym()),
        'inlines': inlines,
    }
    return (json.dumps(payload), productions)
|
||||
|
|
|
@ -1,53 +0,0 @@
|
|||
#
|
||||
# This source file is part of the EdgeDB open source project.
|
||||
#
|
||||
# Copyright 2020-present MagicStack Inc. and the EdgeDB authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import *
|
||||
|
||||
from collections import deque
|
||||
|
||||
from edb.edgeql import tokenizer
|
||||
from edb import _edgeql_parser as ql_parser
|
||||
|
||||
|
||||
class EdgeQLLexer:
|
||||
inputstr: str
|
||||
tokens: Optional[Deque[ql_parser.Token]]
|
||||
filename: Optional[str]
|
||||
end_of_input: Tuple[int, int, int]
|
||||
|
||||
def __init__(self):
|
||||
self.filename = None # TODO
|
||||
|
||||
def setinputstr(
|
||||
self,
|
||||
source: Union[str, tokenizer.Source],
|
||||
filename: Optional[str]=None,
|
||||
) -> None:
|
||||
if isinstance(source, str):
|
||||
source = tokenizer.Source.from_string(source)
|
||||
|
||||
self.inputstr = source.text()
|
||||
self.filename = filename
|
||||
self.tokens = deque(source.tokens())
|
||||
self.end_of_input = self.tokens[-1].end()
|
||||
|
||||
def token(self) -> ql_parser.Token:
|
||||
if self.tokens:
|
||||
return self.tokens.popleft()
|
|
@ -174,23 +174,23 @@ class T_PIPE(Token, lextoken='|'):
|
|||
pass
|
||||
|
||||
|
||||
class T_NAMEDONLY(Token):
|
||||
class T_NAMEDONLY(Token, lextoken='named only'):
|
||||
pass
|
||||
|
||||
|
||||
class T_SETANNOTATION(Token):
|
||||
class T_SETANNOTATION(Token, lextoken='set annotation'):
|
||||
pass
|
||||
|
||||
|
||||
class T_SETTYPE(Token):
|
||||
class T_SETTYPE(Token, lextoken='set type'):
|
||||
pass
|
||||
|
||||
|
||||
class T_EXTENSIONPACKAGE(Token):
|
||||
class T_EXTENSIONPACKAGE(Token, lextoken='extension package'):
|
||||
pass
|
||||
|
||||
|
||||
class T_ORDERBY(Token):
|
||||
class T_ORDERBY(Token, lextoken='order by'):
|
||||
pass
|
||||
|
||||
|
||||
|
|
|
@ -18,297 +18,176 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import *
|
||||
|
||||
from edb import errors
|
||||
from edb.common import debug, parsing
|
||||
from edb.common import context as pctx
|
||||
from edb.common.english import add_a as a
|
||||
from edb.common import parsing
|
||||
|
||||
from .grammar import rust_lexer, tokens
|
||||
from .grammar import expressions as gr_exprs
|
||||
from .grammar import commondl as gr_commondl
|
||||
from .grammar import keywords as gr_keywords
|
||||
from .. import tokenizer
|
||||
|
||||
import edb._edgeql_parser as ql_parser
|
||||
|
||||
|
||||
class EdgeQLParserBase(parsing.Parser):
|
||||
def get_debug(self):
|
||||
return debug.flags.edgeql_parser
|
||||
|
||||
def get_exception(self, native_err, context, token=None):
|
||||
msg = native_err.args[0]
|
||||
details = None
|
||||
hint = None
|
||||
|
||||
if isinstance(native_err, errors.EdgeQLSyntaxError):
|
||||
return native_err
|
||||
else:
|
||||
if msg.startswith('Unexpected token: '):
|
||||
token = token or getattr(native_err, 'token', None)
|
||||
token_kind = token.kind()
|
||||
ltok = self.parser._stack[-1][0]
|
||||
|
||||
is_reserved = (
|
||||
token.text().lower()
|
||||
in gr_keywords.by_type[gr_keywords.RESERVED_KEYWORD]
|
||||
)
|
||||
|
||||
# Look at the parsing stack and use tokens and
|
||||
# non-terminals to infer the parser rule when the
|
||||
# error occurred.
|
||||
i, rule = self._get_rule()
|
||||
|
||||
if not token or token_kind == 'EOF':
|
||||
msg = 'Unexpected end of line'
|
||||
elif (
|
||||
rule == 'shape' and
|
||||
token_kind == 'IDENT' and
|
||||
isinstance(ltok, parsing.Nonterm)
|
||||
):
|
||||
# Make sure that the previous element in the stack
|
||||
# is some kind of Nonterminal, because if it's
|
||||
# not, this is probably not an issue of a missing
|
||||
# COMMA.
|
||||
hint = (f"It appears that a ',' is missing in {a(rule)} "
|
||||
f"before {token.text()!r}")
|
||||
elif (
|
||||
rule == 'list of arguments' and
|
||||
# The stack is like <NodeName> LPAREN <AnyIdentifier>
|
||||
i == 1 and
|
||||
isinstance(ltok, (gr_exprs.AnyIdentifier,
|
||||
tokens.T_WITH,
|
||||
tokens.T_SELECT,
|
||||
tokens.T_FOR,
|
||||
tokens.T_INSERT,
|
||||
tokens.T_UPDATE,
|
||||
tokens.T_DELETE))
|
||||
):
|
||||
hint = ("Missing parentheses around statement used "
|
||||
"as an expression")
|
||||
# We want the error context correspond to the
|
||||
# statement keyword
|
||||
context = ltok.context
|
||||
token = None
|
||||
elif (
|
||||
rule == 'array slice' and
|
||||
# The offending token was something that could
|
||||
# make an expression
|
||||
token_kind in {'IDENT', 'ICONST'} and
|
||||
not isinstance(ltok, tokens.T_COLON)
|
||||
):
|
||||
hint = (f"It appears that a ':' is missing in {a(rule)} "
|
||||
f"before {token.text()!r}")
|
||||
elif (
|
||||
rule in {'list of arguments', 'tuple', 'array'} and
|
||||
# The offending token was something that could
|
||||
# make an expression
|
||||
token_kind in {
|
||||
'IDENT', 'TRUE', 'FALSE',
|
||||
'ICONST', 'FCONST', 'NICONST', 'NFCONST',
|
||||
'BCONST', 'SCONST',
|
||||
} and
|
||||
not isinstance(ltok, tokens.T_COMMA)
|
||||
):
|
||||
hint = (f"It appears that a ',' is missing in {a(rule)} "
|
||||
f"before {token.text()!r}")
|
||||
elif (
|
||||
rule == 'definition' and
|
||||
token_kind == 'IDENT'
|
||||
):
|
||||
# Something went wrong in a definition, so check
|
||||
# if the last successful token is a keyword.
|
||||
if (
|
||||
isinstance(ltok, gr_exprs.Identifier) and
|
||||
ltok.val.upper() == 'INDEX'
|
||||
):
|
||||
msg = (f"Expected 'ON', but got {token.text()!r} "
|
||||
f"instead")
|
||||
else:
|
||||
msg = f'Unexpected {token.text()!r}'
|
||||
elif rule == 'for iterator':
|
||||
msg = ("Missing parentheses around complex expression in "
|
||||
"a FOR iterator clause")
|
||||
|
||||
if i > 0:
|
||||
context = pctx.merge_context([
|
||||
self.parser._stack[-i][0].context, context,
|
||||
])
|
||||
token = None
|
||||
elif hasattr(token, 'val'):
|
||||
msg = f'Unexpected {token.val!r}'
|
||||
elif token_kind == 'NL':
|
||||
msg = 'Unexpected end of line'
|
||||
elif token.text() == "explain":
|
||||
msg = f'Unexpected keyword {token.text()!r}'
|
||||
hint = f'Use `analyze` to show query performance details'
|
||||
elif is_reserved and not isinstance(ltok, gr_exprs.Expr):
|
||||
# Another token followed by a reserved keyword:
|
||||
# likely an attempt to use keyword as identifier
|
||||
msg = f'Unexpected keyword {token.text()!r}'
|
||||
details = (
|
||||
f'Token {token.text()!r} is a reserved keyword and'
|
||||
f' cannot be used as an identifier'
|
||||
)
|
||||
hint = (
|
||||
f'Use a different identifier or quote the name with'
|
||||
f' backticks: `{token.text()}`'
|
||||
)
|
||||
else:
|
||||
msg = f'Unexpected {token.text()!r}'
|
||||
|
||||
return errors.EdgeQLSyntaxError(
|
||||
msg, details=details, hint=hint, context=context, token=token)
|
||||
|
||||
def _get_rule(self):
|
||||
ltok = self.parser._stack[-1][0]
|
||||
# Look at the parsing stack and use tokens and non-terminals
|
||||
# to infer the parser rule when the error occurred.
|
||||
rule = ''
|
||||
|
||||
def _matches_for(i):
|
||||
return (
|
||||
len(self.parser._stack) >= i + 3
|
||||
and isinstance(self.parser._stack[-3 - i][0], tokens.T_FOR)
|
||||
and isinstance(
|
||||
self.parser._stack[-2 - i][0], gr_exprs.Identifier)
|
||||
and isinstance(self.parser._stack[-1 - i][0], tokens.T_IN)
|
||||
)
|
||||
|
||||
# Check if we're in the `FOR x IN <bad_token>` situation
|
||||
if (
|
||||
len(self.parser._stack) >= 4
|
||||
and isinstance(self.parser._stack[-2][0], tokens.T_RANGBRACKET)
|
||||
and isinstance(self.parser._stack[-3][0], gr_exprs.FullTypeExpr)
|
||||
and isinstance(self.parser._stack[-4][0], tokens.T_LANGBRACKET)
|
||||
and _matches_for(4)
|
||||
):
|
||||
return 4, 'for iterator'
|
||||
|
||||
if (
|
||||
len(self.parser._stack) >= 2
|
||||
and isinstance(self.parser._stack[-2][0], gr_exprs.AtomicExpr)
|
||||
and _matches_for(2)
|
||||
):
|
||||
return 2, 'for iterator'
|
||||
|
||||
if (
|
||||
len(self.parser._stack) >= 1
|
||||
and isinstance(self.parser._stack[-1][0], gr_exprs.BaseAtomicExpr)
|
||||
and _matches_for(1)
|
||||
):
|
||||
return 1, 'for iterator'
|
||||
|
||||
if _matches_for(0):
|
||||
return 0, 'for iterator'
|
||||
|
||||
# If the last valid token was a closing brace/paren/bracket,
|
||||
# we need to find a match for it before deciding what rule
|
||||
# context we're in.
|
||||
need_match = isinstance(ltok, (tokens.T_RBRACE,
|
||||
tokens.T_RPAREN,
|
||||
tokens.T_RBRACKET))
|
||||
nextel = None
|
||||
for i, (el, _) in enumerate(reversed(self.parser._stack)):
|
||||
if isinstance(el, tokens.Token):
|
||||
# We'll need the element right before "{", "[", or "(".
|
||||
prevel = self.parser._stack[-2 - i][0]
|
||||
|
||||
if isinstance(el, tokens.T_LBRACE):
|
||||
if need_match and isinstance(ltok,
|
||||
tokens.T_RBRACE):
|
||||
# This is matched, while we're looking
|
||||
# for unmatched braces.
|
||||
need_match = False
|
||||
continue
|
||||
|
||||
elif isinstance(prevel, gr_commondl.OptExtending):
|
||||
# This is some SDL/DDL
|
||||
rule = 'definition'
|
||||
elif (
|
||||
isinstance(prevel, gr_exprs.Expr) or
|
||||
(
|
||||
isinstance(prevel, tokens.T_COLON) and
|
||||
isinstance(self.parser._stack[-3 - i][0],
|
||||
gr_exprs.ShapePointer)
|
||||
)
|
||||
):
|
||||
# This is some kind of shape.
|
||||
rule = 'shape'
|
||||
break
|
||||
elif isinstance(el, tokens.T_LPAREN):
|
||||
if need_match and isinstance(ltok,
|
||||
tokens.T_RPAREN):
|
||||
# This is matched, while we're looking
|
||||
# for unmatched parentheses.
|
||||
need_match = False
|
||||
continue
|
||||
elif isinstance(prevel, gr_exprs.NodeName):
|
||||
rule = 'list of arguments'
|
||||
elif isinstance(nextel, (tokens.T_FOR,
|
||||
tokens.T_SELECT,
|
||||
tokens.T_UPDATE,
|
||||
tokens.T_DELETE,
|
||||
tokens.T_INSERT,
|
||||
tokens.T_FOR)):
|
||||
# A parenthesized subquery expression,
|
||||
# we should leave the error as is.
|
||||
break
|
||||
else:
|
||||
rule = 'tuple'
|
||||
break
|
||||
elif isinstance(el, tokens.T_LBRACKET):
|
||||
if need_match and isinstance(ltok,
|
||||
tokens.T_RBRACKET):
|
||||
# This is matched, while we're looking
|
||||
# for unmatched brackets.
|
||||
need_match = False
|
||||
continue
|
||||
# This is either an array literal or
|
||||
# array index.
|
||||
elif isinstance(prevel, gr_exprs.Expr):
|
||||
rule = 'array slice'
|
||||
else:
|
||||
rule = 'array'
|
||||
break
|
||||
|
||||
# Also keep track of the element right after current.
|
||||
nextel = el
|
||||
|
||||
return i, rule
|
||||
|
||||
def get_lexer(self):
|
||||
return rust_lexer.EdgeQLLexer()
|
||||
class EdgeQLParserSpec(parsing.ParserSpec):
    """Parser spec that can create an `EdgeQLParser` bound to itself."""

    def get_parser(self):
        """Return a new `EdgeQLParser` constructed from this spec."""
        bound_parser = EdgeQLParser(self)
        return bound_parser
|
||||
|
||||
|
||||
class EdgeQLSingleParser(EdgeQLParserBase):
|
||||
class EdgeQLSingleSpec(EdgeQLParserSpec):
|
||||
def get_parser_spec_module(self):
|
||||
from .grammar import single
|
||||
|
||||
return single
|
||||
|
||||
|
||||
class EdgeQLExpressionParser(EdgeQLParserBase):
|
||||
class EdgeQLExpressionSpec(EdgeQLParserSpec):
|
||||
def get_parser_spec_module(self):
|
||||
from .grammar import fragment
|
||||
|
||||
return fragment
|
||||
|
||||
|
||||
class EdgeQLBlockParser(EdgeQLParserBase):
|
||||
class EdgeQLBlockSpec(EdgeQLParserSpec):
|
||||
def get_parser_spec_module(self):
|
||||
from .grammar import block
|
||||
|
||||
return block
|
||||
|
||||
|
||||
class EdgeQLMigrationBodyParser(EdgeQLParserBase):
|
||||
class EdgeQLMigrationBodySpec(EdgeQLParserSpec):
|
||||
def get_parser_spec_module(self):
|
||||
from .grammar import migration_body
|
||||
|
||||
return migration_body
|
||||
|
||||
|
||||
class EdgeQLExtensionPackageBodyParser(EdgeQLParserBase):
|
||||
class EdgeQLExtensionPackageBodySpec(EdgeQLParserSpec):
|
||||
def get_parser_spec_module(self):
|
||||
from .grammar import extension_package_body
|
||||
|
||||
return extension_package_body
|
||||
|
||||
|
||||
class EdgeSDLParser(EdgeQLParserBase):
|
||||
class EdgeSDLSpec(EdgeQLParserSpec):
|
||||
def get_parser_spec_module(self):
|
||||
from .grammar import sdldocument
|
||||
|
||||
return sdldocument
|
||||
|
||||
|
||||
class EdgeQLParser:
|
||||
spec: EdgeQLParserSpec
|
||||
|
||||
filename: Optional[str]
|
||||
source: tokenizer.Source
|
||||
|
||||
def __init__(self, p: EdgeQLParserSpec):
    """Bind the parser to spec *p* and index its grammar's token classes.

    ``token_map`` is keyed by the second element of each key of the
    grammar module's ``TokenMeta.token_map`` and holds the class stored
    under that key.
    """
    self.spec = p
    self.filename = None

    grammar = self.spec.get_parser_spec_module()
    self.token_map = {
        name: token_cls
        for (_, name), token_cls in grammar.TokenMeta.token_map.items()
    }
|
||||
|
||||
def get_parser_spec(self, allow_rebuild=False):
    """Return the wrapped spec's parser spec, forwarding *allow_rebuild*."""
    spec = self.spec
    return spec.get_parser_spec(allow_rebuild=allow_rebuild)
|
||||
|
||||
def parse(
|
||||
self,
|
||||
source: Union[str, tokenizer.Source],
|
||||
filename: Optional[str] = None,
|
||||
):
|
||||
if isinstance(source, str):
|
||||
source = tokenizer.Source.from_string(source)
|
||||
|
||||
self.filename = filename
|
||||
self.source = source
|
||||
|
||||
parser_name = self.spec.__class__.__name__
|
||||
result, productions = ql_parser.parse(parser_name, source.tokens())
|
||||
|
||||
if len(result.errors()) > 0:
|
||||
# TODO: emit multiple errors
|
||||
|
||||
# Heuristic to pick the error:
|
||||
# - first encountered,
|
||||
# - Unexpected before Missing,
|
||||
# - original order.
|
||||
errs: List[Tuple[str, Tuple[int, Optional[int]]]] = result.errors()
|
||||
errs.sort(key=lambda e: (e[1][0], -ord(e[0][1])))
|
||||
error = errs[0]
|
||||
|
||||
message, span = error
|
||||
position = tokenizer.inflate_position(source.text(), span)
|
||||
|
||||
raise errors.EdgeQLSyntaxError(message, position=position)
|
||||
|
||||
return self._cst_to_ast(result.out(), productions).val
|
||||
|
||||
def _cst_to_ast(
|
||||
self, cst: ql_parser.CSTNode, productions: List[Callable]
|
||||
) -> Any:
|
||||
# Converts CST into AST by calling methods from the grammar classes.
|
||||
#
|
||||
# This function was originally written as a simple recursion.
|
||||
# Then I had to unfold it, because it was hitting recursion limit.
|
||||
# Stack here contains all remaining things to do:
|
||||
# - CST node means the node has to be processed and pushed onto the
|
||||
# result stack,
|
||||
# - production means that all args of production have been processed
|
||||
# are are ready to be passed to the production method. The result is
|
||||
# obviously pushed onto the result stack
|
||||
|
||||
stack: List[ql_parser.CSTNode | ql_parser.Production] = [cst]
|
||||
result: List[Any] = []
|
||||
|
||||
while len(stack) > 0:
|
||||
node = stack.pop()
|
||||
|
||||
if isinstance(node, ql_parser.CSTNode):
|
||||
# this would be the body of the original recursion function
|
||||
|
||||
if terminal := node.terminal():
|
||||
# Terminal is simple: just convert to parsing.Token
|
||||
context = parsing.ParserContext(
|
||||
name=self.filename,
|
||||
buffer=self.source.text(),
|
||||
start=terminal.start(),
|
||||
end=terminal.end(),
|
||||
)
|
||||
result.append(
|
||||
parsing.Token(
|
||||
terminal.text(), terminal.value(), context
|
||||
)
|
||||
)
|
||||
|
||||
elif production := node.production():
|
||||
# Production needs to first process all args, then
|
||||
# call the appropriate method.
|
||||
# (this is all in reverse, because stacks)
|
||||
stack.append(production)
|
||||
args = list(production.args())
|
||||
args.reverse()
|
||||
stack.extend(args)
|
||||
else:
|
||||
raise NotImplementedError(node)
|
||||
|
||||
elif isinstance(node, ql_parser.Production):
|
||||
# production args are done, get them out of result stack
|
||||
len_args = len(node.args())
|
||||
split_at = len(result) - len_args
|
||||
args = result[split_at:]
|
||||
result = result[0:split_at]
|
||||
|
||||
# find correct method to call
|
||||
production_id = node.id()
|
||||
production = productions[production_id]
|
||||
|
||||
sym = production.lhs.nontermType()
|
||||
assert len(args) == len(production.rhs)
|
||||
production.method(sym, *args)
|
||||
|
||||
# push into result stack
|
||||
result.append(sym)
|
||||
return result.pop()
|
||||
|
|
|
@ -31,7 +31,6 @@ TRAILING_WS_IN_CONTINUATION = re.compile(r'\\ \s+\n')
|
|||
|
||||
|
||||
class Source:
|
||||
|
||||
def __init__(self, text: str, tokens: List[ql_parser.Token]) -> None:
|
||||
self._cache_key = hashlib.blake2b(text.encode('utf-8')).digest()
|
||||
self._text = text
|
||||
|
@ -67,7 +66,6 @@ class Source:
|
|||
|
||||
|
||||
class NormalizedSource(Source):
|
||||
|
||||
def __init__(self, normalized: ql_parser.Entry, text: str) -> None:
|
||||
self._text = text
|
||||
self._cache_key = normalized.key()
|
||||
|
@ -103,32 +101,70 @@ class NormalizedSource(Source):
|
|||
return cls(_normalize(text), text)
|
||||
|
||||
|
||||
def inflate_span(
    source: str, span: Tuple[int, Optional[int]]
) -> Tuple[ql_parser.SourcePoint, Optional[ql_parser.SourcePoint]]:
    """Resolve a ``(start, end)`` offset pair into source points.

    The offsets are looked up in the UTF-8 encoding of *source*
    (presumably they are byte offsets -- confirm against the tokenizer).

    Returns:
        ``(start_point, end_point)``; ``end_point`` is ``None`` when the
        span has no end offset, so callers must handle that case.
    """
    (start, end) = span
    source_bytes = source.encode('utf-8')

    [start_sp] = ql_parser.SourcePoint.from_offsets(source_bytes, [start])

    if end is not None:
        [end_sp] = ql_parser.SourcePoint.from_offsets(source_bytes, [end])
    else:
        end_sp = None

    return (start_sp, end_sp)
|
||||
|
||||
|
||||
def inflate_position(
    source: str, span: Tuple[int, Optional[int]]
) -> Tuple[int, int, int, Optional[int]]:
    """Expand *span* into ``(column, line, start offset, end offset)``.

    The points come from ``inflate_span``; the last item is ``None`` when
    the span has no end point.  Note the order: column comes before line.
    """
    start_point, end_point = inflate_span(source, span)

    column = start_point.column
    line = start_point.line
    start_offset = start_point.offset

    if end_point:
        end_offset = end_point.offset
    else:
        end_offset = None

    return (column, line, start_offset, end_offset)
|
||||
|
||||
|
||||
def _tokenize(eql: str) -> List[ql_parser.Token]:
|
||||
try:
|
||||
return ql_parser.tokenize(eql)
|
||||
except ql_parser.TokenizerError as e:
|
||||
message, position = e.args
|
||||
result = ql_parser.tokenize(eql)
|
||||
|
||||
if len(result.errors()) > 0:
|
||||
# TODO: emit multiple errors
|
||||
error = result.errors()[0]
|
||||
|
||||
message, span = error
|
||||
position = inflate_position(eql, span)
|
||||
|
||||
hint = _derive_hint(eql, message, position)
|
||||
raise errors.EdgeQLSyntaxError(
|
||||
message, position=position, hint=hint) from e
|
||||
raise errors.EdgeQLSyntaxError(message, position=position, hint=hint)
|
||||
|
||||
return result.out()
|
||||
|
||||
|
||||
def _normalize(eql: str) -> ql_parser.Entry:
|
||||
try:
|
||||
return ql_parser.normalize(eql)
|
||||
except ql_parser.TokenizerError as e:
|
||||
message, position = e.args
|
||||
except ql_parser.SyntaxError as e:
|
||||
message, span = e.args
|
||||
position = inflate_position(eql, span)
|
||||
|
||||
hint = _derive_hint(eql, message, position)
|
||||
raise errors.EdgeQLSyntaxError(
|
||||
message, position=position, hint=hint) from e
|
||||
message, position=position, hint=hint
|
||||
) from e
|
||||
|
||||
|
||||
def _derive_hint(
|
||||
input: str,
|
||||
message: str,
|
||||
position: Tuple[int, int, int],
|
||||
position: Tuple[int, int, int, Optional[int]],
|
||||
) -> Optional[str]:
|
||||
_, _, off = position
|
||||
_, _, off, _ = position
|
||||
|
||||
if message.endswith(
|
||||
r"invalid string literal: invalid escape sequence '\ '"
|
||||
):
|
||||
|
|
|
@ -90,7 +90,7 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
|
|||
hint: Optional[str] = None,
|
||||
details: Optional[str] = None,
|
||||
context=None,
|
||||
position: Optional[tuple[Optional[int], ...]] = None,
|
||||
position: Optional[tuple[int, int, int, int | None]] = None,
|
||||
filename: Optional[str] = None,
|
||||
token=None,
|
||||
pgext_code: Optional[str] = None,
|
||||
|
@ -125,7 +125,7 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
|
|||
def set_filename(self, filename):
|
||||
self._attrs[FIELD_FILENAME] = filename
|
||||
|
||||
def set_linecol(self, line, col):
|
||||
def set_linecol(self, line: Optional[int], col: Optional[int]):
|
||||
if line is not None:
|
||||
self._attrs[FIELD_LINE_START] = str(line)
|
||||
if col is not None:
|
||||
|
@ -143,7 +143,10 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
|
|||
def has_source_context(self):
|
||||
return FIELD_DETAILS in self._attrs
|
||||
|
||||
def set_source_context(self, context):
|
||||
def set_source_context(self, context: Optional[pctx.ParserContext]):
|
||||
if not context:
|
||||
return
|
||||
|
||||
start = context.start_point
|
||||
end = context.end_point
|
||||
ex.replace_context(self, context)
|
||||
|
@ -163,17 +166,14 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
|
|||
|
||||
def set_position(
|
||||
self,
|
||||
line: Optional[int] = None,
|
||||
column: Optional[int] = None,
|
||||
start: Optional[int] = None,
|
||||
end: Optional[int] = None,
|
||||
column: int,
|
||||
line: int,
|
||||
start: int,
|
||||
end: Optional[int],
|
||||
):
|
||||
self.set_linecol(line, column)
|
||||
if start is not None:
|
||||
self._attrs[FIELD_POSITION_START] = str(start)
|
||||
end = end or start
|
||||
if end is not None:
|
||||
self._attrs[FIELD_POSITION_END] = str(end)
|
||||
self._attrs[FIELD_POSITION_START] = str(start)
|
||||
self._attrs[FIELD_POSITION_END] = str(end or start)
|
||||
|
||||
@property
|
||||
def line(self):
|
||||
|
|
|
@ -195,7 +195,6 @@ class BaseSyntaxTest(BaseDocTest):
|
|||
markup.dump(inast)
|
||||
|
||||
# make sure that the AST has context
|
||||
#
|
||||
context.ContextValidator().visit(inast)
|
||||
|
||||
processed_src = self.ast_to_source(inast)
|
||||
|
|
|
@ -723,7 +723,7 @@ class EQLFunctionDirective(BaseEQLDirective):
|
|||
from edb.edgeql import codegen as ql_gen
|
||||
from edb.edgeql import qltypes
|
||||
|
||||
parser = edgeql_parser.EdgeQLBlockParser()
|
||||
parser = edgeql_parser.EdgeQLBlockSpec().get_parser()
|
||||
try:
|
||||
astnode = parser.parse(
|
||||
f'create function {sig} using SQL function "xxx";')[0]
|
||||
|
@ -796,7 +796,7 @@ class EQLConstraintDirective(BaseEQLDirective):
|
|||
from edb.edgeql import ast as ql_ast
|
||||
from edb.edgeql import codegen as ql_gen
|
||||
|
||||
parser = edgeql_parser.EdgeQLBlockParser()
|
||||
parser = edgeql_parser.EdgeQLBlockSpec().get_parser()
|
||||
try:
|
||||
astnode = parser.parse(
|
||||
f'create abstract constraint {sig};')[0]
|
||||
|
|
|
@ -72,4 +72,5 @@ from . import wipe # noqa
|
|||
from . import gen_test_dumps # noqa
|
||||
from . import gen_sql_introspection # noqa
|
||||
from . import gen_rust_ast # noqa
|
||||
from . import parser_demo # noqa
|
||||
from .profiling import cli as prof_cli # noqa
|
||||
|
|
299
edb/tools/parser_demo.py
Normal file
299
edb/tools/parser_demo.py
Normal file
|
@ -0,0 +1,299 @@
|
|||
#
|
||||
# This source file is part of the EdgeDB open source project.
|
||||
#
|
||||
# Copyright 2020-present MagicStack Inc. and the EdgeDB authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import *
|
||||
|
||||
from edb.edgeql import ast as qlast
|
||||
from edb.edgeql import tokenizer
|
||||
from edb.edgeql.parser import parser as qlparser
|
||||
|
||||
import edb._edgeql_parser as ql_parser
|
||||
|
||||
from edb.tools.edb import edbcommands
|
||||
|
||||
|
||||
@edbcommands.command("parser-demo")
def main():
    """Parse every entry of QUERIES, printing its errors and recovered AST.

    Entries starting with ``sdl`` are parsed with the SDL spec (after the
    prefix is removed); all others use the EdgeQL block spec.
    """
    for q in QUERIES:
        sdl = q.startswith('sdl')
        if sdl:
            q = q[3:]

        try:
            source = tokenizer.Source.from_string(q)
        except BaseException as e:
            # Deliberately broad: the demo reports the failure and moves on.
            print('Error during tokenization:')
            print(e)
            continue

        if sdl:
            spec = qlparser.EdgeSDLSpec()
        else:
            spec = qlparser.EdgeQLBlockSpec()
        parser = spec.get_parser()

        # Set up the state that _cst_to_ast() reads, since parse() is
        # bypassed here in order to get at the full error list.
        parser.filename = None
        parser.source = source

        parser_name = spec.__class__.__name__
        result, productions = ql_parser.parse(parser_name, source.tokens())

        print('-' * 30)
        print()

        text = source.text()
        text_lines = text.splitlines()
        errs = result.errors()

        for index, (message, span) in enumerate(errs):
            (start, end) = tokenizer.inflate_span(text, span)
            if end is None:
                # inflate_span returns no end point for open-ended spans;
                # fall back to a zero-width span at the start point.
                end = start

            print(f'Error [{index+1}/{len(errs)}]:')
            print('\n'.join(text_lines[(start.line - 1) : end.line]))
            print(
                ' ' * (start.column - 1)
                + '^'
                + '-' * (end.column - start.column - 1)
                + ' '
                + message
            )
            print()

        cst = result.out()
        if cst:
            try:
                ast = parser._cst_to_ast(cst, productions).val
            except BaseException:
                # Best effort: a CST produced by error recovery may not be
                # convertible; in that case nothing is printed.
                ast = None
            if ast:
                print('Recovered AST:')
                if isinstance(ast, list):
                    for x in ast:
                        x.dump_edgeql()
                elif isinstance(ast, qlast.Base):
                    ast.dump_edgeql()
                else:
                    print(ast)
|
||||
|
||||
|
||||
QUERIES = [
|
||||
'''
|
||||
select 1
|
||||
''',
|
||||
'''
|
||||
select User { name, email } filter .name = 'Sully'
|
||||
''',
|
||||
'''
|
||||
SELECT {354.32,
|
||||
35400000000000.32,
|
||||
35400000000000000000.32,
|
||||
3.5432e20,
|
||||
3.5432e+20,
|
||||
3.5432e-20,
|
||||
3.543_2e-20,
|
||||
354.32e-20,
|
||||
2_354.32e-20,
|
||||
0e-999
|
||||
}
|
||||
''',
|
||||
'''
|
||||
with module cards
|
||||
for g in (group Card by .element) union (for gi in 0 union (
|
||||
element := g.key.element,
|
||||
cst := sum(g.elements.cost + gi),
|
||||
))
|
||||
''',
|
||||
'''
|
||||
select '10 seconds'
|
||||
''',
|
||||
'''SELECT (User.id, User { name := ''',
|
||||
'''SELECT (false, }]})''',
|
||||
'''
|
||||
SELECT User { name, last_name }
|
||||
WITH u := User SELECT u;
|
||||
''',
|
||||
'''
|
||||
SELECT (false, true false])
|
||||
''',
|
||||
'''
|
||||
for c Card union c.hello
|
||||
''',
|
||||
'''
|
||||
SELECT User id, name }
|
||||
''',
|
||||
'''
|
||||
CREATE TYPE cfg::TestSessionConfig EXTENDING cfg::ConfigObject {
|
||||
CREATE REQUIRED PROPERTY name -> std::str {
|
||||
CREATE CONSTRAINT std::exclusive;
|
||||
}
|
||||
};
|
||||
''',
|
||||
'''
|
||||
CREATE FUNCTION
|
||||
std::_gen_series(
|
||||
`start`: std::int64,
|
||||
stop: std::int64
|
||||
) -> SET OF std::int64
|
||||
{
|
||||
SET volatility := 'Immutable';
|
||||
USING SQL FUNCTION 'generate_series';
|
||||
};
|
||||
''',
|
||||
'''
|
||||
select b"04e3b";
|
||||
''',
|
||||
'''
|
||||
select User { intersect };
|
||||
''',
|
||||
'''
|
||||
create module __std__;
|
||||
''',
|
||||
'''
|
||||
create type Hello {
|
||||
create property intersect -> str;
|
||||
create property `__std__` -> str;
|
||||
};
|
||||
''',
|
||||
'''
|
||||
SELECT
|
||||
count(
|
||||
schema::Module
|
||||
FILTER NOT .builtin AND NOT .name = "default"
|
||||
) + count(
|
||||
schema::Object
|
||||
FILTER .name LIKE "default::%"
|
||||
) > 0
|
||||
''',
|
||||
'''sdl
|
||||
module test {
|
||||
function len1(a: str b: str) -> std::str {
|
||||
using SQL function 'length1'
|
||||
}
|
||||
''',
|
||||
'''
|
||||
SELECT len('');
|
||||
''',
|
||||
'''
|
||||
SELECT __std__::len({'hello', 'world'});
|
||||
''',
|
||||
'''sdl
|
||||
module test {
|
||||
alias FooBaz := [1 2];
|
||||
};
|
||||
''',
|
||||
'''
|
||||
SEL ECT 1
|
||||
''',
|
||||
'''
|
||||
SELECT (
|
||||
foo: 1,
|
||||
bar := 3
|
||||
);
|
||||
''',
|
||||
'''
|
||||
SELECT (
|
||||
foo: (
|
||||
bar: 42
|
||||
)
|
||||
);
|
||||
''',
|
||||
'''
|
||||
SELECT count(FOR X IN {Foo} UNION X);
|
||||
''',
|
||||
'''
|
||||
SELECT some_agg(User.name) OVER (ORDER BY User.age ASC);
|
||||
SELECT some_agg(User.name) OVER (
|
||||
PARTITION BY strlen(User.name)
|
||||
ORDER BY User.age ASC);
|
||||
SELECT some_agg(User.name) OVER (
|
||||
PARTITION BY User.email, User.age
|
||||
ORDER BY User.age ASC);
|
||||
SELECT some_agg(User.name) OVER (
|
||||
PARTITION BY User.email, User.age
|
||||
ORDER BY User.age ASC THEN User.name ASC);
|
||||
''',
|
||||
'''
|
||||
SELECT Issue{
|
||||
name,
|
||||
related_to *-1,
|
||||
};
|
||||
''',
|
||||
'''
|
||||
SELECT __type__;
|
||||
''',
|
||||
'''
|
||||
SELECT Issue{
|
||||
name,
|
||||
related_to *,
|
||||
};
|
||||
''',
|
||||
'''
|
||||
SELECT Foo {(bar)};
|
||||
''',
|
||||
'''
|
||||
SELECT Foo.__source__;
|
||||
''',
|
||||
'''
|
||||
SELECT Foo.bar@__type__;
|
||||
''',
|
||||
'''
|
||||
SELECT Foo {
|
||||
__type__.name
|
||||
};
|
||||
''',
|
||||
'''
|
||||
SELECT INTROSPECT tuple<int64>;
|
||||
''',
|
||||
'''
|
||||
CREATE FUNCTION std::strlen(string: std::str = '1', abc: std::str)
|
||||
-> std::int64 {};
|
||||
''',
|
||||
'''
|
||||
SELECT Obj.n + random()
|
||||
''',
|
||||
'''
|
||||
CREATE MIGRATION { ;;; CREATE TYPE Foo ;;; CREATE TYPE Bar ;;; };
|
||||
''',
|
||||
'''
|
||||
SELECT (User IS (Named, Text));
|
||||
''',
|
||||
'''sdl
|
||||
module test {
|
||||
scalar type foobar {
|
||||
index prop on (__source__);
|
||||
};
|
||||
};
|
||||
''',
|
||||
'''
|
||||
INSERT Foo FILTER Foo.bar = 42;
|
||||
''',
|
||||
'''sdl
|
||||
module test {
|
||||
function some_func($`(`: str = ) ) -> std::str {
|
||||
using edgeql function 'some_other_func';
|
||||
}
|
||||
};
|
||||
''',
|
||||
'''
|
||||
SELECT (a := 1, foo);
|
||||
''',
|
||||
'''
|
||||
CREATE MODULE `__std__`;
|
||||
''',
|
||||
]
|
|
@ -4336,7 +4336,7 @@ class TestEdgeQLDDL(tb.DDLTestCase):
|
|||
async def test_edgeql_ddl_function_20(self):
|
||||
with self.assertRaisesRegex(
|
||||
edgedb.EdgeQLSyntaxError,
|
||||
r"Unexpected ';'"):
|
||||
"Unexpected ';'"):
|
||||
|
||||
await self.con.execute(r'''
|
||||
CREATE FUNCTION ddlf_20(f: int64) -> int64
|
||||
|
@ -12337,7 +12337,7 @@ type default::Foo {
|
|||
with self.assertRaisesRegex(
|
||||
edgedb.SchemaDefinitionError,
|
||||
r"possibly more than one element returned by the index expression",
|
||||
_line=4, _col=34
|
||||
_line=4, _col=38
|
||||
):
|
||||
await self.con.execute(r"""
|
||||
CREATE TYPE Foo {
|
||||
|
@ -12350,7 +12350,7 @@ type default::Foo {
|
|||
with self.assertRaisesRegex(
|
||||
edgedb.SchemaDefinitionError,
|
||||
r"possibly more than one element returned by the index expression",
|
||||
_line=5, _col=34
|
||||
_line=5, _col=38
|
||||
):
|
||||
await self.con.execute(r"""
|
||||
CREATE TYPE Foo {
|
||||
|
@ -12364,7 +12364,7 @@ type default::Foo {
|
|||
with self.assertRaisesRegex(
|
||||
edgedb.SchemaDefinitionError,
|
||||
r"possibly more than one element returned by the index expression",
|
||||
_line=5, _col=34
|
||||
_line=5, _col=38
|
||||
):
|
||||
await self.con.execute(r"""
|
||||
CREATE TYPE Foo {
|
||||
|
@ -12770,7 +12770,7 @@ CREATE MIGRATION m14i24uhm6przo3bpl2lqndphuomfrtq3qdjaqdg6fza7h6m7tlbra
|
|||
# work, and there is a commented bit below to test that.
|
||||
async with self.assertRaisesRegexTx(
|
||||
edgedb.QueryError,
|
||||
"Unexpected keyword 'global'"):
|
||||
"Unexpected keyword 'GLOBAL'"):
|
||||
await self.con.execute('''
|
||||
CREATE MIGRATION
|
||||
{
|
||||
|
|
|
@ -175,7 +175,7 @@ class TestEdgeQLExplain(tb.QueryTestCase):
|
|||
"contexts": [
|
||||
{
|
||||
"buffer_idx": 0,
|
||||
"end": 116,
|
||||
"end": 115,
|
||||
"start": 74,
|
||||
}
|
||||
],
|
||||
|
@ -278,7 +278,7 @@ class TestEdgeQLExplain(tb.QueryTestCase):
|
|||
"contexts": [
|
||||
{
|
||||
"buffer_idx": 0,
|
||||
"end": 174,
|
||||
"end": 173,
|
||||
"start": 134,
|
||||
},
|
||||
],
|
||||
|
|
|
@ -7238,7 +7238,7 @@ aa \
|
|||
edgedb.QueryError,
|
||||
r'possibly more than one element returned by an expression '
|
||||
r'where only singletons are allowed',
|
||||
_position=29):
|
||||
_position=35):
|
||||
|
||||
await self.con.execute('''\
|
||||
SELECT Issue LIMIT LogEntry.spent_time;
|
||||
|
@ -7249,7 +7249,7 @@ aa \
|
|||
edgedb.QueryError,
|
||||
r'possibly more than one element returned by an expression '
|
||||
r'where only singletons are allowed',
|
||||
_position=29):
|
||||
_position=36):
|
||||
|
||||
await self.con.execute('''\
|
||||
SELECT Issue OFFSET LogEntry.spent_time;
|
||||
|
@ -7695,7 +7695,7 @@ aa \
|
|||
async def test_edgeql_expr_error_after_extraction_01(self):
|
||||
with self.assertRaisesRegex(
|
||||
edgedb.QueryError,
|
||||
"Unexpected \"'1'\""):
|
||||
"Unexpected ''1''"):
|
||||
|
||||
await self.con.query("""
|
||||
SELECT '''1''';
|
||||
|
|
|
@ -450,7 +450,7 @@ class TestInsert(tb.QueryTestCase):
|
|||
async def test_edgeql_insert_nested_07(self):
|
||||
with self.assertRaisesRegex(
|
||||
edgedb.EdgeQLSyntaxError,
|
||||
"Unexpected 'Subordinate'"):
|
||||
r"Missing '\{'"):
|
||||
await self.con.execute('''
|
||||
INSERT InsertTest {
|
||||
subordinates: Subordinate {
|
||||
|
|
|
@ -54,7 +54,7 @@ class TestEdgeQLIRScopeTree(tb.BaseEdgeQLCompilerTest):
|
|||
|
||||
@tb.must_fail(errors.QueryError,
|
||||
"reference to 'User.name' changes the interpretation",
|
||||
line=3, col=9)
|
||||
line=3, col=16)
|
||||
def test_edgeql_ir_scope_tree_bad_01(self):
|
||||
"""
|
||||
SELECT User.deck
|
||||
|
@ -63,7 +63,7 @@ class TestEdgeQLIRScopeTree(tb.BaseEdgeQLCompilerTest):
|
|||
|
||||
@tb.must_fail(errors.QueryError,
|
||||
"reference to 'User' changes the interpretation",
|
||||
line=3, col=9)
|
||||
line=3, col=16)
|
||||
def test_edgeql_ir_scope_tree_bad_02(self):
|
||||
"""
|
||||
SELECT User.deck
|
||||
|
|
|
@ -2227,7 +2227,7 @@ class TestEdgeQLSelect(tb.QueryTestCase):
|
|||
edgedb.QueryError,
|
||||
"cannot redefine the cardinality of link 'related_to': it is "
|
||||
"defined as 'multi' in the base object type 'default::Issue'",
|
||||
_position=73,
|
||||
_position=74,
|
||||
):
|
||||
await self.con.execute("""
|
||||
SELECT Issue {
|
||||
|
@ -2253,7 +2253,7 @@ class TestEdgeQLSelect(tb.QueryTestCase):
|
|||
edgedb.QueryError,
|
||||
"cannot redefine link 'status' as optional: it is "
|
||||
"defined as required in the base object type 'default::Issue'",
|
||||
_position=71,
|
||||
_position=72,
|
||||
):
|
||||
await self.con.execute("""
|
||||
SELECT Issue {
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2228,7 +2228,7 @@ class TestUpdate(tb.QueryTestCase):
|
|||
edgedb.QueryError,
|
||||
"cannot update link 'readonly_tag': "
|
||||
"it is declared as read-only",
|
||||
_position=147,
|
||||
_position=148,
|
||||
):
|
||||
await self.con.execute(r'''
|
||||
UPDATE UpdateTest
|
||||
|
|
|
@ -1711,7 +1711,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
|
|||
with self.assertRaisesRegex(
|
||||
edgedb.QueryError,
|
||||
"can not take cross product of volatile operation",
|
||||
_position=36):
|
||||
_position=37):
|
||||
await self.con.execute(
|
||||
r"""
|
||||
SELECT {1,2} + (FOR x in {1,2,3} UNION (x*random()))
|
||||
|
@ -1722,7 +1722,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
|
|||
with self.assertRaisesRegex(
|
||||
edgedb.QueryError,
|
||||
"can not take cross product of volatile operation",
|
||||
_position=36):
|
||||
_position=37):
|
||||
await self.con.execute(
|
||||
r"""
|
||||
SELECT ({1,2}, (INSERT Obj { n := 100 }))
|
||||
|
@ -1733,7 +1733,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
|
|||
with self.assertRaisesRegex(
|
||||
edgedb.QueryError,
|
||||
"can not take cross product of volatile operation",
|
||||
_position=64):
|
||||
_position=65):
|
||||
await self.con.execute(
|
||||
r"""
|
||||
SELECT ({1,2},
|
||||
|
|
|
@ -396,7 +396,7 @@ class TestSchema(tb.BaseSchemaLoadTest):
|
|||
@tb.must_fail(errors.InvalidPropertyTargetError,
|
||||
"invalid property type: expected a scalar type, "
|
||||
"or a scalar collection, got object type 'test::Object'",
|
||||
position=73)
|
||||
position=74)
|
||||
def test_schema_bad_prop_02(self):
|
||||
"""
|
||||
type Object {
|
||||
|
@ -1433,7 +1433,7 @@ class TestSchema(tb.BaseSchemaLoadTest):
|
|||
|
||||
@tb.must_fail(errors.SchemaDefinitionError,
|
||||
"missing value for required property",
|
||||
line=9, col=42)
|
||||
line=10, col=25)
|
||||
def test_schema_rewrite_missing_required_01(self):
|
||||
"""
|
||||
type Project {
|
||||
|
|
|
@ -36,7 +36,7 @@ class SchemaSyntaxTest(tb.BaseSyntaxTest):
|
|||
|
||||
@classmethod
|
||||
def get_parser(cls):
|
||||
return ql_parser.EdgeSDLParser()
|
||||
return ql_parser.EdgeSDLSpec().get_parser()
|
||||
|
||||
|
||||
class TestEdgeSchemaParser(SchemaSyntaxTest):
|
||||
|
@ -265,8 +265,8 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
|
|||
};
|
||||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, "Unexpected keyword 'Commit'",
|
||||
line=3, col=18)
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, "Missing identifier",
|
||||
line=3, col=17)
|
||||
def test_eschema_syntax_type_11(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -748,7 +748,7 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r"Expected 'ON', but got 'prop' instead", line=4, col=23)
|
||||
r"Missing ':='", line=4, col=22)
|
||||
def test_eschema_syntax_index_03(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -757,6 +757,8 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
|
|||
};
|
||||
};
|
||||
"""
|
||||
# XXX: error recovery quality regression
|
||||
# Expected 'ON', but got 'prop' instead
|
||||
|
||||
def test_eschema_syntax_index_04(self):
|
||||
"""
|
||||
|
@ -876,8 +878,8 @@ type LogEntry extending OwnedObject, Text {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r"Unexpected 'scalar'",
|
||||
line=4, col=9)
|
||||
r"Missing ';'",
|
||||
line=2, col=55)
|
||||
def test_eschema_syntax_ws_03(self):
|
||||
"""
|
||||
scalar type test::newScalarType0 extending str#:
|
||||
|
@ -966,7 +968,7 @@ type LogEntry extending OwnedObject, Text {
|
|||
};
|
||||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'final'",
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'FINAL'",
|
||||
line=3, col=13)
|
||||
def test_eschema_syntax_scalar_07(self):
|
||||
"""
|
||||
|
@ -1043,7 +1045,7 @@ type LogEntry extending OwnedObject, Text {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r"Unexpected 'delegated'",
|
||||
r"Unexpected keyword 'DELEGATED'",
|
||||
line=3, col=13)
|
||||
def test_eschema_syntax_constraint_02(self):
|
||||
"""
|
||||
|
@ -1112,7 +1114,7 @@ type LogEntry extending OwnedObject, Text {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r"Unexpected 'constraint'",
|
||||
r"Unexpected keyword 'CONSTRAINT'",
|
||||
line=4, col=26)
|
||||
def test_eschema_syntax_constraint_07(self):
|
||||
"""
|
||||
|
@ -1135,7 +1137,7 @@ type LogEntry extending OwnedObject, Text {
|
|||
};
|
||||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'constraint'",
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'CONSTRAINT'",
|
||||
line=3, col=13)
|
||||
def test_eschema_syntax_constraint_09(self):
|
||||
"""
|
||||
|
@ -1198,7 +1200,7 @@ abstract property test::foo {
|
|||
};
|
||||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'property'",
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'PROPERTY'",
|
||||
line=3, col=13)
|
||||
def test_eschema_syntax_property_05(self):
|
||||
"""
|
||||
|
@ -1410,7 +1412,7 @@ abstract property test::foo {
|
|||
};
|
||||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'link'",
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'LINK'",
|
||||
line=3, col=13)
|
||||
def test_eschema_syntax_link_11(self):
|
||||
"""
|
||||
|
@ -1626,7 +1628,7 @@ abstract property test::foo {
|
|||
def test_eschema_syntax_function_12(self):
|
||||
"""
|
||||
module test {
|
||||
function some_func($`(`: str = ) ) -> std::str {
|
||||
function some_func($`(`: str = () ) -> std::str {
|
||||
using edgeql function 'some_other_func';
|
||||
}
|
||||
};
|
||||
|
@ -1770,10 +1772,8 @@ abstract property test::foo {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r'Unexpected token:.+b',
|
||||
hint=r"It appears that a ',' is missing in a list of "
|
||||
r"arguments before 'b'",
|
||||
line=3, col=34)
|
||||
r"Missing ','",
|
||||
line=3, col=33)
|
||||
def test_eschema_syntax_function_21(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -1834,10 +1834,8 @@ abstract property test::foo {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r'Unexpected token:.+baz',
|
||||
hint=r"It appears that a ',' is missing in a shape "
|
||||
r"before 'baz'",
|
||||
line=5, col=17)
|
||||
r"Missing ','",
|
||||
line=4, col=25)
|
||||
def test_eschema_syntax_alias_04(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -1850,10 +1848,8 @@ abstract property test::foo {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r'Unexpected token:.+2',
|
||||
hint=r"It appears that a ',' is missing in a tuple "
|
||||
r"before '2'",
|
||||
line=3, col=32)
|
||||
r"Missing ','",
|
||||
line=3, col=31)
|
||||
def test_eschema_syntax_alias_05(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -1862,10 +1858,8 @@ abstract property test::foo {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r'Unexpected token:.+2',
|
||||
hint=r"It appears that a ',' is missing in an array "
|
||||
r"before '2'",
|
||||
line=3, col=32)
|
||||
r"Missing ','",
|
||||
line=3, col=31)
|
||||
def test_eschema_syntax_alias_06(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -1948,7 +1942,7 @@ abstract property test::foo {
|
|||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError,
|
||||
r"Unexpected keyword 'extending'", line=3, col=46)
|
||||
r"Unexpected keyword 'EXTENDING'", line=3, col=46)
|
||||
def test_eschema_syntax_annotation_14(self):
|
||||
"""
|
||||
module test {
|
||||
|
@ -1956,7 +1950,7 @@ abstract property test::foo {
|
|||
};
|
||||
"""
|
||||
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'annotation'",
|
||||
@tb.must_fail(errors.EdgeQLSyntaxError, r"Missing keyword 'ABSTRACT'",
|
||||
line=2, col=1)
|
||||
def test_eschema_syntax_annotation_15(self):
|
||||
"""
|
||||
|
|
|
@ -109,11 +109,11 @@ class TestServerProto(tb.QueryTestCase):
|
|||
await self.con.query('select syntax error')
|
||||
|
||||
with self.assertRaisesRegex(edgedb.EdgeQLSyntaxError,
|
||||
'Unexpected end of line'):
|
||||
r"Missing '\)'"):
|
||||
await self.con.query('select (')
|
||||
|
||||
with self.assertRaisesRegex(edgedb.EdgeQLSyntaxError,
|
||||
'Unexpected end of line'):
|
||||
r"Missing '\)'"):
|
||||
await self.con.query_json('select (')
|
||||
|
||||
for _ in range(10):
|
||||
|
|
Loading…
Reference in a new issue