Parser error recovery (#5693)

Co-authored-by: Michael J. Sullivan <sully@msully.net>
Aljaž Mur Eržen 2023-07-12 05:54:41 +02:00 committed by GitHub
parent 84d7875481
commit 6f6b4cd117
43 changed files with 2604 additions and 2087 deletions

Cargo.lock (generated), 142 lines changed

@ -11,6 +11,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "append-only-vec"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5608767d94038891df4c7bb82f6b1beb55fe3d204735985e20de329bc35d5fee"
[[package]]
name = "ascii"
version = "0.9.3"
@ -38,6 +44,7 @@ dependencies = [
"num-bigint 0.4.3",
"num-integer",
"num-traits",
"serde",
]
[[package]]
@ -66,9 +73,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.12.2"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "byteorder"
@ -203,13 +210,18 @@ dependencies = [
name = "edgeql-parser"
version = "0.1.0"
dependencies = [
"append-only-vec",
"base32",
"bigdecimal",
"bumpalo",
"cpython",
"edgeql-parser-derive",
"indexmap",
"memchr",
"num-bigint 0.3.3",
"phf",
"serde",
"serde_json",
"sha2",
"snafu",
"thiserror",
@ -236,7 +248,11 @@ dependencies = [
"cpython",
"edgedb-protocol",
"edgeql-parser",
"indexmap",
"num-bigint 0.4.3",
"rmp-serde",
"serde",
"serde_json",
]
[[package]]
@ -298,9 +314,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "log"
@ -328,6 +344,18 @@ dependencies = [
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6f7833f2cbf2360a6cfd58cd41a53aa7a90bd4c202f5b1c7dd2ed73c57b2c3"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
"serde",
]
[[package]]
name = "num-bigint"
version = "0.4.3"
@ -379,6 +407,48 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
[[package]]
name = "phf"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
dependencies = [
"phf_macros",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "phf_shared"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676"
dependencies = [
"siphasher",
]
[[package]]
name = "pretty_assertions"
version = "1.3.0"
@ -419,6 +489,21 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "regex"
version = "1.8.1"
@ -436,6 +521,28 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
[[package]]
name = "rmp"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44519172358fd6d58656c86ab8e7fbc9e1490c3e8f14d35ed78ca0dd07403c9f"
dependencies = [
"byteorder",
"num-traits",
"paste",
]
[[package]]
name = "rmp-serde"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5b13be192e0220b8afb7222aa5813cb62cc269ebb5cac346ca6487681d2913e"
dependencies = [
"byteorder",
"rmp",
"serde",
]
[[package]]
name = "ryu"
version = "1.0.13"
@ -468,6 +575,7 @@ version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
@ -484,6 +592,12 @@ dependencies = [
"digest",
]
[[package]]
name = "siphasher"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "snafu"
version = "0.7.4"
@ -601,9 +715,9 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
[[package]]
name = "wasm-bindgen"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"serde",
@ -613,9 +727,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
@ -628,9 +742,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -638,9 +752,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
@ -651,9 +765,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "winapi"


@ -25,23 +25,14 @@ import logging
import os
import sys
import types
import re
import parsing
from edb.common.exceptions import add_context, get_context
from edb.common import context as pctx
from edb.edgeql import tokenizer
from edb.errors import EdgeQLSyntaxError
from edb import _edgeql_parser as ql_parser
if TYPE_CHECKING:
from edb.edgeql.parser.grammar import rust_lexer
from edb.common import context as pctx, debug
ParserContext = pctx.ParserContext
logger = logging.getLogger('edb.common.parsing')
TRAILING_WS_IN_CONTINUATION = re.compile(r'\\ \s+\n')
class ParserSpecIncompatibleError(Exception):
@ -131,17 +122,6 @@ def inline(argument_index: int):
return decorator
def make_inlining_func(arg_index: int):
"""Makes a parser production handler which simply inlines an argument."""
# TODO: remove this when Rust parser is merged
def wrapper(obj, *args, **kwargs):
obj.val = args[arg_index].val
return obj
return wrapper
class NontermMeta(type):
def __new__(mcls, name, bases, dct):
result = super().__new__(mcls, name, bases, dct)
@ -171,13 +151,7 @@ class NontermMeta(type):
attr = lambda self, *args, meth=attr: meth(self, *args)
attr.__doc__ = doc
if inline_index is not None:
# TODO: remove this when Rust parser is merged
a = make_inlining_func(inline_index)
else:
a = attr
a = pctx.has_context(a)
a = pctx.has_context(attr)
a.__doc__ = attr.__doc__
a.inline_index = inline_index
@ -308,70 +282,14 @@ class Precedence(parsing.Precedence, assoc='fail', metaclass=PrecedenceMeta):
pass
class ParserError(Exception):
def __init__(
self, msg=None, *, hint=None, details=None, token=None, line=None,
col=None, expr=None, context=None):
if msg is None:
msg = 'syntax error at or near "%s"' % token
super().__init__(msg, hint=hint, details=details)
self.token = token
if line is not None:
self.line = line
if col is not None:
self.col = col
self.expr = expr
if context:
add_context(self, context)
if line is None and col is None:
self.line = context.start.line
self.col = context.start.column
@property
def context(self):
try:
return get_context(self, pctx.ParserContext)
except LookupError:
return None
def _derive_hint(
input: str,
message: str,
position: Tuple[int, int, int],
) -> Optional[str]:
_, _, off = position
if message == r"invalid string literal: invalid escape sequence '\ '":
if TRAILING_WS_IN_CONTINUATION.search(input[off:]):
return "consider removing trailing whitespace"
return None
class Parser:
class ParserSpec:
parser_spec: ClassVar[parsing.Spec | None]
lexer: Optional[rust_lexer.EdgeQLLexer]
def __init__(self, **parser_data):
self.lexer = None
self.parser = None
self.parser_data = parser_data
def cleanup(self):
self.__class__.parser_spec = None
self.__class__.lexer_spec = None
self.lexer = None
self.parser = None
def get_debug(self):
return False
def get_exception(self, native_err, context, token=None):
if not isinstance(native_err, ParserError):
return ParserError(native_err.args[0],
context=context, token=token)
else:
return native_err
return debug.flags.edgeql_parser
def get_parser_spec_module(self) -> types.ModuleType:
raise NotImplementedError
@ -421,96 +339,3 @@ class Parser:
return os.path.join(
os.path.dirname(mod.__file__),
mod.__name__.rpartition('.')[2] + '.' + type)
def get_lexer(self):
"""Return an initialized lexer.
The lexer must implement 'setinputstr' and 'token' methods.
A lexer derived from edb.common.lexer.Lexer will satisfy these
criteria.
"""
raise NotImplementedError
def reset_parser(
self,
input: Union[str, tokenizer.Source],
filename: Optional[str]=None
):
if not self.parser:
self.lexer = self.get_lexer()
self.parser = parsing.Lr(self.get_parser_spec())
self.parser.parser_data = self.parser_data
self.parser.verbose = self.get_debug()
self.parser.reset()
assert self.lexer
self.lexer.setinputstr(input, filename=filename)
def convert_lex_token(self, mod: Any, tok: ql_parser.Token) -> Token:
token_cls = mod.TokenMeta.for_lex_token(tok.kind())
return token_cls(tok.text(), tok.value(), self.context(tok))
def parse(
self,
input: Union[str, tokenizer.Source],
filename: Optional[str] = None
):
try:
self.reset_parser(input, filename=filename)
assert self.lexer
mod = self.get_parser_spec_module()
while tok := self.lexer.token():
token = self.convert_lex_token(mod, tok)
if token is None:
continue
self.parser.token(token)
self.parser.eoi()
except ql_parser.TokenizerError as e:
message, position = e.args
assert self.lexer
hint = _derive_hint(self.lexer.inputstr, message, position)
raise EdgeQLSyntaxError(
message, context=self.context(pos=position), hint=hint
) from e
except parsing.UnexpectedToken as e:
raise self.get_exception(
e, context=self.context(tok), token=tok
) from e
except ParserError as e:
raise self.get_exception(e, context=e.context) from e
return self.parser.start[0].val
def context(self, tok=None, pos: Optional[Tuple[int, int, int]] = None):
lex = self.lexer
assert lex
name = lex.filename if lex.filename else '<string>'
if tok is None:
if pos is None:
pos = lex.end_of_input
context = pctx.ParserContext(
name=name, buffer=lex.inputstr,
start=pos[2], end=pos[2])
else:
context = pctx.ParserContext(
name=name, buffer=lex.inputstr,
start=tok.start()[2],
end=tok.end()[2])
return context
def line_col_from_char_offset(source, position):
line = source[:position].count('\n') + 1
col = source.rfind('\n', 0, position)
col = position if col == -1 else position - col
return line, col


@ -8,7 +8,8 @@ edition = "2021"
[dependencies]
base32 = "0.4.0"
bigdecimal = "0.3.0"
bigdecimal = { version = "0.3.0", features = ["serde"] }
num-bigint = { version = "0.3.0", features = ["serde"] }
sha2 = "0.10.2"
snafu = "0.7.0"
memchr = "2.5.0"
@ -21,10 +22,14 @@ unicode-width = "0.1.8"
edgeql-parser-derive = { path = "edgeql-parser-derive", optional = true }
cpython = { version = "0.7.0", optional = true }
indexmap = "1.9.3"
serde_json = {version="1.0", features=["preserve_order"]}
bumpalo = {version="3.13.0", features=["collections"]}
phf = { version = "0.11.1", features = ["macros"] }
append-only-vec = "0.1.2"
[features]
default = []
wasm-lexer = ["wasm-bindgen", "serde"]
python = ["cpython", "edgeql-parser-derive"]
python = ["cpython", "serde", "edgeql-parser-derive"]
[lib]


@ -7,11 +7,15 @@ rust-version = "1.59"
edition = "2021"
[dependencies]
edgeql-parser = {path = ".."}
edgeql-parser = {path = "..", features=["serde"]}
bytes = "1.0.1"
num-bigint = "0.4.3"
bigdecimal = "0.3.0"
blake2 = "0.10.4"
serde = {version="1.0", features=["derive"]}
serde_json = "1.0"
rmp-serde = "1.1.1"
indexmap = "1.9.3"
[dependencies.edgedb-protocol]
git = "https://github.com/edgedb/edgedb-rust"


@ -1,65 +1,92 @@
use cpython::{PyObject, ToPyObject, Python, PyErr, PythonObject, PyType};
use cpython::exc::Exception;
use crate::cpython::PythonObjectWithTypeObject;
use cpython::exc::Exception;
use cpython::{
PyClone, PyErr, PyList, PyObject, PyResult, PyType, Python, PythonObject, ToPyObject,
};
use edgeql_parser::tokenizer::Error;
// can't use py_exception macro because that fails on dotted module name
pub struct TokenizerError(PyObject);
pub struct SyntaxError(PyObject);
pyobject_newtype!(TokenizerError);
pyobject_newtype!(SyntaxError);
impl TokenizerError {
impl SyntaxError {
pub fn new<T: ToPyObject>(py: Python, args: T) -> PyErr {
PyErr::new::<TokenizerError, T>(py, args)
PyErr::new::<SyntaxError, T>(py, args)
}
}
impl cpython::PythonObjectWithCheckedDowncast for TokenizerError {
impl cpython::PythonObjectWithCheckedDowncast for SyntaxError {
#[inline]
fn downcast_from(py: Python, obj: PyObject)
-> Result<TokenizerError, cpython::PythonObjectDowncastError>
{
if TokenizerError::type_object(py).is_instance(py, &obj) {
fn downcast_from(
py: Python,
obj: PyObject,
) -> Result<SyntaxError, cpython::PythonObjectDowncastError> {
if SyntaxError::type_object(py).is_instance(py, &obj) {
Ok(unsafe { PythonObject::unchecked_downcast_from(obj) })
} else {
Err(cpython::PythonObjectDowncastError::new(py,
"TokenizerError",
TokenizerError::type_object(py),
Err(cpython::PythonObjectDowncastError::new(
py,
"SyntaxError",
SyntaxError::type_object(py),
))
}
}
#[inline]
fn downcast_borrow_from<'a, 'p>(py: Python<'p>, obj: &'a PyObject)
-> Result<&'a TokenizerError, cpython::PythonObjectDowncastError<'p>>
{
if TokenizerError::type_object(py).is_instance(py, obj) {
fn downcast_borrow_from<'a, 'p>(
py: Python<'p>,
obj: &'a PyObject,
) -> Result<&'a SyntaxError, cpython::PythonObjectDowncastError<'p>> {
if SyntaxError::type_object(py).is_instance(py, obj) {
Ok(unsafe { PythonObject::unchecked_downcast_borrow_from(obj) })
} else {
Err(cpython::PythonObjectDowncastError::new(py,
"TokenizerError",
TokenizerError::type_object(py),
Err(cpython::PythonObjectDowncastError::new(
py,
"SyntaxError",
SyntaxError::type_object(py),
))
}
}
}
impl cpython::PythonObjectWithTypeObject for TokenizerError {
impl cpython::PythonObjectWithTypeObject for SyntaxError {
#[inline]
fn type_object(py: Python) -> PyType {
unsafe {
static mut TYPE_OBJECT: *mut cpython::_detail::ffi::PyTypeObject
= 0 as *mut cpython::_detail::ffi::PyTypeObject;
static mut TYPE_OBJECT: *mut cpython::_detail::ffi::PyTypeObject =
0 as *mut cpython::_detail::ffi::PyTypeObject;
if TYPE_OBJECT.is_null() {
TYPE_OBJECT = PyErr::new_type(
py,
"edb._edgeql_parser.TokenizerError",
"edb._edgeql_parser.SyntaxError",
Some(PythonObject::into_object(py.get_type::<Exception>())),
None).as_type_ptr();
None,
)
.as_type_ptr();
}
PyType::from_type_ptr(py, TYPE_OBJECT)
}
}
}
py_class!(pub class ParserResult |py| {
data _out: PyObject;
data _errors: PyList;
def out(&self) -> PyResult<PyObject> {
Ok(self._out(py).clone_ref(py))
}
def errors(&self) -> PyResult<PyList> {
Ok(self._errors(py).clone_ref(py))
}
});
pub fn parser_error_into_tuple(py: Python, error: Error) -> PyObject {
(error.message, (error.span.start, error.span.end))
.into_py_object(py)
.into_object()
}


@ -5,8 +5,7 @@ use cpython::exc::RuntimeError;
use edgeql_parser::hash;
use crate::errors::TokenizerError;
use crate::pynormalize::py_pos;
use crate::errors::SyntaxError;
py_class!(pub class Hasher |py| {
@ -26,7 +25,7 @@ py_class!(pub class Hasher |py| {
hasher.add_source(&text)
.map_err(|e| match e {
hash::Error::Tokenizer(msg, pos) => {
TokenizerError::new(py, (msg, py_pos(py, &pos)))
SyntaxError::new(py, (msg, (pos.offset, py.None())))
}
})?;
Ok(py.None())


@ -1,20 +1,22 @@
#[macro_use]
extern crate cpython;
use cpython::PyString;
use cpython::{PyObject, PyString};
mod errors;
mod hash;
mod keywords;
pub mod normalize;
mod parser;
mod position;
mod pynormalize;
mod tokenizer;
use errors::TokenizerError;
use errors::{SyntaxError, ParserResult};
use parser::{parse, CSTNode, Production};
use position::{offset_of_line, SourcePoint};
use pynormalize::normalize;
use tokenizer::{get_unpickle_fn, tokenize, Token};
use tokenizer::{get_fn_unpickle_token, tokenize, OpaqueToken};
py_module_initializer!(
_edgeql_parser,
@ -22,6 +24,7 @@ py_module_initializer!(
PyInit__edgeql_parser,
|py, m| {
tokenizer::init_module(py);
parser::init_module();
let keywords = keywords::get_keywords(py)?;
m.add(
py,
@ -30,9 +33,10 @@ py_module_initializer!(
)?;
m.add(py, "tokenize", py_fn!(py, tokenize(data: &PyString)))?;
m.add(py, "_unpickle_token", get_unpickle_fn(py))?;
m.add(py, "Token", py.get_type::<Token>())?;
m.add(py, "TokenizerError", py.get_type::<TokenizerError>())?;
m.add(py, "_unpickle_token", get_fn_unpickle_token(py))?;
m.add(py, "Token", py.get_type::<OpaqueToken>())?;
m.add(py, "SyntaxError", py.get_type::<SyntaxError>())?;
m.add(py, "ParserResult", py.get_type::<ParserResult>())?;
m.add(py, "Entry", py.get_type::<pynormalize::Entry>())?;
m.add(py, "SourcePoint", py.get_type::<SourcePoint>())?;
m.add(py, "normalize", py_fn!(py, normalize(query: &PyString)))?;
@ -46,6 +50,13 @@ py_module_initializer!(
m.add(py, "partial_reserved_keywords", keywords.partial)?;
m.add(py, "future_reserved_keywords", keywords.future)?;
m.add(py, "current_reserved_keywords", keywords.current)?;
m.add(
py,
"parse",
py_fn!(py, parse(parser_name: &PyString, data: PyObject)),
)?;
m.add(py, "CSTNode", py.get_type::<CSTNode>())?;
m.add(py, "Production", py.get_type::<Production>())?;
Ok(())
}
);


@ -1,7 +1,9 @@
use std::borrow::Cow;
use std::collections::BTreeSet;
use edgeql_parser::tokenizer::{Kind, Tokenizer, Token, Value};
use edgeql_parser::keywords::Keyword;
use edgeql_parser::position::{Pos, Span};
use edgeql_parser::tokenizer::{Kind, Token, Tokenizer, Value};
use blake2::{Blake2b512, Digest};
@ -10,76 +12,29 @@ pub struct Variable {
pub value: Value,
}
pub struct Entry<'a> {
pub struct Entry {
pub processed_source: String,
pub hash: [u8; 64],
pub tokens: Vec<Token<'a>>,
pub tokens: Vec<Token<'static>>,
pub variables: Vec<Vec<Variable>>,
pub end_pos: Pos,
pub named_args: bool,
pub first_arg: Option<usize>,
}
#[derive(Debug)]
pub enum Error {
Tokenizer(String, Pos),
Tokenizer(String, u64),
Assertion(String, Pos),
}
fn push_var<'x>(res: &mut Vec<Token<'x>>, module: &'x str, typ: &'x str,
var: String, span: Span)
{
res.push(Token {kind: Kind::OpenParen, text: "(".into(), span, value: None});
res.push(Token {kind: Kind::Less, text: "<".into(), span, value: None});
res.push(Token {kind: Kind::Ident, text: module.into(), span, value: None});
res.push(Token {kind: Kind::Namespace, text: "::".into(), span, value: None});
res.push(Token {kind: Kind::Ident, text: typ.into(), span,
value: Some(Value::String(typ.to_string())),
});
res.push(Token {kind: Kind::Greater, text: ">".into(), span, value: None});
res.push(Token {kind: Kind::Argument, text: var.into(), span, value: None});
res.push(Token {kind: Kind::CloseParen, text: ")".into(), span, value: None});
}
fn scan_vars<'x, 'y: 'x, I>(tokens: I) -> Option<(bool, usize)>
where I: IntoIterator<Item=&'x Token<'y>>,
{
let mut max_visited = None::<usize>;
let mut names = BTreeSet::new();
for t in tokens {
if t.kind == Kind::Argument {
if let Ok(v) = t.text[1..].parse() {
if max_visited.map(|old| v > old).unwrap_or(true) {
max_visited = Some(v);
}
} else {
names.insert(&t.text[..]);
}
}
}
if names.is_empty() {
let next = max_visited.map(|x| x.checked_add(1)).unwrap_or(Some(0))?;
Some((false, next))
} else if max_visited.is_some() {
return None // mixed arguments
} else {
Some((true, names.len()))
}
}
fn hash(text: &str) -> [u8; 64] {
let mut result = [0u8; 64];
result.copy_from_slice(&Blake2b512::new_with_prefix(text.as_bytes())
.finalize());
return result;
}
pub fn normalize(text: &str) -> Result<Entry, Error> {
let mut token_stream = Tokenizer::new(text).validated_values();
let tokens = (&mut token_stream)
let tokens = Tokenizer::new(text)
.validated_values()
.with_eof()
.map(|x| x.map(|t| t.cloned()))
.collect::<Result<Vec<_>, _>>()
.map_err(|e| Error::Tokenizer(e.message, e.span.start))?;
let end_pos = token_stream.current_pos();
let (named_args, var_idx) = match scan_vars(&tokens) {
Some(pair) => pair,
None => {
@ -90,7 +45,6 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
processed_source,
tokens,
variables: Vec::new(),
end_pos,
named_args: false,
first_arg: None,
});
@ -110,7 +64,7 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
}
};
let mut last_was_set = false;
for (idx, tok) in tokens.iter().enumerate() {
for tok in &tokens {
let mut is_set = false;
match tok.kind {
Kind::IntConst
@ -120,69 +74,64 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
// Don't replace 'LIMIT 1' as a special case
&& (tok.text != "1"
|| !matches!(rewritten_tokens.last(),
Some(Token { kind: Kind::Keyword, ref text, .. })
if text.eq_ignore_ascii_case("LIMIT")))
Some(Token { kind: Kind::Keyword(Keyword("limit")), .. })))
&& tok.text != "9223372036854775808"
=> {
push_var(&mut rewritten_tokens, "__std__", "int64",
rewritten_tokens.extend(arg_type_cast( "__std__", "int64",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::FloatConst => {
push_var(&mut rewritten_tokens, "__std__", "float64",
rewritten_tokens.extend(arg_type_cast( "__std__", "float64",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::BigIntConst => {
push_var(&mut rewritten_tokens, "__std__", "bigint",
rewritten_tokens.extend(arg_type_cast( "__std__", "bigint",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::DecimalConst => {
push_var(&mut rewritten_tokens, "__std__", "decimal",
rewritten_tokens.extend(arg_type_cast( "__std__", "decimal",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::Str => {
push_var(&mut rewritten_tokens, "__std__", "str",
rewritten_tokens.extend(arg_type_cast( "__std__", "str",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::Keyword
if (matches!(&(&tok.text[..].to_uppercase())[..],
"CONFIGURE"|"CREATE"|"ALTER"|"DROP"|"START"|"ANALYZE")
|| (last_was_set &&
matches!(&(&tok.text[..].to_uppercase())[..],
"GLOBAL"))
)
=> {
Kind::Keyword(Keyword(kw))
if (
matches!(kw, "configure"|"create"|"alter"|"drop"|"start"|"analyze")
|| (last_was_set && kw == "global")
) => {
let processed_source = serialize_tokens(&tokens);
return Ok(Entry {
hash: hash(&processed_source),
processed_source,
tokens,
variables: Vec::new(),
end_pos,
named_args: false,
first_arg: None,
});
@ -192,14 +141,11 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
// because the only statements with internal semis are DDL
// statements, which we don't support anyway.
Kind::Semicolon => {
if idx + 1 < tokens.len() {
all_variables.push(variables);
variables = Vec::new();
}
all_variables.push(variables);
variables = Vec::new();
rewritten_tokens.push(tok.clone());
}
Kind::Keyword
if (matches!(&(&tok.text[..].to_uppercase())[..], "SET")) => {
Kind::Keyword(Keyword("set")) => {
is_set = true;
rewritten_tokens.push(tok.clone());
}
@ -214,75 +160,39 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
hash: hash(&processed_source),
processed_source,
named_args,
first_arg: if counter <= var_idx { None } else { Some(var_idx) },
first_arg: if counter <= var_idx {
None
} else {
Some(var_idx)
},
tokens: rewritten_tokens,
variables: all_variables,
end_pos,
});
}
fn is_operator(token: &Token) -> bool {
use edgeql_parser::tokenizer::Kind::*;
match token.kind {
| Assign
| SubAssign
| AddAssign
| Arrow
| Coalesce
| Namespace
| DoubleSplat
| BackwardLink
| FloorDiv
| Concat
| GreaterEq
| LessEq
| NotEq
| NotDistinctFrom
| DistinctFrom
| Comma
| OpenParen
| CloseParen
| OpenBracket
| CloseBracket
| OpenBrace
| CloseBrace
| Dot
| Semicolon
| Colon
| Add
| Sub
| Mul
| Div
| Modulo
| Pow
| Less
| Greater
| Eq
| Ampersand
| Pipe
| At
=> true,
| DecimalConst
| FloatConst
| IntConst
| BigIntConst
| BinStr
| Argument
| Str
| BacktickName
| Keyword
| Ident
| Substitution
=> false,
Assign | SubAssign | AddAssign | Arrow | Coalesce | Namespace | DoubleSplat
| BackwardLink | FloorDiv | Concat | GreaterEq | LessEq | NotEq | NotDistinctFrom
| DistinctFrom | Comma | OpenParen | CloseParen | OpenBracket | CloseBracket
| OpenBrace | CloseBrace | Dot | Semicolon | Colon | Add | Sub | Mul | Div | Modulo
| Pow | Less | Greater | Eq | Ampersand | Pipe | At => true,
DecimalConst | FloatConst | IntConst | BigIntConst | BinStr | Argument | Str
| BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon => false,
}
}
fn serialize_tokens(tokens: &[Token<'_>]) -> String {
fn serialize_tokens(tokens: &[Token]) -> String {
use edgeql_parser::tokenizer::Kind::Argument;
let mut buf = String::new();
let mut needs_space = false;
for token in tokens {
if matches!(token.kind, Kind::EOF | Kind::EOI) {
break;
}
if needs_space && !is_operator(token) && token.kind != Argument {
buf.push(' ');
}
@ -292,12 +202,78 @@ fn serialize_tokens(tokens: &[Token<'_>]) -> String {
return buf;
}
fn scan_vars<'x, 'y: 'x, I>(tokens: I) -> Option<(bool, usize)>
where
I: IntoIterator<Item = &'x Token<'x>>,
{
let mut max_visited = None::<usize>;
let mut names = BTreeSet::new();
for t in tokens {
if t.kind == Kind::Argument {
if let Ok(v) = t.text[1..].parse() {
if max_visited.map(|old| v > old).unwrap_or(true) {
max_visited = Some(v);
}
} else {
names.insert(&t.text[..]);
}
}
}
if names.is_empty() {
let next = max_visited.map(|x| x.checked_add(1)).unwrap_or(Some(0))?;
Some((false, next))
} else if max_visited.is_some() {
return None; // mixed arguments
} else {
Some((true, names.len()))
}
}
fn hash(text: &str) -> [u8; 64] {
let mut result = [0u8; 64];
result.copy_from_slice(&Blake2b512::new_with_prefix(text.as_bytes()).finalize());
return result;
}
/// Produces tokens corresponding to (<module::typ>$var)
fn arg_type_cast(
module: &'static str,
typ: &'static str,
var: String,
span: Span,
) -> [Token<'static>; 8] {
fn tk(kind: Kind, text: Cow<'_, str>, span: Span) -> Token {
let value = if kind == Kind::Ident {
Some(Value::String(text.to_string()))
} else {
None
};
Token {
kind,
text,
value,
span,
}
}
[
tk(Kind::OpenParen, "(".into(), span),
tk(Kind::Less, "<".into(), span),
tk(Kind::Ident, module.into(), span),
tk(Kind::Namespace, "::".into(), span),
tk(Kind::Ident, typ.into(), span),
tk(Kind::Greater, ">".into(), span),
tk(Kind::Argument, var.into(), span),
tk(Kind::CloseParen, ")".into(), span),
]
}
#[cfg(test)]
mod test {
use super::scan_vars;
use edgeql_parser::tokenizer::{Token, Tokenizer};
fn tokenize<'x>(s: &'x str) -> Vec<Token<'x>> {
fn tokenize<'x>(s: &'x str) -> Vec<Token> {
let mut r = Vec::new();
let mut s = Tokenizer::new(s);
loop {
@ -328,8 +304,10 @@ mod test {
assert_eq!(scan_vars(&tokenize("$a")).unwrap(), (true, 1));
assert_eq!(scan_vars(&tokenize("$b $c $d")).unwrap(), (true, 3));
assert_eq!(scan_vars(&tokenize("$b $c $b")).unwrap(), (true, 2));
assert_eq!(scan_vars(&tokenize("$a $b $b $a $c $xx")).unwrap(),
(true, 4));
assert_eq!(
scan_vars(&tokenize("$a $b $b $a $c $xx")).unwrap(),
(true, 4)
);
}
#[test]
@ -339,5 +317,4 @@ mod test {
assert_eq!(scan_vars(&tokenize("$b $c $100")), None);
assert_eq!(scan_vars(&tokenize("$10 $xx $yy")), None);
}
}


@ -0,0 +1,163 @@
use std::collections::HashMap;
use cpython::{
ObjectProtocol, PyClone, PyInt, PyList, PyObject, PyResult, PyString, PyTuple, Python,
PythonObject, PythonObjectWithCheckedDowncast, ToPyObject,
};
use edgeql_parser::parser;
use crate::errors::{parser_error_into_tuple, ParserResult};
use crate::pynormalize::value_to_py_object;
use crate::tokenizer::OpaqueToken;
pub fn parse(py: Python, parser_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
let (spec, productions) = load_spec(py, parser_name.to_string(py)?.as_ref())?;
let tokens = downcast_tokens(py, tokens)?;
let context = parser::Context::new(spec);
let (cst, errors) = parser::parse(&tokens, &context);
let cst = cst.map(|c| to_py_cst(c, py)).transpose()?;
let errors = errors
.into_iter()
.map(|e| parser_error_into_tuple(py, e))
.collect::<Vec<_>>();
let errors = PyList::new(py, &errors);
let res = ParserResult::create_instance(py, cst.into_py_object(py), errors)?;
Ok((res, productions).into_py_object(py))
}
py_class!(pub class CSTNode |py| {
data _production: PyObject;
data _terminal: PyObject;
def production(&self) -> PyResult<PyObject> {
Ok(self._production(py).clone_ref(py))
}
def terminal(&self) -> PyResult<PyObject> {
Ok(self._terminal(py).clone_ref(py))
}
});
py_class!(pub class Production |py| {
data _id: PyInt;
data _args: PyList;
def id(&self) -> PyResult<PyInt> {
Ok(self._id(py).clone_ref(py))
}
def args(&self) -> PyResult<PyList> {
Ok(self._args(py).clone_ref(py))
}
});
py_class!(pub class Terminal |py| {
data _text: PyString;
data _value: PyObject;
data _start: u64;
data _end: u64;
def text(&self) -> PyResult<PyString> {
Ok(self._text(py).clone_ref(py))
}
def value(&self) -> PyResult<PyObject> {
Ok(self._value(py).clone_ref(py))
}
def start(&self) -> PyResult<u64> {
Ok(*self._start(py))
}
def end(&self) -> PyResult<u64> {
Ok(*self._end(py))
}
});
static mut PARSER_SPECS: Option<HashMap<String, (parser::Spec, PyObject)>> = None;
pub fn init_module() {
unsafe {
PARSER_SPECS = Some(HashMap::new());
}
}
fn downcast_tokens<'a>(py: Python, token_list: PyObject) -> PyResult<Vec<parser::Terminal>> {
let tokens = PyList::downcast_from(py, token_list)?;
let mut buf = Vec::with_capacity(tokens.len(py));
for token in tokens.iter(py) {
let token = OpaqueToken::downcast_from(py, token)?;
let token = token.inner(py);
buf.push(parser::Terminal::from_token(token));
}
Ok(buf)
}
fn load_spec(py: Python, parser_name: &str) -> PyResult<&'static (parser::Spec, PyObject)> {
let parser_specs = unsafe { PARSER_SPECS.as_mut().unwrap() };
if !parser_specs.contains_key(parser_name) {
let parser_mod = py.import("edb.edgeql.parser.parser")?;
let process_spec = py.import("edb.edgeql.parser")?.get(py, "process_spec")?;
let parser_cls = parser_mod.get(py, parser_name)?;
let parser = parser_cls.call(py, PyTuple::new(py, &[]), None)?;
let res = process_spec.call(py, (parser,), None)?;
let res = PyTuple::downcast_from(py, res)?;
let spec_json = PyString::downcast_from(py, res.get_item(py, 0))?;
let spec_json = spec_json.to_string(py).unwrap();
let productions = res.get_item(py, 1);
let spec = parser::Spec::from_json(&spec_json).unwrap();
parser_specs.insert(parser_name.to_string(), (spec, productions));
}
Ok(unsafe { PARSER_SPECS.as_ref().unwrap().get(parser_name).unwrap() })
}
fn to_py_cst<'a>(cst: &'a parser::CSTNode<'a>, py: Python) -> PyResult<CSTNode> {
match cst {
parser::CSTNode::Empty => CSTNode::create_instance(py, py.None(), py.None()),
parser::CSTNode::Terminal(token) => CSTNode::create_instance(
py,
py.None(),
Terminal::create_instance(
py,
token.text.to_py_object(py),
if let Some(val) = &token.value {
value_to_py_object(py, val)?
} else {
py.None()
},
token.span.start,
token.span.end,
)?
.into_object(),
),
parser::CSTNode::Production(prod) => CSTNode::create_instance(
py,
Production::create_instance(
py,
prod.id.into_py_object(py),
PyList::new(
py,
prod.args
.iter()
.map(|a| to_py_cst(a, py).map(|x| x.into_object()))
.collect::<PyResult<Vec<_>>>()?
.as_slice(),
),
)?
.into_object(),
py.None(),
),
}
}
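
For orientation, here is a minimal sketch of driving the new pure-Rust parser directly, outside the Python binding above. It is not part of the diff; it assumes a grammar spec already serialized to JSON (the same data `load_spec` feeds to `Spec::from_json`) and it simplifies lifetimes and error handling:

```rust
use edgeql_parser::parser::{self, Context, Spec};
use edgeql_parser::tokenizer::Tokenizer;

fn parse_snippet(spec_json: &str, source: &str) {
    // Grammar tables exported from the Python grammar (assumed to be available).
    let spec = Spec::from_json(spec_json).expect("valid spec JSON");
    let ctx = Context::new(&spec);

    // Tokenize and convert to parser terminals, keeping the trailing EOF token.
    let terminals: Vec<parser::Terminal> = Tokenizer::new(source)
        .validated_values()
        .with_eof()
        .map(|res| res.map(|tok| parser::Terminal::from_token(tok.cloned())))
        .collect::<Result<_, _>>()
        .expect("no tokenizer errors");

    // The parser returns a CST (when recovery succeeds) plus any recovered errors.
    let (cst, errors) = parser::parse(&terminals, &ctx);
    for error in &errors {
        eprintln!("syntax error: {}", error.message);
    }
    if let Some(root) = cst {
        // Walk the concrete syntax tree here.
        let _ = root;
    }
}
```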


@ -1,19 +1,19 @@
use std::convert::TryFrom;
use bigdecimal::Num;
use cpython::exc::AssertionError;
use cpython::{PyBytes, PyErr, PyInt, PyTuple, PythonObject, ToPyObject};
use cpython::{PyBytes, PyErr, PyInt, PythonObject, ToPyObject};
use cpython::{PyClone, PyDict, PyList, PyResult, PyString, Python};
use cpython::{PyFloat, PyObject};
use bytes::{BufMut, Bytes, BytesMut};
use edgedb_protocol::codec;
use edgedb_protocol::model::{BigInt, Decimal};
use edgeql_parser::position::Pos;
use edgeql_parser::tokenizer::Value;
use crate::errors::TokenizerError;
use crate::errors::SyntaxError;
use crate::normalize::{normalize as _normalize, Error, Variable};
use crate::tokenizer::convert_tokens;
use crate::tokenizer::tokens_to_py;
py_class!(pub class Entry |py| {
data _key: PyBytes;
@ -60,10 +60,6 @@ py_class!(pub class Entry |py| {
}
});
pub fn py_pos(py: Python, pos: &Pos) -> PyTuple {
(pos.line, pos.column, pos.offset).to_py_object(py)
}
pub fn serialize_extra(variables: &[Variable]) -> Result<Bytes, String> {
use edgedb_protocol::codec::Codec;
use edgedb_protocol::value::Value as P;
@ -91,8 +87,15 @@ pub fn serialize_extra(variables: &[Variable]) -> Result<Bytes, String> {
.map_err(|e| format!("float cannot be encoded: {}", e))?;
}
Value::BigInt(ref v) => {
let val = BigInt::try_from(v.clone())
.map_err(|e| format!("bigint cannot be encoded: {}", e))?;
// Two different BigInt implementations are in play here.
// We have to use bigdecimal::num_bigint::BigInt because it can parse with radix 16.
let val = bigdecimal::num_bigint::BigInt::from_str_radix(v, 16)
.map_err(|e| format!("bigint cannot be encoded: {}", e))
.and_then(|x| {
BigInt::try_from(x).map_err(|e| format!("bigint cannot be encoded: {}", e))
})?;
codec::BigInt
.encode(&mut buf, &P::BigInt(val))
.map_err(|e| format!("bigint cannot be encoded: {}", e))?;
@ -145,7 +148,7 @@ pub fn normalize(py: Python<'_>, text: &PyString) -> PyResult<Entry> {
py,
/* key: */ PyBytes::new(py, &entry.hash[..]),
/* processed_source: */ entry.processed_source,
/* tokens: */ convert_tokens(py, entry.tokens, entry.end_pos)?,
/* tokens: */ tokens_to_py(py, entry.tokens)?,
/* extra_blobs: */ blobs,
/* extra_named: */ entry.named_args,
/* first_extra: */ entry.first_arg,
@ -154,7 +157,7 @@ pub fn normalize(py: Python<'_>, text: &PyString) -> PyResult<Entry> {
)?)
}
Err(Error::Tokenizer(msg, pos)) => {
return Err(TokenizerError::new(py, (msg, py_pos(py, &pos))))
return Err(SyntaxError::new(py, (msg, (pos, py.None()))))
}
Err(Error::Assertion(msg, pos)) => {
return Err(PyErr::new::<AssertionError, _>(
@ -170,10 +173,9 @@ pub fn value_to_py_object(py: Python, val: &Value) -> PyResult<PyObject> {
Value::Int(v) => v.to_py_object(py).into_object(),
Value::String(v) => v.to_py_object(py).into_object(),
Value::Float(v) => v.to_py_object(py).into_object(),
Value::BigInt(v) => {
py.get_type::<PyInt>()
.call(py, (v.to_str_radix(16), 16.to_py_object(py)), None)?
}
Value::BigInt(v) => py
.get_type::<PyInt>()
.call(py, (v, 16.to_py_object(py)), None)?,
Value::Decimal(v) => py.get_type::<PyFloat>().call(py, (v.to_string(),), None)?,
Value::Bytes(v) => PyBytes::new(py, v).into_object(),
})
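
Since `Value::BigInt` now carries its digits as a radix-16 string rather than a numeric type, both code paths above reparse that string. A tiny illustrative sketch of the convention, not part of the diff:

```rust
use bigdecimal::num_bigint::BigInt;
use bigdecimal::Num; // brings `from_str_radix` into scope

fn decode_bigint(hex_digits: &str) -> BigInt {
    // Big integer literals are carried as hexadecimal strings,
    // e.g. the literal `23n` is stored as "17".
    BigInt::from_str_radix(hex_digits, 16).expect("valid hex digits")
}

fn main() {
    assert_eq!(decode_bigint("17"), BigInt::from(23));
}
```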


@ -1,572 +1,94 @@
use std::collections::HashMap;
use cpython::{PyBytes, PyClone, PyResult, PyString, Python, PythonObject};
use cpython::{PyList, PyObject, PyTuple, ToPyObject};
use cpython::{PyString, PyResult, Python, PyClone, PythonObject};
use cpython::{PyTuple, PyList, PyObject, ToPyObject, ObjectProtocol};
use cpython::{FromPyObject};
use edgeql_parser::tokenizer::{Token, Tokenizer};
use edgeql_parser::tokenizer::{Kind, is_keyword, Tokenizer, Token as PToken};
use edgeql_parser::tokenizer::{MAX_KEYWORD_LENGTH};
use edgeql_parser::position::Pos;
use edgeql_parser::keywords::{PARTIAL_RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
use edgeql_parser::keywords::{CURRENT_RESERVED_KEYWORDS};
use edgeql_parser::keywords::{FUTURE_RESERVED_KEYWORDS};
use crate::errors::{parser_error_into_tuple, ParserResult};
use crate::errors::TokenizerError;
use crate::pynormalize::{py_pos, value_to_py_object};
pub fn tokenize(py: Python, s: &PyString) -> PyResult<ParserResult> {
let data = s.to_string(py)?;
static mut TOKENS: Option<Tokens> = None;
let mut token_stream = Tokenizer::new(&data[..]).validated_values().with_eof();
let mut tokens: Vec<_> = Vec::new();
let mut errors: Vec<_> = Vec::new();
fn rs_pos(py: Python, value: &PyObject) -> PyResult<Pos> {
let (line, column, offset) = FromPyObject::extract(py, value)?;
Ok(Pos { line, column, offset })
for res in &mut token_stream {
match res {
Ok(token) => tokens.push(token),
Err(e) => {
errors.push(parser_error_into_tuple(py, e));
// TODO: fix tokenizer to skip bad tokens and continue
break;
}
}
}
let tokens = tokens_to_py(py, tokens)?;
let errors = PyList::new(py, errors.as_slice()).to_py_object(py);
ParserResult::create_instance(py, tokens.into_object(), errors)
}
py_class!(pub class Token |py| {
data _kind: PyString;
data _text: PyString;
data _value: PyObject;
data _start: Pos;
data _end: Pos;
def kind(&self) -> PyResult<PyString> {
Ok(self._kind(py).clone_ref(py))
}
def text(&self) -> PyResult<PyString> {
Ok(self._text(py).clone_ref(py))
}
def value(&self) -> PyResult<PyObject> {
Ok(self._value(py).clone_ref(py))
}
def start(&self) -> PyResult<PyTuple> {
Ok(py_pos(py, self._start(py)))
}
def end(&self) -> PyResult<PyTuple> {
Ok(py_pos(py, self._end(py)))
}
// An opaque wrapper around [edgeql_parser::tokenizer::Token].
// Supports Python pickle serialization.
py_class!(pub class OpaqueToken |py| {
data _inner: Token<'static>;
def __repr__(&self) -> PyResult<PyString> {
let val = self._value(py);
let s = if *val == py.None() {
format!("<Token {}>", self._kind(py).to_string(py)?)
} else {
format!("<Token {} {}>",
self._kind(py).to_string(py)?,
val.repr(py)?.to_string(py)?)
};
Ok(PyString::new(py, &s))
Ok(PyString::new(py, &self._inner(py).to_string()))
}
def __reduce__(&self) -> PyResult<PyTuple> {
let data: Vec<u8> = rmp_serde::to_vec(self._inner(py)).unwrap().to_vec();
return Ok((
get_unpickle_fn(py),
get_fn_unpickle_token(py),
(
self._kind(py),
self._text(py),
self._value(py),
py_pos(py, self._start(py)),
py_pos(py, self._end(py)),
PyBytes::new(py, &data),
),
).to_py_object(py))
}
});
pub struct Tokens {
ident: PyString,
argument: PyString,
eof: PyString,
empty: PyString,
substitution: PyString,
named_only: PyString,
named_only_val: PyString,
set_annotation: PyString,
set_annotation_val: PyString,
set_type: PyString,
set_type_val: PyString,
extension_package: PyString,
extension_package_val: PyString,
order_by: PyString,
order_by_val: PyString,
dot: PyString,
backward_link: PyString,
open_bracket: PyString,
close_bracket: PyString,
open_paren: PyString,
close_paren: PyString,
open_brace: PyString,
close_brace: PyString,
namespace: PyString,
double_splat: PyString,
coalesce: PyString,
colon: PyString,
semicolon: PyString,
comma: PyString,
add: PyString,
concat: PyString,
sub: PyString,
mul: PyString,
div: PyString,
floor_div: PyString,
modulo: PyString,
pow: PyString,
less: PyString,
greater: PyString,
eq: PyString,
ampersand: PyString,
pipe: PyString,
at: PyString,
iconst: PyString,
niconst: PyString,
fconst: PyString,
nfconst: PyString,
bconst: PyString,
sconst: PyString,
greater_eq: PyString,
less_eq: PyString,
not_eq: PyString,
distinct_from: PyString,
not_distinct_from: PyString,
assign: PyString,
add_assign: PyString,
sub_assign: PyString,
arrow: PyString,
keywords: HashMap<String, TokenInfo>,
unpickle_token: PyObject,
}
struct Cache {
keyword_buf: String,
}
pub struct TokenInfo {
pub kind: Kind,
pub name: PyString,
pub value: Option<PyString>,
}
pub fn init_module(py: Python) {
unsafe {
TOKENS = Some(Tokens::new(py))
}
}
pub fn _unpickle_token(py: Python,
kind: &PyString, text: &PyString, value: &PyObject,
start: &PyObject, end: &PyObject)
-> PyResult<Token>
{
// TODO(tailhook) We might some strings from Tokens structure
// (i.e. internning them).
// But if we're storing a collection of tokens
// they will store the tokens only once, so it
// doesn't seem to help that much.
Token::create_instance(py,
kind.clone_ref(py),
text.clone_ref(py),
value.clone_ref(py),
rs_pos(py, start)?,
rs_pos(py, end)?)
}
pub fn tokenize(py: Python, s: &PyString) -> PyResult<PyList> {
let data = s.to_string(py)?;
let mut token_stream = Tokenizer::new(&data[..]).validated_values();
let rust_tokens: Vec<_> = py.allow_threads(|| {
(&mut token_stream).collect::<Result<_, _>>()
}).map_err(|e| {
TokenizerError::new(py, (e.message, py_pos(py, &e.span.start)))
})?;
return convert_tokens(py, rust_tokens, token_stream.current_pos());
}
pub fn convert_tokens(py: Python, rust_tokens: Vec<PToken<'_>>,
end_pos: Pos)
-> PyResult<PyList>
{
let tokens = unsafe { TOKENS.as_ref().expect("module initialized") };
let mut cache = Cache {
keyword_buf: String::with_capacity(MAX_KEYWORD_LENGTH),
};
pub fn tokens_to_py(py: Python, rust_tokens: Vec<Token>) -> PyResult<PyList> {
let mut buf = Vec::with_capacity(rust_tokens.len());
for tok in rust_tokens {
let (kind, text) = get_token_kind_and_name(py, tokens, &mut cache, &tok);
let py_tok = OpaqueToken::create_instance(py, tok.cloned())?.into_object();
let value = tok.value.as_ref()
.map(|v| value_to_py_object(py, v)).transpose()?
.unwrap_or_else(|| py.None());
let py_tok = Token::create_instance(
py, kind, text, value, tok.span.start, tok.span.end
)?;
buf.push(py_tok.into_object());
buf.push(py_tok);
}
buf.push(Token::create_instance(
py,
tokens.eof.clone_ref(py),
tokens.empty.clone_ref(py),
py.None(),
end_pos,
end_pos
)?.into_object()
);
Ok(PyList::new(py, &buf[..]))
}
impl Tokens {
pub fn new(py: Python) -> Tokens {
let mut res = Tokens {
ident: PyString::new(py, "IDENT"),
argument: PyString::new(py, "ARGUMENT"),
eof: PyString::new(py, "EOF"),
empty: PyString::new(py, ""),
substitution: PyString::new(py, "SUBSTITUTION"),
named_only: PyString::new(py, "NAMEDONLY"),
named_only_val: PyString::new(py, "NAMED ONLY"),
set_annotation: PyString::new(py, "SETANNOTATION"),
set_annotation_val: PyString::new(py, "SET ANNOTATION"),
set_type: PyString::new(py, "SETTYPE"),
set_type_val: PyString::new(py, "SET TYPE"),
extension_package: PyString::new(py, "EXTENSIONPACKAGE"),
extension_package_val: PyString::new(py, "EXTENSION PACKAGE"),
order_by: PyString::new(py, "ORDERBY"),
order_by_val: PyString::new(py, "ORDER BY"),
/// To support pickle serialization of OpaqueTokens, we need to provide a
/// deserialization function in __reduce__ methods.
/// This function must not be inlined and must be globally accessible.
/// To achieve this, we expose it as part of the module definition
/// (`_unpickle_token`) and save a reference to it in `FN_UNPICKLE_TOKEN`.
///
/// A bit hacky, but it works.
static mut FN_UNPICKLE_TOKEN: Option<PyObject> = None;
dot: PyString::new(py, "."),
backward_link: PyString::new(py, ".<"),
open_bracket: PyString::new(py, "["),
close_bracket: PyString::new(py, "]"),
open_paren: PyString::new(py, "("),
close_paren: PyString::new(py, ")"),
open_brace: PyString::new(py, "{"),
close_brace: PyString::new(py, "}"),
namespace: PyString::new(py, "::"),
double_splat: PyString::new(py, "**"),
coalesce: PyString::new(py, "??"),
colon: PyString::new(py, ":"),
semicolon: PyString::new(py, ";"),
comma: PyString::new(py, ","),
add: PyString::new(py, "+"),
concat: PyString::new(py, "++"),
sub: PyString::new(py, "-"),
mul: PyString::new(py, "*"),
div: PyString::new(py, "/"),
floor_div: PyString::new(py, "//"),
modulo: PyString::new(py, "%"),
pow: PyString::new(py, "^"),
less: PyString::new(py, "<"),
greater: PyString::new(py, ">"),
eq: PyString::new(py, "="),
ampersand: PyString::new(py, "&"),
pipe: PyString::new(py, "|"),
at: PyString::new(py, "@"),
iconst: PyString::new(py, "ICONST"),
niconst: PyString::new(py, "NICONST"),
fconst: PyString::new(py, "FCONST"),
nfconst: PyString::new(py, "NFCONST"),
bconst: PyString::new(py, "BCONST"),
sconst: PyString::new(py, "SCONST"),
// as OP
greater_eq: PyString::new(py, ">="),
less_eq: PyString::new(py, "<="),
not_eq: PyString::new(py, "!="),
distinct_from: PyString::new(py, "?!="),
not_distinct_from: PyString::new(py, "?="),
assign: PyString::new(py, ":="),
add_assign: PyString::new(py, "+="),
sub_assign: PyString::new(py, "-="),
arrow: PyString::new(py, "->"),
keywords: HashMap::new(),
unpickle_token: py_fn!(py, _unpickle_token(
kind: &PyString, text: &PyString, value: &PyObject,
start: &PyObject, end: &PyObject)),
};
// 'EOF'
for kw in UNRESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
for kw in PARTIAL_RESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
for kw in CURRENT_RESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
for kw in FUTURE_RESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
return res;
}
fn add_kw(&mut self, py: Python, name: &str) {
let py_name = PyString::new(py, &name.to_ascii_uppercase());
let tok_name = if name.starts_with("__") && name.ends_with("__") {
format!("DUNDER{}", name[2..name.len()-2].to_ascii_uppercase())
.to_py_object(py)
} else {
py_name.clone_ref(py)
};
self.keywords.insert(name.into(), TokenInfo {
kind: if is_keyword(name) { Kind::Keyword } else { Kind::Ident },
name: tok_name,
value: None,
});
pub fn init_module(py: Python) {
unsafe {
FN_UNPICKLE_TOKEN = Some(py_fn!(py, _unpickle_token(bytes: &PyBytes)));
}
}
fn get_token_kind_and_name(
py: Python,
tokens: &Tokens,
cache: &mut Cache,
token: &PToken,
) -> (PyString, PyString) {
use Kind::*;
let text = &token.text[..];
match token.kind {
Assign => (
tokens.assign.clone_ref(py),
tokens.assign.clone_ref(py),
),
SubAssign => (
tokens.sub_assign.clone_ref(py),
tokens.sub_assign.clone_ref(py),
),
AddAssign => (
tokens.add_assign.clone_ref(py),
tokens.add_assign.clone_ref(py),
),
Arrow => (
tokens.arrow.clone_ref(py),
tokens.arrow.clone_ref(py),
),
Coalesce => (
tokens.coalesce.clone_ref(py),
tokens.coalesce.clone_ref(py),
),
Namespace => (
tokens.namespace.clone_ref(py),
tokens.namespace.clone_ref(py),
),
DoubleSplat => (
tokens.double_splat.clone_ref(py),
tokens.double_splat.clone_ref(py),
),
BackwardLink => (
tokens.backward_link.clone_ref(py),
tokens.backward_link.clone_ref(py),
),
FloorDiv => (
tokens.floor_div.clone_ref(py),
tokens.floor_div.clone_ref(py),
),
Concat => (
tokens.concat.clone_ref(py),
tokens.concat.clone_ref(py),
),
GreaterEq => (
tokens.greater_eq.clone_ref(py),
tokens.greater_eq.clone_ref(py),
),
LessEq => (
tokens.less_eq.clone_ref(py),
tokens.less_eq.clone_ref(py),
),
NotEq => (
tokens.not_eq.clone_ref(py),
tokens.not_eq.clone_ref(py),
),
NotDistinctFrom => (
tokens.not_distinct_from.clone_ref(py),
tokens.not_distinct_from.clone_ref(py),
),
DistinctFrom => (
tokens.distinct_from.clone_ref(py),
tokens.distinct_from.clone_ref(py),
),
Comma => (
tokens.comma.clone_ref(py),
tokens.comma.clone_ref(py),
),
OpenParen => (
tokens.open_paren.clone_ref(py),
tokens.open_paren.clone_ref(py),
),
CloseParen => (
tokens.close_paren.clone_ref(py),
tokens.close_paren.clone_ref(py),
),
OpenBracket => (
tokens.open_bracket.clone_ref(py),
tokens.open_bracket.clone_ref(py),
),
CloseBracket => (
tokens.close_bracket.clone_ref(py),
tokens.close_bracket.clone_ref(py),
),
OpenBrace => (
tokens.open_brace.clone_ref(py),
tokens.open_brace.clone_ref(py),
),
CloseBrace => (
tokens.close_brace.clone_ref(py),
tokens.close_brace.clone_ref(py),
),
Dot => (
tokens.dot.clone_ref(py),
tokens.dot.clone_ref(py),
),
Semicolon => (
tokens.semicolon.clone_ref(py),
tokens.semicolon.clone_ref(py),
),
Colon => (
tokens.colon.clone_ref(py),
tokens.colon.clone_ref(py),
),
Add => (
tokens.add.clone_ref(py),
tokens.add.clone_ref(py),
),
Sub => (
tokens.sub.clone_ref(py),
tokens.sub.clone_ref(py),
),
Mul => (
tokens.mul.clone_ref(py),
tokens.mul.clone_ref(py),
),
Div => (
tokens.div.clone_ref(py),
tokens.div.clone_ref(py),
),
Modulo => (
tokens.modulo.clone_ref(py),
tokens.modulo.clone_ref(py),
),
Pow => (
tokens.pow.clone_ref(py),
tokens.pow.clone_ref(py),
),
Less => (
tokens.less.clone_ref(py),
tokens.less.clone_ref(py),
),
Greater => (
tokens.greater.clone_ref(py),
tokens.greater.clone_ref(py),
),
Eq => (
tokens.eq.clone_ref(py),
tokens.eq.clone_ref(py),
),
Ampersand => (
tokens.ampersand.clone_ref(py),
tokens.ampersand.clone_ref(py),
),
Pipe => (
tokens.pipe.clone_ref(py),
tokens.pipe.clone_ref(py),
),
At => (
tokens.at.clone_ref(py),
tokens.at.clone_ref(py),
),
Argument => (
tokens.argument.clone_ref(py),
PyString::new(py, text),
),
DecimalConst => (
tokens.nfconst.clone_ref(py),
PyString::new(py, text),
),
FloatConst => (
tokens.fconst.clone_ref(py),
PyString::new(py, text),
),
IntConst => (
tokens.iconst.clone_ref(py),
PyString::new(py, text),
),
BigIntConst => (
tokens.niconst.clone_ref(py),
PyString::new(py, text),
),
BinStr => (
tokens.bconst.clone_ref(py),
PyString::new(py, text),
),
Str => (
tokens.sconst.clone_ref(py),
PyString::new(py, text),
),
BacktickName => (
tokens.ident.clone_ref(py),
PyString::new(py, text),
),
Ident | Keyword => match text {
"named only" => (
tokens.named_only.clone_ref(py),
tokens.named_only_val.clone_ref(py),
),
"set annotation" => (
tokens.set_annotation.clone_ref(py),
tokens.set_annotation_val.clone_ref(py),
),
"set type" => (
tokens.set_type.clone_ref(py),
tokens.set_type_val.clone_ref(py),
),
"extension package" => {
(
tokens.extension_package.clone_ref(py),
tokens.extension_package_val.clone_ref(py),
)},
"order by" => (
tokens.order_by.clone_ref(py),
tokens.order_by_val.clone_ref(py),
),
pub fn _unpickle_token(py: Python, bytes: &PyBytes) -> PyResult<OpaqueToken> {
let token = rmp_serde::from_slice(bytes.data(py)).unwrap();
OpaqueToken::create_instance(py, token)
}
_ => {
if text.len() > MAX_KEYWORD_LENGTH {
(
tokens.ident.clone_ref(py),
PyString::new(py, text),
)
} else {
cache.keyword_buf.clear();
cache.keyword_buf.push_str(text);
cache.keyword_buf.make_ascii_lowercase();
pub fn get_fn_unpickle_token(py: Python) -> PyObject {
let py_function = unsafe { FN_UNPICKLE_TOKEN.as_ref().expect("module initialized") };
return py_function.clone_ref(py);
}
let kind = match tokens.keywords.get(&cache.keyword_buf) {
Some(keyword) => {
debug_assert_eq!(keyword.kind, token.kind);
keyword.name.clone_ref(py)
}
None => {
debug_assert_eq!(Kind::Ident, token.kind);
tokens.ident.clone_ref(py)
}
};
(kind, PyString::new(py, text))
}
},
}
Substitution => (
tokens.substitution.clone_ref(py),
PyString::new(py, text),
),
impl OpaqueToken {
pub(super) fn inner(&self, py: Python) -> Token {
self._inner(py).clone()
}
}
pub fn get_unpickle_fn(py: Python) -> PyObject {
let tokens = unsafe { TOKENS.as_ref().expect("module initialized") };
return tokens.unpickle_token.clone_ref(py);
}
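
A minimal sketch of the MessagePack round-trip that `OpaqueToken.__reduce__` and `_unpickle_token` rely on, using the same `rmp_serde` calls as above. It is not part of the diff, assumes the `serde` feature of `edgeql-parser`, and simplifies error handling:

```rust
use edgeql_parser::tokenizer::{Token, Tokenizer};

fn roundtrip_first_token(source: &str) -> Token<'static> {
    // Take the first token and detach it from the source buffer.
    let tok: Token<'static> = Tokenizer::new(source)
        .validated_values()
        .next()
        .expect("source is not empty")
        .expect("no tokenizer error")
        .cloned();

    // The same serialize/deserialize pair used by __reduce__ / _unpickle_token.
    let bytes = rmp_serde::to_vec(&tok).expect("token serializes");
    rmp_serde::from_slice(&bytes).expect("token deserializes")
}

fn main() {
    let token = roundtrip_first_token("select 1");
    assert_eq!(token.text, "select");
}
```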


@ -1,5 +1,6 @@
use edgeql_rust::normalize::{normalize, Variable};
use edgeql_parser::tokenizer::{Value as Value};
use edgeql_parser::tokenizer::Value;
use num_bigint::BigInt;
#[test]
@ -80,10 +81,10 @@ fn test_bigint() {
"SELECT(<__std__::bigint>$0)+(<__std__::bigint>$1)");
assert_eq!(entry.variables, vec![vec![
Variable {
value: Value::BigInt(1.into()),
value: Value::BigInt("1".into()),
},
Variable {
value: Value::BigInt(23.into()),
value: Value::BigInt(BigInt::from(23).to_str_radix(16)),
}
]]);
}
@ -97,10 +98,10 @@ fn test_bigint_exponent() {
"SELECT(<__std__::bigint>$0)+(<__std__::bigint>$1)");
assert_eq!(entry.variables, vec![vec![
Variable {
value: Value::BigInt(10000000000u64.into()),
value: Value::BigInt(BigInt::from(10000000000u64).to_str_radix(16)),
},
Variable {
value: Value::BigInt(230000000000000u64.into()),
value: Value::BigInt(BigInt::from(230000000000000u64).to_str_radix(16)),
}
]]);
}
@ -203,6 +204,7 @@ fn test_script() {
value: Value::Int(2),
}
],
vec![]
]);
}
@ -227,5 +229,6 @@ fn test_script_with_args() {
value: Value::Int(2),
}
],
vec![]
]);
}


@ -1,4 +1,4 @@
use crate::position::Pos;
use crate::position::{Pos, InflatedPos};
use crate::tokenizer::{Kind, self};
/// Error of expression checking
@ -79,10 +79,12 @@ pub fn check(text: &str) -> Result<(), Error> {
}
};
let pos = token.span.start;
let pos = InflatedPos::from_offset(text.as_bytes(), pos).unwrap().deflate();
empty = false;
match token.kind {
Comma | Semicolon if brackets.is_empty() => {
return Err(UnexpectedToken(token.text.to_string(), pos));
return Err(UnexpectedToken(token.text.into(), pos));
}
OpenParen | OpenBracket | OpenBrace => {
brackets.push((token.kind, pos));


@ -3,7 +3,7 @@ use std::fmt::{self, Write};
use std::error::Error;
use std::char;
use crate::tokenizer::is_keyword;
use crate::keywords;
/// Error returned from `unquote_string` function
///
@ -23,7 +23,7 @@ pub struct UnquoteError(String);
pub fn quote_name(s: &str) -> Cow<str> {
if s.chars().all(|c| c.is_alphanumeric() || c == '_') {
let lower = s.to_ascii_lowercase();
if !is_keyword(&lower) {
if keywords::lookup(&lower).is_none() {
return s.into();
}
}


@ -1,4 +1,6 @@
pub const UNRESERVED_KEYWORDS: &[&str] = &[
use phf::phf_set;
pub const UNRESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
"abort",
"abstract",
"access",
@ -103,20 +105,11 @@ pub const UNRESERVED_KEYWORDS: &[&str] = &[
"version",
"view",
"write",
];
);
pub const PARTIAL_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!("except", "intersect", "union",);
pub const PARTIAL_RESERVED_KEYWORDS: &[&str] = &[
// Keep in sync with `tokenizer::is_keyword`
"except",
"intersect",
"union",
// Keep in sync with `tokenizer::is_keyword`
];
pub const FUTURE_RESERVED_KEYWORDS: &[&str] = &[
// Keep in sync with `tokenizer::is_keyword`
pub const FUTURE_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
"anyarray",
"begin",
"case",
@ -147,18 +140,15 @@ pub const FUTURE_RESERVED_KEYWORDS: &[&str] = &[
"when",
"window",
"never",
// Keep in sync with `tokenizer::is_keyword`
];
);
pub const CURRENT_RESERVED_KEYWORDS: &[&str] = &[
// Keep in sync with `tokenizer::is_keyword`
pub const CURRENT_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
"__source__",
"__subject__",
"__type__",
"__std__",
"__edgedbsys__",
"__edgedbtpl__",
"__std__",
"__new__",
"__old__",
"__specified__",
@ -207,5 +197,38 @@ pub const CURRENT_RESERVED_KEYWORDS: &[&str] = &[
"update",
"variadic",
"with",
// Keep in sync with `tokenizer::is_keyword`
];
);
pub const COMBINED_KEYWORDS: phf::Set<&str> = phf_set!(
"named only",
"set annotation",
"set type",
"extension package",
"order by",
);
pub fn lookup(s: &str) -> Option<Keyword> {
None.or_else(|| PARTIAL_RESERVED_KEYWORDS.get_key(s))
.or_else(|| FUTURE_RESERVED_KEYWORDS.get_key(s))
.or_else(|| CURRENT_RESERVED_KEYWORDS.get_key(s))
.map(|x| Keyword(x))
}
pub fn lookup_all(s: &str) -> Option<Keyword> {
lookup(s).or_else(|| {
None.or_else(|| COMBINED_KEYWORDS.get_key(s))
.or_else(|| UNRESERVED_KEYWORDS.get_key(s))
.map(|x| Keyword(x))
})
}
/// This is required for the serde deserializer of Token to work correctly.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Keyword(pub &'static str);
impl From<Keyword> for &'static str {
fn from(value: Keyword) -> Self {
value.0
}
}
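
A small sketch, not part of the diff, showing how the two lookup helpers defined above differ based on the sets they consult: `lookup` only sees reserved keywords, while `lookup_all` also covers combined and unreserved ones.

```rust
use edgeql_parser::keywords::{lookup, lookup_all, Keyword};

fn main() {
    // "union" is partial-reserved, so both helpers find it.
    assert_eq!(lookup("union"), Some(Keyword("union")));

    // "abort" is only in UNRESERVED_KEYWORDS, so plain `lookup` misses it.
    assert_eq!(lookup("abort"), None);
    assert_eq!(lookup_all("abort"), Some(Keyword("abort")));

    // Combined keywords such as "order by" are only known to `lookup_all`.
    assert_eq!(lookup_all("order by"), Some(Keyword("order by")));

    // Anything else is not a keyword at all.
    assert_eq!(lookup_all("users"), None);
}
```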


@ -5,8 +5,9 @@ pub mod helpers;
#[cfg(feature = "python")]
pub mod into_python;
pub mod keywords;
pub mod parser;
pub mod position;
pub mod preparser;
pub mod schema_file;
pub mod tokenizer;
pub mod validation;
pub mod validation;


@ -0,0 +1,590 @@
use append_only_vec::AppendOnlyVec;
use indexmap::IndexMap;
use crate::helpers::quote_name;
use crate::keywords::Keyword;
use crate::position::Span;
use crate::tokenizer::{Error, Kind, Token, Value};
pub struct Context<'s> {
spec: &'s Spec,
arena: bumpalo::Bump,
terminal_arena: AppendOnlyVec<Terminal>,
}
impl<'s> Context<'s> {
pub fn new(spec: &'s Spec) -> Self {
Context {
spec,
arena: bumpalo::Bump::new(),
terminal_arena: AppendOnlyVec::new(),
}
}
}
pub fn parse<'a>(input: &'a [Terminal], ctx: &'a Context) -> (Option<&'a CSTNode<'a>>, Vec<Error>) {
let stack_top = ctx.arena.alloc(StackNode {
parent: None,
state: 0,
value: CSTNode::Empty,
});
let initial_track = Parser {
stack_top,
error_cost: 0,
node_count: 0,
can_recover: true,
errors: Vec::new(),
};
// append EOI
let end = input.last().map(|t| t.span.end).unwrap_or_default();
let eoi = ctx.alloc_terminal(Terminal {
kind: Kind::EOI,
span: Span { start: end, end },
text: "".to_string(),
value: None,
});
let input = input.iter().chain(Some(eoi));
let mut parsers = vec![initial_track];
let mut prev_span: Option<Span> = None;
let mut new_parsers = Vec::with_capacity(parsers.len() + 5);
for token in input {
while let Some(mut parser) = parsers.pop() {
let res = parser.act(ctx, token);
if res.is_ok() {
// base case: ok
parser.node_successful();
new_parsers.push(parser);
} else {
// error: try to recover
let gap_span = {
let prev_end = prev_span.map(|p| p.end).unwrap_or(token.span.start);
Span {
start: prev_end,
end: token.span.start,
}
};
// option 1: inject a token
let possible_actions = &ctx.spec.actions[parser.stack_top.state];
for token_kind in possible_actions.keys() {
let mut inject = parser.clone();
let injection = new_token_for_injection(token_kind, ctx);
let cost = error_cost(token_kind);
let error = Error::new(format!("Missing {injection}")).with_span(gap_span);
inject.push_error(error, cost);
if inject.error_cost <= ERROR_COST_INJECT_MAX {
// println!(" --> [inject {injection}]");
if inject.act(ctx, injection).is_ok() {
// insert into parsers, to retry the original token
parsers.push(inject);
}
}
}
// option 2: skip the token
let mut skip = parser;
let error = Error::new(format!("Unexpected {token}")).with_span(token.span);
skip.push_error(error, ERROR_COST_SKIP);
if token.kind == Kind::EOF {
// extra penalty
skip.error_cost += ERROR_COST_INJECT_MAX;
skip.can_recover = false;
};
// println!(" --> [skip]");
// insert into new_parsers, so the token is skipped
new_parsers.push(skip);
}
}
// has any parser recovered?
if new_parsers.len() > 1 {
let recovered = new_parsers.iter().position(Parser::has_recovered);
if let Some(recovered) = recovered {
let mut recovered = new_parsers.swap_remove(recovered);
recovered.error_cost = 0;
new_parsers.clear();
new_parsers.push(recovered);
}
}
// prune: keep only the PARSER_COUNT_MAX best parsers
if new_parsers.len() > PARSER_COUNT_MAX {
new_parsers.sort_by_key(Parser::adjusted_cost);
new_parsers.drain(PARSER_COUNT_MAX..);
}
assert!(parsers.is_empty());
std::mem::swap(&mut parsers, &mut new_parsers);
prev_span = Some(token.span);
}
// there will always be a parser left,
// since we always allow a token to be skipped
let mut parser = parsers.into_iter().min_by_key(|p| p.error_cost).unwrap();
parser.finish();
let node = if parser.can_recover {
Some(&parser.stack_top.value)
} else {
None
};
(node, parser.errors)
}
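// A hedged sketch of how this entry point might be driven (illustrative only,
// not part of this commit). `Spec::from_json` requires the `serde` feature and
// a grammar spec serialized by the Python `process_spec` helper; `terminals`
// would come out of the tokenizer/validator pipeline.
#[cfg(feature = "serde")]
#[allow(dead_code)]
fn parse_with_recovery(spec_json: &str, terminals: &[Terminal]) -> Vec<Error> {
    let spec = Spec::from_json(spec_json).expect("valid grammar spec");
    let ctx = Context::new(&spec);
    let (cst, errors) = parse(terminals, &ctx);
    // `cst` is Some even for malformed input as long as recovery succeeded;
    // `errors` lists every token that had to be injected or skipped.
    if let Some(root) = cst {
        let _ = root; // a real caller would now convert the CST into an AST
    }
    errors
}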
impl<'s> Context<'s> {
fn alloc_terminal(&self, t: Terminal) -> &'_ Terminal {
let idx = self.terminal_arena.push(t);
&self.terminal_arena[idx]
}
}
fn new_token_for_injection<'a>(kind: &Kind, ctx: &'a Context) -> &'a Terminal {
ctx.alloc_terminal(Terminal {
kind: kind.clone(),
text: kind.text().unwrap_or_default().to_string(),
value: match kind {
Kind::Keyword(Keyword(kw)) => Some(Value::String(kw.to_string())),
Kind::Ident => Some(Value::String("my_name".to_string())),
_ => None,
},
span: Span::default(),
})
}
pub struct Spec {
pub actions: Vec<IndexMap<Kind, Action>>,
pub goto: Vec<IndexMap<String, usize>>,
pub start: String,
pub inlines: IndexMap<usize, u8>,
}
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
pub enum Action {
Shift(usize),
Reduce(Reduce),
}
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Reduce {
/// Index of the production in the associated production array
pub production_id: usize,
pub non_term: String,
/// Number of arguments
pub cnt: usize,
}
/// A node of the CST (concrete syntax tree).
///
/// Warning: this is allocated in the bumpalo arena, which never runs Drop.
/// Any type that allocates with the global allocator (such as String or Vec)
/// must be dropped manually. This is why Terminal is stored in a separate
/// vec arena that does run Drop.
#[derive(Debug, Clone, Copy)]
pub enum CSTNode<'a> {
Empty,
Terminal(&'a Terminal),
Production(Production<'a>),
}
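// A minimal sketch of the warning above (illustrative only, not part of this
// commit): bumpalo hands out arena memory but never runs Drop, so a String
// allocated directly in the bump arena would leak its heap buffer. This is
// why Terminal, which owns a String, lives in `Context::terminal_arena`
// (an `AppendOnlyVec` that does run Drop) and is only referenced from here.
#[allow(dead_code)]
fn bump_arena_never_drops(bump: &bumpalo::Bump) {
    let n: &mut u64 = bump.alloc(41); // fine: u64 has no destructor to run
    *n += 1;
    debug_assert_eq!(*n, 42);
    // Deliberately avoided: `bump.alloc(String::from("..."))` would leak.
}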
#[derive(Clone, Debug)]
pub struct Terminal {
pub kind: Kind,
pub text: String,
pub value: Option<Value>,
pub span: Span,
}
#[derive(Debug, Clone, Copy)]
pub struct Production<'a> {
pub id: usize,
pub args: &'a [CSTNode<'a>],
}
struct StackNode<'p> {
parent: Option<&'p StackNode<'p>>,
state: usize,
value: CSTNode<'p>,
}
#[derive(Clone)]
struct Parser<'s> {
stack_top: &'s StackNode<'s>,
/// sum of cost of every error recovery action
error_cost: u16,
/// number of nodes pushed to stack since last error
node_count: u16,
can_recover: bool,
errors: Vec<Error>,
}
impl<'s> Parser<'s> {
fn act(&mut self, ctx: &'s Context, token: &'s Terminal) -> Result<(), ()> {
// self.print_stack();
// println!("INPUT: {}", token.text);
loop {
// find next action
let Some(action) = ctx.spec.actions[self.stack_top.state].get(&token.kind) else {
return Err(());
};
match action {
Action::Shift(next) => {
// println!(" --> [shift {next}]");
// push on stack
self.push_on_stack(ctx, *next, CSTNode::Terminal(token));
return Ok(());
}
Action::Reduce(reduce) => {
self.reduce(ctx, reduce);
}
}
}
}
fn reduce(&mut self, ctx: &'s Context, reduce: &'s Reduce) {
let args = ctx.arena.alloc_slice_fill_with(reduce.cnt, |_| {
let v = self.stack_top.value;
self.stack_top = self.stack_top.parent.unwrap();
v
});
args.reverse();
let value = CSTNode::Production(Production {
id: reduce.production_id,
args,
});
let nstate = self.stack_top.state;
let next = *ctx.spec.goto[nstate].get(&reduce.non_term).unwrap();
// inline (if there is an inlining rule)
let mut value = value;
if let CSTNode::Production(production) = value {
if let Some(inline_position) = ctx.spec.inlines.get(&production.id) {
// inline rule found
let args = production.args;
let span = get_span_of_nodes(&args);
value = args[*inline_position as usize];
extend_span(&mut value, span, ctx);
} else {
// place back
value = CSTNode::Production(production);
}
}
self.push_on_stack(ctx, next, value);
// println!(
// " --> [reduce {} ::= ({} popped) at {}/{}]",
// production, cnt, state, nstate
// );
// self.print_stack();
}
pub fn push_on_stack(&mut self, ctx: &'s Context, state: usize, value: CSTNode<'s>) {
let node = StackNode {
parent: Some(self.stack_top),
state,
value,
};
self.stack_top = ctx.arena.alloc(node);
}
pub fn finish(&mut self) {
debug_assert!(matches!(
&self.stack_top.value,
CSTNode::Terminal(Terminal {
kind: Kind::EOI,
..
})
));
self.stack_top = self.stack_top.parent.unwrap();
// self.print_stack();
// println!(" --> accept");
#[cfg(debug_assertions)]
{
let first = self.stack_top.parent.unwrap();
assert!(
matches!(&first.value, CSTNode::Empty),
"expected 'Empty' found {:?}",
first.value
);
}
}
#[cfg(never)]
fn print_stack(&self) {
let prefix = "STACK: ";
let mut stack = Vec::new();
let mut node = Some(self.stack_top);
while let Some(n) = node {
stack.push(n);
node = n.parent.clone();
}
stack.reverse();
let names = stack
.iter()
.map(|s| format!("{:?}", s.value))
.collect::<Vec<_>>();
let mut states = format!("{:6}", ' ');
for (index, node) in stack.iter().enumerate() {
let name_width = names[index].chars().count();
states += &format!(" {:<width$}", node.state, width = name_width);
}
println!("{}{}", prefix, names.join(" "));
println!("{}", states);
}
fn push_error(&mut self, error: Error, cost: u16) {
self.errors.push(error);
self.error_cost += cost;
self.node_count = 0;
}
fn node_successful(&mut self) {
self.node_count += 1;
}
/// Error cost, reduced by a function of the number of nodes parsed
/// successfully since the last error.
fn adjusted_cost(&self) -> u16 {
let x = self.node_count.saturating_sub(3);
self.error_cost.saturating_sub(x * x)
}
fn has_recovered(&self) -> bool {
self.can_recover && self.adjusted_cost() == 0
}
}
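// A worked example of the pruning heuristic above (illustrative only, not
// part of this commit): a parser that accumulated an error cost of 10 and
// then shifted 5 nodes without further errors has an adjusted cost of
// 10 - (5 - 3)^2 = 6; after 7 clean nodes the cost saturates to 0 and, if
// `can_recover` is still set, `has_recovered` reports true.
#[test]
fn adjusted_cost_example() {
    let adjusted = |error_cost: u16, node_count: u16| -> u16 {
        let x = node_count.saturating_sub(3);
        error_cost.saturating_sub(x * x)
    };
    assert_eq!(adjusted(10, 5), 6);
    assert_eq!(adjusted(10, 7), 0);
}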
fn get_span_of_nodes(args: &[CSTNode]) -> Option<Span> {
let start = args.iter().find_map(|x| match x {
CSTNode::Terminal(t) => Some(t.span.start),
_ => None,
})?;
let end = args.iter().rev().find_map(|x| match x {
CSTNode::Terminal(t) => Some(t.span.end),
_ => None,
})?;
Some(Span { start, end })
}
fn extend_span<'a>(value: &mut CSTNode<'a>, span: Option<Span>, ctx: &'a Context) {
let Some(span) = span else {
return;
};
let CSTNode::Terminal(terminal) = value else {
return
};
let mut new_term = terminal.clone();
if span.start < new_term.span.start {
new_term.span.start = span.start;
}
if span.end > new_term.span.end {
new_term.span.end = span.end;
}
*terminal = ctx.alloc_terminal(new_term);
}
const PARSER_COUNT_MAX: usize = 10;
const ERROR_COST_INJECT_MAX: u16 = 15;
const ERROR_COST_SKIP: u16 = 3;
fn error_cost(kind: &Kind) -> u16 {
use Kind::*;
match kind {
Ident => 9,
Substitution => 8,
Keyword(_) => 10,
Dot => 5,
OpenBrace | OpenBracket | OpenParen => 5,
CloseBrace | CloseBracket | CloseParen => 1,
Namespace => 10,
Semicolon | Comma | Colon => 2,
Eq => 5,
At => 6,
IntConst => 8,
Assign | Arrow => 5,
_ => 100, // forbidden
}
}
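// Illustrative comparison of the constants above (not part of this commit):
// closing brackets are the cheapest tokens to inject, so for an input like
// `select (1 + 2` a candidate that injects the missing `)` (cost 1) tends to
// beat one that skips the offending token (cost 3) once the parsers are
// pruned by adjusted cost.
#[test]
fn prefers_cheap_injection() {
    assert!(error_cost(&Kind::CloseParen) < ERROR_COST_SKIP);
    assert!(error_cost(&Kind::CloseParen) <= ERROR_COST_INJECT_MAX);
}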
impl std::fmt::Display for Terminal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.text.is_empty() {
return write!(f, "{}", self.kind.user_friendly_text().unwrap_or_default());
}
match self.kind {
Kind::Ident => write!(f, "'{}'", &quote_name(&self.text)),
Kind::Keyword(Keyword(kw)) => write!(f, "keyword '{}'", kw.to_ascii_uppercase()),
_ => write!(f, "'{}'", self.text),
}
}
}
impl<'a> Default for CSTNode<'a> {
fn default() -> Self {
CSTNode::Empty
}
}
impl Terminal {
pub fn from_token(token: Token) -> Self {
Terminal {
kind: token.kind,
text: token.text.into(),
value: token.value,
span: token.span,
}
}
}
#[cfg(feature = "serde")]
impl Spec {
pub fn from_json(j_spec: &str) -> Result<Spec, String> {
#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct SpecJson {
pub actions: Vec<Vec<(String, Action)>>,
pub goto: Vec<Vec<(String, usize)>>,
pub start: String,
pub inlines: Vec<(usize, u8)>,
}
let v = serde_json::from_str::<SpecJson>(j_spec).map_err(|e| e.to_string())?;
let actions = v
.actions
.into_iter()
.map(|x| x.into_iter().map(|(k, a)| (get_token_kind(&k), a)))
.map(IndexMap::from_iter)
.collect();
let goto = v.goto.into_iter().map(IndexMap::from_iter).collect();
let inlines = IndexMap::from_iter(v.inlines);
Ok(Spec {
actions,
goto,
start: v.start,
inlines,
})
}
}
#[cfg(feature = "serde")]
fn get_token_kind(token_name: &str) -> Kind {
use Kind::*;
match token_name {
"+" => Add,
"&" => Ampersand,
"@" => At,
".<" => BackwardLink,
"}" => CloseBrace,
"]" => CloseBracket,
")" => CloseParen,
"??" => Coalesce,
":" => Colon,
"," => Comma,
"++" => Concat,
"/" => Div,
"." => Dot,
"**" => DoubleSplat,
"=" => Eq,
"//" => FloorDiv,
"%" => Modulo,
"*" => Mul,
"::" => Namespace,
"{" => OpenBrace,
"[" => OpenBracket,
"(" => OpenParen,
"|" => Pipe,
"^" => Pow,
";" => Semicolon,
"-" => Sub,
"?!=" => DistinctFrom,
">=" => GreaterEq,
"<=" => LessEq,
"?=" => NotDistinctFrom,
"!=" => NotEq,
"<" => Less,
">" => Greater,
"IDENT" => Ident,
"EOF" => EOF,
"<$>" => EOI,
"<e>" => Epsilon,
"BCONST" => BinStr,
"FCONST" => FloatConst,
"ICONST" => IntConst,
"NFCONST" => DecimalConst,
"NICONST" => BigIntConst,
"SCONST" => Str,
"+=" => AddAssign,
"->" => Arrow,
":=" => Assign,
"-=" => SubAssign,
"ARGUMENT" => Argument,
"SUBSTITUTION" => Substitution,
_ => {
let mut token_name = token_name.to_lowercase();
if let Some(rem) = token_name.strip_prefix("dunder") {
token_name = format!("__{rem}__");
}
let kw = crate::keywords::lookup_all(&token_name)
.unwrap_or_else(|| panic!("unknown keyword {token_name}"));
Keyword(kw)
}
}
}

View file

@ -4,13 +4,24 @@ use std::str::{from_utf8, Utf8Error};
use unicode_width::UnicodeWidthStr;
/// Span of an element in source code
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Span {
pub start: Pos,
pub end: Pos,
/// Byte offset in the original file
///
/// Technically you can read a > 4 GiB file on a 32-bit machine, so it
/// may not fit in usize
pub start: u64,
/// Byte offset in the original file
///
/// Technically you can read a > 4 GiB file on a 32-bit machine, so it
/// may not fit in usize
pub end: u64,
}
/// Original position of element in source code
#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Copy, Default, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Pos {
/// One-based line number
pub line: usize,
@ -88,6 +99,11 @@ fn new_lines_in_fragment(data: &[u8]) -> u64 {
impl InflatedPos {
pub fn from_offset(data: &[u8], offset: u64) -> Result<InflatedPos, InflatingError> {
let res = Self::from_offsets(data, &[offset as usize])?;
Ok(res.into_iter().next().unwrap())
}
pub fn from_offsets(data: &[u8], offsets: &[usize])
-> Result<Vec<InflatedPos>, InflatingError>
{
@ -125,6 +141,14 @@ impl InflatedPos {
}
return Ok(result);
}
pub fn deflate(self) -> Pos {
Pos {
line: self.line as usize + 1,
column: self.column as usize + 1,
offset: self.offset,
}
}
}
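// A minimal sketch of the helpers added above (illustrative only, not part of
// this commit): `from_offset` inflates a byte offset into a zero-based
// line/column pair and `deflate` converts that into the one-based `Pos`
// used elsewhere in the crate.
#[test]
fn offset_round_trip_example() {
    let src = b"select\n1";
    let pos = InflatedPos::from_offset(src, 7).unwrap().deflate();
    assert_eq!((pos.line, pos.column, pos.offset), (2, 1, 7));
}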
#[cfg(test)]

File diff suppressed because it is too large

View file

@ -4,11 +4,13 @@ use bigdecimal::num_bigint::ToBigInt;
use bigdecimal::BigDecimal;
use crate::helpers::{unquote_bytes, unquote_string};
use crate::position::Pos;
use crate::keywords::Keyword;
use crate::position::{Pos, Span};
use crate::tokenizer::{Error, Kind, Token, Tokenizer, Value, MAX_KEYWORD_LENGTH};
/// Applies additional validation to the tokens.
/// Combines multi-word keywords into single tokens.
/// Remaps a few token kinds.
pub struct Validator<'a> {
pub inner: Tokenizer<'a>,
@ -30,12 +32,14 @@ impl<'a> Iterator for Validator<'a> {
Err(e) => return Some(Err(Error::new(e).with_span(token.span))),
};
if let Some(text) = self.combine_multi_word_keywords(&token) {
token.kind = Kind::Keyword;
token.text = text.into();
if let Some(keyword) = self.combine_multi_word_keywords(&token) {
token.text = keyword.into();
token.kind = Kind::Keyword(Keyword(keyword));
self.peeked = None;
}
token.kind = remap_kind(token.kind);
Some(Ok(token))
}
}
@ -49,6 +53,13 @@ impl<'a> Validator<'a> {
}
}
pub fn with_eof(self) -> WithEof<'a> {
WithEof {
inner: self,
emitted: false,
}
}
/// Mimics the behavior of [std::iter::Peekable]. We could use that, but it
/// hides access to the underlying iterator.
fn next_inner(&mut self) -> Option<Result<Token<'a>, Error>> {
@ -61,7 +72,7 @@ impl<'a> Validator<'a> {
/// Mimics the behavior of [std::iter::Peekable]. We could use that, but it
/// hides access to the underlying iterator.
fn peek(&mut self) -> &Option<Result<Token<'a>, Error>> {
fn peek(&mut self) -> &Option<Result<Token, Error>> {
if self.peeked.is_none() {
self.peeked = Some(self.inner.next());
}
@ -73,8 +84,8 @@ impl<'a> Validator<'a> {
self.inner.current_pos()
}
fn combine_multi_word_keywords(&mut self, token: &Token) -> Option<&'static str> {
if !matches!(token.kind, Kind::Ident | Kind::Keyword) {
fn combine_multi_word_keywords(&mut self, token: &Token<'a>) -> Option<&'static str> {
if !matches!(token.kind, Kind::Ident | Kind::Keyword(_)) {
return None;
}
let text = &token.text;
@ -115,19 +126,19 @@ impl<'a> Validator<'a> {
return None;
}
fn peek_keyword(&mut self, kw: &str) -> bool {
fn peek_keyword(&mut self, kw: &'static str) -> bool {
self.peek()
.as_ref()
.and_then(|res| res.as_ref().ok())
.map(|t| {
(t.kind == Kind::Ident || t.kind == Kind::Keyword)
&& t.text.eq_ignore_ascii_case(kw)
t.kind == Kind::Keyword(Keyword(kw))
|| (t.kind == Kind::Ident && t.text.eq_ignore_ascii_case(kw))
})
.unwrap_or(false)
}
}
pub fn parse_value(token: &Token<'_>) -> Result<Option<Value>, String> {
pub fn parse_value(token: &Token) -> Result<Option<Value>, String> {
use Kind::*;
let text = &token.text;
let string_value = match token.kind {
@ -175,33 +186,69 @@ pub fn parse_value(token: &Token<'_>) -> Result<Option<Value>, String> {
// Python has no problem of representing such a positive
// value, though.
return u64::from_str(&text.replace("_", ""))
.map(|x| Value::Int(x as i64))
.map(Some)
.map(|x| Some(Value::Int(x as i64)))
.map_err(|e| format!("error reading int: {}", e));
}
BigIntConst => {
let dec = text[..text.len() - 1]
return text[..text.len() - 1]
.replace("_", "")
.parse::<BigDecimal>()
.map_err(|e| format!("error reading bigint: {}", e))?;
// this conversion to decimal and back to string
// fixes thing like `1e2n` which we support for bigints
return Ok(Some(Value::BigInt(
dec.to_bigint()
.ok_or_else(|| "number is not integer".to_string())?,
)));
.map_err(|e| format!("error reading bigint: {}", e))
// this conversion to decimal and back to string
// fixes things like `1e2n`, which we support for bigints
.and_then(|x| {
x.to_bigint()
.ok_or_else(|| "number is not integer".to_string())
})
.map(|x| Some(Value::BigInt(x.to_str_radix(16))));
}
BinStr => {
return unquote_bytes(text).map(Value::Bytes).map(Some);
}
Str => unquote_string(text)
.map_err(|s| s.to_string())?
.to_string(),
Str => unquote_string(text).map_err(|s| s.to_string())?.to_string(),
BacktickName => text[1..text.len() - 1].replace("``", "`"),
Ident | Keyword => text.to_string(),
Ident | Keyword(_) => text.to_string(),
Substitution => text[2..text.len() - 1].to_string(),
_ => return Ok(None),
};
Ok(Some(Value::String(string_value)))
}
fn remap_kind(kind: Kind) -> Kind {
match kind {
Kind::BacktickName => Kind::Ident,
kind => kind,
}
}
pub struct WithEof<'a> {
inner: Validator<'a>,
emitted: bool,
}
impl<'a> Iterator for WithEof<'a> {
type Item = Result<Token<'a>, Error>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(next) = self.inner.next() {
Some(next)
} else if !self.emitted {
self.emitted = true;
let pos = self.inner.current_pos().offset;
Some(Ok(Token {
kind: Kind::EOF,
text: "".into(),
value: None,
span: Span {
start: pos,
end: pos,
},
}))
} else {
None
}
}
}

View file

@ -39,6 +39,10 @@ fn tok_err(s: &str) -> String {
panic!("No error, where error expected");
}
fn keyword(kw: &'static str) -> Kind {
Keyword(edgeql_parser::keywords::Keyword(kw))
}
#[test]
fn whitespace_and_comments() {
assert_eq!(tok_str("# hello { world }"), &[] as &[&str]);
@ -64,9 +68,9 @@ fn idents() {
#[test]
fn keywords() {
assert_eq!(tok_str("SELECT a"), ["SELECT", "a"]);
assert_eq!(tok_typ("SELECT a"), [Keyword, Ident]);
assert_eq!(tok_typ("SELECT a"), [keyword("select"), Ident]);
assert_eq!(tok_str("with Select"), ["with", "Select"]);
assert_eq!(tok_typ("with Select"), [Keyword, Keyword]);
assert_eq!(tok_typ("with Select"), [keyword("with"), keyword("select")]);
}
#[test]
@ -375,92 +379,98 @@ fn decimal() {
#[test]
fn numbers_from_py() {
assert_eq!(tok_str("SELECT 3.5432;"), ["SELECT", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT 3.5432;"), [Keyword, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT +3.5432;"), ["SELECT", "+", "3.5432", ";"]);
assert_eq!(tok_str("SELECT 3.5432;"),
["SELECT", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT 3.5432;"),
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT +3.5432;"),
["SELECT", "+", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT +3.5432;"),
[Keyword, Add, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT -3.5432;"), ["SELECT", "-", "3.5432", ";"]);
[keyword("select"), Add, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT -3.5432;"),
["SELECT", "-", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT -3.5432;"),
[Keyword, Sub, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32;"), ["SELECT", "354.32", ";"]);
assert_eq!(tok_typ("SELECT 354.32;"), [Keyword, FloatConst, Semicolon]);
[keyword("select"), Sub, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32;"),
["SELECT", "354.32", ";"]);
assert_eq!(tok_typ("SELECT 354.32;"),
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 35400000000000.32;"),
["SELECT", "35400000000000.32", ";"]);
assert_eq!(tok_typ("SELECT 35400000000000.32;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 35400000000000000000.32;"),
["SELECT", "35400000000000000000.32", ";"]);
assert_eq!(tok_typ("SELECT 35400000000000000000.32;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e20;"),
["SELECT", "3.5432e20", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e+20;"),
["SELECT", "3.5432e+20", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e+20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e-20;"),
["SELECT", "3.5432e-20", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e-20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32e-20;"),
["SELECT", "354.32e-20", ";"]);
assert_eq!(tok_typ("SELECT 354.32e-20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT -0n;"),
["SELECT", "-", "0n", ";"]);
assert_eq!(tok_typ("SELECT -0n;"),
[Keyword, Sub, BigIntConst, Semicolon]);
[keyword("select"), Sub, BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT 0n;"),
["SELECT", "0n", ";"]);
assert_eq!(tok_typ("SELECT 0n;"),
[Keyword, BigIntConst, Semicolon]);
[keyword("select"), BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT 1n;"),
["SELECT", "1n", ";"]);
assert_eq!(tok_typ("SELECT 1n;"),
[Keyword, BigIntConst, Semicolon]);
[keyword("select"), BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT -1n;"),
["SELECT", "-", "1n", ";"]);
assert_eq!(tok_typ("SELECT -1n;"),
[Keyword, Sub, BigIntConst, Semicolon]);
[keyword("select"), Sub, BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT 100000n;"),
["SELECT", "100000n", ";"]);
assert_eq!(tok_typ("SELECT 100000n;"),
[Keyword, BigIntConst, Semicolon]);
[keyword("select"), BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT -100000n;"),
["SELECT", "-", "100000n", ";"]);
assert_eq!(tok_typ("SELECT -100000n;"),
[Keyword, Sub, BigIntConst, Semicolon]);
[keyword("select"), Sub, BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT -354.32n;"),
["SELECT", "-", "354.32n", ";"]);
assert_eq!(tok_typ("SELECT -354.32n;"),
[Keyword, Sub, DecimalConst, Semicolon]);
[keyword("select"), Sub, DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 35400000000000.32n;"),
["SELECT", "35400000000000.32n", ";"]);
assert_eq!(tok_typ("SELECT 35400000000000.32n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT -35400000000000000000.32n;"),
["SELECT", "-", "35400000000000000000.32n", ";"]);
assert_eq!(tok_typ("SELECT -35400000000000000000.32n;"),
[Keyword, Sub, DecimalConst, Semicolon]);
[keyword("select"), Sub, DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e20n;"),
["SELECT", "3.5432e20n", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e20n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT -3.5432e+20n;"),
["SELECT", "-", "3.5432e+20n", ";"]);
assert_eq!(tok_typ("SELECT -3.5432e+20n;"),
[Keyword, Sub, DecimalConst, Semicolon]);
[keyword("select"), Sub, DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e-20n;"),
["SELECT", "3.5432e-20n", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e-20n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32e-20n;"),
["SELECT", "354.32e-20n", ";"]);
assert_eq!(tok_typ("SELECT 354.32e-20n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
}
#[test]
@ -598,7 +608,7 @@ fn strings() {
assert_eq!(tok_str(r#" rb'hello' "#), [r#"rb'hello'"#]);
assert_eq!(tok_typ(r#" rb'hello' "#), [BinStr]);
assert_eq!(tok_str(r#" `hello` "#), [r#"`hello`"#]);
assert_eq!(tok_typ(r#" `hello` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `hello` "#), [Ident]);
assert_eq!(tok_str(r#" "hello""#), [r#""hello""#]);
assert_eq!(tok_typ(r#" "hello""#), [Str]);
@ -617,7 +627,7 @@ fn strings() {
assert_eq!(tok_str(r#" rb'hello'"#), [r#"rb'hello'"#]);
assert_eq!(tok_typ(r#" rb'hello'"#), [BinStr]);
assert_eq!(tok_str(r#" `hello`"#), [r#"`hello`"#]);
assert_eq!(tok_typ(r#" `hello`"#), [BacktickName]);
assert_eq!(tok_typ(r#" `hello`"#), [Ident]);
assert_eq!(tok_str(r#" "h\"ello" "#), [r#""h\"ello""#]);
assert_eq!(tok_typ(r#" "h\"ello" "#), [Str]);
@ -636,9 +646,9 @@ fn strings() {
assert_eq!(tok_str(r#" rb'hello\' "#), [r#"rb'hello\'"#]);
assert_eq!(tok_typ(r#" rb'hello\' "#), [BinStr]);
assert_eq!(tok_str(r#" `hello\` "#), [r#"`hello\`"#]);
assert_eq!(tok_typ(r#" `hello\` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `hello\` "#), [Ident]);
assert_eq!(tok_str(r#" `hel``lo` "#), [r#"`hel``lo`"#]);
assert_eq!(tok_typ(r#" `hel``lo` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `hel``lo` "#), [Ident]);
assert_eq!(tok_str(r#" "h'el`lo" "#), [r#""h'el`lo""#]);
assert_eq!(tok_typ(r#" "h'el`lo" "#), [Str]);
@ -657,7 +667,7 @@ fn strings() {
assert_eq!(tok_str(r#" rb'h"el`lo' "#), [r#"rb'h"el`lo'"#]);
assert_eq!(tok_typ(r#" rb'h"el`lo' "#), [BinStr]);
assert_eq!(tok_str(r#" `h'el"lo` "#), [r#"`h'el"lo`"#]);
assert_eq!(tok_typ(r#" `h'el"lo\` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `h'el"lo\` "#), [Ident]);
assert_eq!(tok_str(" \"hel\nlo\" "), ["\"hel\nlo\""]);
assert_eq!(tok_typ(" \"hel\nlo\" "), [Str]);
@ -676,7 +686,7 @@ fn strings() {
assert_eq!(tok_str(" rb'hel\nlo' "), ["rb'hel\nlo'"]);
assert_eq!(tok_str(" br'hel\nlo' "), ["br'hel\nlo'"]);
assert_eq!(tok_str(" `hel\nlo` "), ["`hel\nlo`"]);
assert_eq!(tok_typ(" `hel\nlo` "), [BacktickName]);
assert_eq!(tok_typ(" `hel\nlo` "), [Ident]);
assert_eq!(tok_err(r#""hello"#),
"unterminated string, quoted by `\"`");
@ -762,15 +772,15 @@ fn test_dollar() {
assert_eq!(tok_str("select $$ something $$; x"),
["select", "$$ something $$", ";", "x"]);
assert_eq!(tok_typ("select $$ something $$; x"),
[Keyword, Str, Semicolon, Ident]);
[keyword("select"), Str, Semicolon, Ident]);
assert_eq!(tok_str("select $a$ ; $b$ ; $b$ ; $a$; x"),
["select", "$a$ ; $b$ ; $b$ ; $a$", ";", "x"]);
assert_eq!(tok_typ("select $a$ ; $b$ ; $b$ ; $a$; x"),
[Keyword, Str, Semicolon, Ident]);
[keyword("select"), Str, Semicolon, Ident]);
assert_eq!(tok_str("select $a$ ; $b$ ; $a$; x"),
["select", "$a$ ; $b$ ; $a$", ";", "x"]);
assert_eq!(tok_typ("select $a$ ; $b$ ; $a$; x"),
[Keyword, Str, Semicolon, Ident]);
[keyword("select"), Str, Semicolon, Ident]);
assert_eq!(tok_err("select $$ ; $ab$ test;"),
"unterminated string started with $$");
assert_eq!(tok_err("select $a$ ; $$ test;"),
@ -782,24 +792,24 @@ fn test_dollar() {
assert_eq!(tok_str("select $a$a$ ; $a$ test;"),
["select", "$a$a$ ; $a$", "test", ";"]);
assert_eq!(tok_typ("select $a$a$ ; $a$ test;"),
[Keyword, Str, Ident, Semicolon]);
[keyword("select"), Str, Ident, Semicolon]);
assert_eq!(tok_str("select $a+b; $b test; $a+b; $b ;"),
["select", "$a", "+", "b", ";", "$b", "test",
";", "$a", "+", "b", ";", "$b", ";"]);
assert_eq!(tok_typ("select $a+b; $b test; $a+b; $b ;"),
[Keyword, Argument, Add, Ident, Semicolon, Argument, Ident,
[keyword("select"), Argument, Add, Ident, Semicolon, Argument, Ident,
Semicolon, Argument, Add, Ident, Semicolon, Argument, Semicolon]);
assert_eq!(tok_str("select $def x$y test; $def x$y"),
["select", "$def", "x", "$y", "test",
";", "$def", "x", "$y"]);
assert_eq!(tok_typ("select $def x$y test; $def x$y"),
[Keyword, Argument, Ident, Argument, Ident,
[keyword("select"), Argument, Ident, Argument, Ident,
Semicolon, Argument, Ident, Argument]);
assert_eq!(tok_str("select $`x``y` + $0 + $`zz` + $1.2 + $фыва"),
["select", "$`x``y`", "+", "$0", "+", "$`zz`", "+", "$1", ".", "2",
"+", "$фыва"]);
assert_eq!(tok_typ("select $`x``y` + $0 + $`zz` + $1.2 + $фыва"),
[Keyword, Argument, Add, Argument, Add, Argument,
[keyword("select"), Argument, Add, Argument, Add, Argument,
Add, Argument, Dot, IntConst, Add, Argument]);
assert_eq!(tok_err(r#"$-"#),
"bare $ is not allowed");
@ -831,11 +841,11 @@ fn test_substitution() {
assert_eq!(tok_str("SELECT \\(expr);"),
["SELECT", "\\(expr)", ";"]);
assert_eq!(tok_typ("SELECT \\(expr);"),
[Keyword, Substitution, Semicolon]);
[keyword("select"), Substitution, Semicolon]);
assert_eq!(tok_str("SELECT \\(other_Name1);"),
["SELECT", "\\(other_Name1)", ";"]);
assert_eq!(tok_typ("SELECT \\(other_Name1);"),
[Keyword, Substitution, Semicolon]);
[keyword("select"), Substitution, Semicolon]);
assert_eq!(tok_err("SELECT \\(some-name);"),
"only alphanumerics are allowed in \\(name) token");
assert_eq!(tok_err("SELECT \\(some_name"),

View file

@ -21,6 +21,7 @@ from __future__ import annotations
from typing import *
import multiprocessing
import json
from edb import errors
from edb.common import parsing
@ -29,7 +30,7 @@ from . import parser as qlparser
from .. import ast as qlast
from .. import tokenizer as qltokenizer
EdgeQLParserBase = qlparser.EdgeQLParserBase
EdgeQLParserBase = qlparser.EdgeQLParserSpec
def append_module_aliases(tree, aliases):
@ -48,11 +49,9 @@ def append_module_aliases(tree, aliases):
def parse_fragment(
source: Union[qltokenizer.Source, str],
filename: Optional[str]=None,
filename: Optional[str] = None,
) -> qlast.Expr:
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLExpressionParser()
parser = qlparser.EdgeQLExpressionSpec().get_parser()
res = parser.parse(source, filename=filename)
assert isinstance(res, qlast.Expr)
return res
@ -60,11 +59,9 @@ def parse_fragment(
def parse_single(
source: Union[qltokenizer.Source, str],
filename: Optional[str]=None,
filename: Optional[str] = None,
) -> qlast.Statement:
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLSingleParser()
parser = qlparser.EdgeQLSingleSpec().get_parser()
res = parser.parse(source, filename=filename)
assert isinstance(res, (qlast.Query | qlast.Command))
return res
@ -106,9 +103,7 @@ def parse_command(
def parse_block(source: Union[qltokenizer.Source, str]) -> List[qlast.Base]:
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLBlockParser()
parser = qlparser.EdgeQLBlockSpec().get_parser()
return parser.parse(source)
@ -122,9 +117,8 @@ def parse_migration_body_block(
# where the source contexts don't matter anyway.
source = '{' + source + '}'
tsource = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLMigrationBodyParser()
return parser.parse(tsource)
parser = qlparser.EdgeQLMigrationBodySpec().get_parser()
return parser.parse(source)
def parse_extension_package_body_block(
@ -137,31 +131,30 @@ def parse_extension_package_body_block(
# where the source contexts don't matter anyway.
source = '{' + source + '}'
tsource = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLExtensionPackageBodyParser()
return parser.parse(tsource)
parser = qlparser.EdgeQLExtensionPackageBodySpec().get_parser()
return parser.parse(source)
def parse_sdl(expr: str):
parser = qlparser.EdgeSDLParser()
parser = qlparser.EdgeSDLSpec().get_parser()
return parser.parse(expr)
def _load_parser(parser: qlparser.EdgeQLParserBase) -> None:
def _load_parser(parser: qlparser.EdgeQLParserSpec) -> None:
parser.get_parser_spec(allow_rebuild=True)
def preload(
allow_rebuild: bool = True,
paralellize: bool = False,
parsers: Optional[List[qlparser.EdgeQLParserBase]] = None,
parsers: Optional[List[qlparser.EdgeQLParserSpec]] = None,
) -> None:
if parsers is None:
parsers = [
qlparser.EdgeQLBlockParser(),
qlparser.EdgeQLSingleParser(),
qlparser.EdgeQLExpressionParser(),
qlparser.EdgeSDLParser(),
qlparser.EdgeQLBlockSpec(),
qlparser.EdgeQLSingleSpec(),
qlparser.EdgeQLExpressionSpec(),
qlparser.EdgeSDLSpec(),
]
if not paralellize:
@ -188,3 +181,73 @@ def preload(
pool.map(_load_parser, parsers_to_rebuild)
preload(parsers=parsers, allow_rebuild=False)
def process_spec(parser: parsing.ParserSpec) -> Tuple[str, List[Any]]:
# Converts a ParserSpec into JSON. Called from the edgeql-parser Rust crate.
spec = parser.get_parser_spec()
assert spec.pureLR
token_map: Dict[str, str] = {
v._token: c for (_, c), v in parsing.TokenMeta.token_map.items()
}
# productions
productions: List[Any] = []
production_ids: Dict[Any, int] = {}
inlines: List[Tuple[int, int]] = []
def get_production_id(prod: Any) -> int:
if prod in production_ids:
return production_ids[prod]
id = len(productions)
productions.append(prod)
production_ids[prod] = id
inline = getattr(prod.method, 'inline_index', None)
if inline is not None:
assert isinstance(inline, int)
inlines.append((id, inline))
return id
actions = []
for st_actions in spec.actions():
out_st_actions = []
for tok, acts in st_actions.items():
act = cast(Any, acts[0])
str_tok = token_map.get(str(tok), str(tok))
if 'ShiftAction' in str(type(act)):
action_obj: Any = int(act.nextState)
else:
prod = act.production
action_obj = {
'production_id': get_production_id(prod),
'non_term': str(prod.lhs),
'cnt': len(prod.rhs),
}
out_st_actions.append((str_tok, action_obj))
actions.append(out_st_actions)
# goto
goto = []
for st_goto in spec.goto():
out_goto = []
for nterm, action in st_goto.items():
out_goto.append((str(nterm), action))
goto.append(out_goto)
res = {
'actions': actions,
'goto': goto,
'start': str(spec.start_sym()),
'inlines': inlines,
}
res_json = json.dumps(res)
return (res_json, productions)

View file

@ -1,53 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2020-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from typing import *
from collections import deque
from edb.edgeql import tokenizer
from edb import _edgeql_parser as ql_parser
class EdgeQLLexer:
inputstr: str
tokens: Optional[Deque[ql_parser.Token]]
filename: Optional[str]
end_of_input: Tuple[int, int, int]
def __init__(self):
self.filename = None # TODO
def setinputstr(
self,
source: Union[str, tokenizer.Source],
filename: Optional[str]=None,
) -> None:
if isinstance(source, str):
source = tokenizer.Source.from_string(source)
self.inputstr = source.text()
self.filename = filename
self.tokens = deque(source.tokens())
self.end_of_input = self.tokens[-1].end()
def token(self) -> ql_parser.Token:
if self.tokens:
return self.tokens.popleft()

View file

@ -174,23 +174,23 @@ class T_PIPE(Token, lextoken='|'):
pass
class T_NAMEDONLY(Token):
class T_NAMEDONLY(Token, lextoken='named only'):
pass
class T_SETANNOTATION(Token):
class T_SETANNOTATION(Token, lextoken='set annotation'):
pass
class T_SETTYPE(Token):
class T_SETTYPE(Token, lextoken='set type'):
pass
class T_EXTENSIONPACKAGE(Token):
class T_EXTENSIONPACKAGE(Token, lextoken='extension package'):
pass
class T_ORDERBY(Token):
class T_ORDERBY(Token, lextoken='order by'):
pass

View file

@ -18,297 +18,176 @@
from __future__ import annotations
from typing import *
from edb import errors
from edb.common import debug, parsing
from edb.common import context as pctx
from edb.common.english import add_a as a
from edb.common import parsing
from .grammar import rust_lexer, tokens
from .grammar import expressions as gr_exprs
from .grammar import commondl as gr_commondl
from .grammar import keywords as gr_keywords
from .. import tokenizer
import edb._edgeql_parser as ql_parser
class EdgeQLParserBase(parsing.Parser):
def get_debug(self):
return debug.flags.edgeql_parser
def get_exception(self, native_err, context, token=None):
msg = native_err.args[0]
details = None
hint = None
if isinstance(native_err, errors.EdgeQLSyntaxError):
return native_err
else:
if msg.startswith('Unexpected token: '):
token = token or getattr(native_err, 'token', None)
token_kind = token.kind()
ltok = self.parser._stack[-1][0]
is_reserved = (
token.text().lower()
in gr_keywords.by_type[gr_keywords.RESERVED_KEYWORD]
)
# Look at the parsing stack and use tokens and
# non-terminals to infer the parser rule when the
# error occurred.
i, rule = self._get_rule()
if not token or token_kind == 'EOF':
msg = 'Unexpected end of line'
elif (
rule == 'shape' and
token_kind == 'IDENT' and
isinstance(ltok, parsing.Nonterm)
):
# Make sure that the previous element in the stack
# is some kind of Nonterminal, because if it's
# not, this is probably not an issue of a missing
# COMMA.
hint = (f"It appears that a ',' is missing in {a(rule)} "
f"before {token.text()!r}")
elif (
rule == 'list of arguments' and
# The stack is like <NodeName> LPAREN <AnyIdentifier>
i == 1 and
isinstance(ltok, (gr_exprs.AnyIdentifier,
tokens.T_WITH,
tokens.T_SELECT,
tokens.T_FOR,
tokens.T_INSERT,
tokens.T_UPDATE,
tokens.T_DELETE))
):
hint = ("Missing parentheses around statement used "
"as an expression")
# We want the error context correspond to the
# statement keyword
context = ltok.context
token = None
elif (
rule == 'array slice' and
# The offending token was something that could
# make an expression
token_kind in {'IDENT', 'ICONST'} and
not isinstance(ltok, tokens.T_COLON)
):
hint = (f"It appears that a ':' is missing in {a(rule)} "
f"before {token.text()!r}")
elif (
rule in {'list of arguments', 'tuple', 'array'} and
# The offending token was something that could
# make an expression
token_kind in {
'IDENT', 'TRUE', 'FALSE',
'ICONST', 'FCONST', 'NICONST', 'NFCONST',
'BCONST', 'SCONST',
} and
not isinstance(ltok, tokens.T_COMMA)
):
hint = (f"It appears that a ',' is missing in {a(rule)} "
f"before {token.text()!r}")
elif (
rule == 'definition' and
token_kind == 'IDENT'
):
# Something went wrong in a definition, so check
# if the last successful token is a keyword.
if (
isinstance(ltok, gr_exprs.Identifier) and
ltok.val.upper() == 'INDEX'
):
msg = (f"Expected 'ON', but got {token.text()!r} "
f"instead")
else:
msg = f'Unexpected {token.text()!r}'
elif rule == 'for iterator':
msg = ("Missing parentheses around complex expression in "
"a FOR iterator clause")
if i > 0:
context = pctx.merge_context([
self.parser._stack[-i][0].context, context,
])
token = None
elif hasattr(token, 'val'):
msg = f'Unexpected {token.val!r}'
elif token_kind == 'NL':
msg = 'Unexpected end of line'
elif token.text() == "explain":
msg = f'Unexpected keyword {token.text()!r}'
hint = f'Use `analyze` to show query performance details'
elif is_reserved and not isinstance(ltok, gr_exprs.Expr):
# Another token followed by a reserved keyword:
# likely an attempt to use keyword as identifier
msg = f'Unexpected keyword {token.text()!r}'
details = (
f'Token {token.text()!r} is a reserved keyword and'
f' cannot be used as an identifier'
)
hint = (
f'Use a different identifier or quote the name with'
f' backticks: `{token.text()}`'
)
else:
msg = f'Unexpected {token.text()!r}'
return errors.EdgeQLSyntaxError(
msg, details=details, hint=hint, context=context, token=token)
def _get_rule(self):
ltok = self.parser._stack[-1][0]
# Look at the parsing stack and use tokens and non-terminals
# to infer the parser rule when the error occurred.
rule = ''
def _matches_for(i):
return (
len(self.parser._stack) >= i + 3
and isinstance(self.parser._stack[-3 - i][0], tokens.T_FOR)
and isinstance(
self.parser._stack[-2 - i][0], gr_exprs.Identifier)
and isinstance(self.parser._stack[-1 - i][0], tokens.T_IN)
)
# Check if we're in the `FOR x IN <bad_token>` situation
if (
len(self.parser._stack) >= 4
and isinstance(self.parser._stack[-2][0], tokens.T_RANGBRACKET)
and isinstance(self.parser._stack[-3][0], gr_exprs.FullTypeExpr)
and isinstance(self.parser._stack[-4][0], tokens.T_LANGBRACKET)
and _matches_for(4)
):
return 4, 'for iterator'
if (
len(self.parser._stack) >= 2
and isinstance(self.parser._stack[-2][0], gr_exprs.AtomicExpr)
and _matches_for(2)
):
return 2, 'for iterator'
if (
len(self.parser._stack) >= 1
and isinstance(self.parser._stack[-1][0], gr_exprs.BaseAtomicExpr)
and _matches_for(1)
):
return 1, 'for iterator'
if _matches_for(0):
return 0, 'for iterator'
# If the last valid token was a closing brace/parent/bracket,
# so we need to find a match for it before deciding what rule
# context we're in.
need_match = isinstance(ltok, (tokens.T_RBRACE,
tokens.T_RPAREN,
tokens.T_RBRACKET))
nextel = None
for i, (el, _) in enumerate(reversed(self.parser._stack)):
if isinstance(el, tokens.Token):
# We'll need the element right before "{", "[", or "(".
prevel = self.parser._stack[-2 - i][0]
if isinstance(el, tokens.T_LBRACE):
if need_match and isinstance(ltok,
tokens.T_RBRACE):
# This is matched, while we're looking
# for unmatched braces.
need_match = False
continue
elif isinstance(prevel, gr_commondl.OptExtending):
# This is some SDL/DDL
rule = 'definition'
elif (
isinstance(prevel, gr_exprs.Expr) or
(
isinstance(prevel, tokens.T_COLON) and
isinstance(self.parser._stack[-3 - i][0],
gr_exprs.ShapePointer)
)
):
# This is some kind of shape.
rule = 'shape'
break
elif isinstance(el, tokens.T_LPAREN):
if need_match and isinstance(ltok,
tokens.T_RPAREN):
# This is matched, while we're looking
# for unmatched parentheses.
need_match = False
continue
elif isinstance(prevel, gr_exprs.NodeName):
rule = 'list of arguments'
elif isinstance(nextel, (tokens.T_FOR,
tokens.T_SELECT,
tokens.T_UPDATE,
tokens.T_DELETE,
tokens.T_INSERT,
tokens.T_FOR)):
# A parenthesized subquery expression,
# we should leave the error as is.
break
else:
rule = 'tuple'
break
elif isinstance(el, tokens.T_LBRACKET):
if need_match and isinstance(ltok,
tokens.T_RBRACKET):
# This is matched, while we're looking
# for unmatched brackets.
need_match = False
continue
# This is either an array literal or
# array index.
elif isinstance(prevel, gr_exprs.Expr):
rule = 'array slice'
else:
rule = 'array'
break
# Also keep track of the element right after current.
nextel = el
return i, rule
def get_lexer(self):
return rust_lexer.EdgeQLLexer()
class EdgeQLParserSpec(parsing.ParserSpec):
def get_parser(self):
return EdgeQLParser(self)
class EdgeQLSingleParser(EdgeQLParserBase):
class EdgeQLSingleSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import single
return single
class EdgeQLExpressionParser(EdgeQLParserBase):
class EdgeQLExpressionSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import fragment
return fragment
class EdgeQLBlockParser(EdgeQLParserBase):
class EdgeQLBlockSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import block
return block
class EdgeQLMigrationBodyParser(EdgeQLParserBase):
class EdgeQLMigrationBodySpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import migration_body
return migration_body
class EdgeQLExtensionPackageBodyParser(EdgeQLParserBase):
class EdgeQLExtensionPackageBodySpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import extension_package_body
return extension_package_body
class EdgeSDLParser(EdgeQLParserBase):
class EdgeSDLSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import sdldocument
return sdldocument
class EdgeQLParser:
spec: EdgeQLParserSpec
filename: Optional[str]
source: tokenizer.Source
def __init__(self, p: EdgeQLParserSpec):
self.spec = p
self.filename = None
mod = self.spec.get_parser_spec_module()
self.token_map = {}
for (_, token), cls in mod.TokenMeta.token_map.items():
self.token_map[token] = cls
def get_parser_spec(self, allow_rebuild=False):
return self.spec.get_parser_spec(allow_rebuild=allow_rebuild)
def parse(
self,
source: Union[str, tokenizer.Source],
filename: Optional[str] = None,
):
if isinstance(source, str):
source = tokenizer.Source.from_string(source)
self.filename = filename
self.source = source
parser_name = self.spec.__class__.__name__
result, productions = ql_parser.parse(parser_name, source.tokens())
if len(result.errors()) > 0:
# TODO: emit multiple errors
# Heuristic to pick the error:
# - first encountered,
# - Unexpected before Missing,
# - original order.
errs: List[Tuple[str, Tuple[int, Optional[int]]]] = result.errors()
errs.sort(key=lambda e: (e[1][0], -ord(e[0][1])))
error = errs[0]
message, span = error
position = tokenizer.inflate_position(source.text(), span)
raise errors.EdgeQLSyntaxError(message, position=position)
return self._cst_to_ast(result.out(), productions).val
def _cst_to_ast(
self, cst: ql_parser.CSTNode, productions: List[Callable]
) -> Any:
# Converts CST into AST by calling methods from the grammar classes.
#
# This function was originally written as a simple recursion.
# Then I had to unfold it, because it was hitting the recursion limit.
# The stack here contains all remaining things to do:
# - a CST node means the node has to be processed and pushed onto the
# result stack,
# - a production means that all args of the production have been processed
# and are ready to be passed to the production method. The result is
# then pushed onto the result stack.
stack: List[ql_parser.CSTNode | ql_parser.Production] = [cst]
result: List[Any] = []
while len(stack) > 0:
node = stack.pop()
if isinstance(node, ql_parser.CSTNode):
# this would be the body of the original recursive function
if terminal := node.terminal():
# Terminal is simple: just convert to parsing.Token
context = parsing.ParserContext(
name=self.filename,
buffer=self.source.text(),
start=terminal.start(),
end=terminal.end(),
)
result.append(
parsing.Token(
terminal.text(), terminal.value(), context
)
)
elif production := node.production():
# Production needs to first process all args, then
# call the appropriate method.
# (this is all in reverse, because stacks)
stack.append(production)
args = list(production.args())
args.reverse()
stack.extend(args)
else:
raise NotImplementedError(node)
elif isinstance(node, ql_parser.Production):
# production args are done, get them out of result stack
len_args = len(node.args())
split_at = len(result) - len_args
args = result[split_at:]
result = result[0:split_at]
# find correct method to call
production_id = node.id()
production = productions[production_id]
sym = production.lhs.nontermType()
assert len(args) == len(production.rhs)
production.method(sym, *args)
# push into result stack
result.append(sym)
return result.pop()

View file

@ -31,7 +31,6 @@ TRAILING_WS_IN_CONTINUATION = re.compile(r'\\ \s+\n')
class Source:
def __init__(self, text: str, tokens: List[ql_parser.Token]) -> None:
self._cache_key = hashlib.blake2b(text.encode('utf-8')).digest()
self._text = text
@ -67,7 +66,6 @@ class Source:
class NormalizedSource(Source):
def __init__(self, normalized: ql_parser.Entry, text: str) -> None:
self._text = text
self._cache_key = normalized.key()
@ -103,32 +101,70 @@ class NormalizedSource(Source):
return cls(_normalize(text), text)
def inflate_span(
source: str, span: Tuple[int, Optional[int]]
) -> Tuple[ql_parser.SourcePoint, ql_parser.SourcePoint]:
(start, end) = span
source_bytes = source.encode('utf-8')
[start_sp] = ql_parser.SourcePoint.from_offsets(source_bytes, [start])
if end is not None:
[end_sp] = ql_parser.SourcePoint.from_offsets(source_bytes, [end])
else:
end_sp = None
return (start_sp, end_sp)
def inflate_position(
source: str, span: Tuple[int, Optional[int]]
) -> Tuple[int, int, int, Optional[int]]:
(start, end) = inflate_span(source, span)
return (
start.column,
start.line,
start.offset,
end.offset if end else None,
)
def _tokenize(eql: str) -> List[ql_parser.Token]:
try:
return ql_parser.tokenize(eql)
except ql_parser.TokenizerError as e:
message, position = e.args
result = ql_parser.tokenize(eql)
if len(result.errors()) > 0:
# TODO: emit multiple errors
error = result.errors()[0]
message, span = error
position = inflate_position(eql, span)
hint = _derive_hint(eql, message, position)
raise errors.EdgeQLSyntaxError(
message, position=position, hint=hint) from e
raise errors.EdgeQLSyntaxError(message, position=position, hint=hint)
return result.out()
def _normalize(eql: str) -> ql_parser.Entry:
try:
return ql_parser.normalize(eql)
except ql_parser.TokenizerError as e:
message, position = e.args
except ql_parser.SyntaxError as e:
message, span = e.args
position = inflate_position(eql, span)
hint = _derive_hint(eql, message, position)
raise errors.EdgeQLSyntaxError(
message, position=position, hint=hint) from e
message, position=position, hint=hint
) from e
def _derive_hint(
input: str,
message: str,
position: Tuple[int, int, int],
position: Tuple[int, int, int, Optional[int]],
) -> Optional[str]:
_, _, off = position
_, _, off, _ = position
if message.endswith(
r"invalid string literal: invalid escape sequence '\ '"
):

View file

@ -90,7 +90,7 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
hint: Optional[str] = None,
details: Optional[str] = None,
context=None,
position: Optional[tuple[Optional[int], ...]] = None,
position: Optional[tuple[int, int, int, int | None]] = None,
filename: Optional[str] = None,
token=None,
pgext_code: Optional[str] = None,
@ -125,7 +125,7 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
def set_filename(self, filename):
self._attrs[FIELD_FILENAME] = filename
def set_linecol(self, line, col):
def set_linecol(self, line: Optional[int], col: Optional[int]):
if line is not None:
self._attrs[FIELD_LINE_START] = str(line)
if col is not None:
@ -143,7 +143,10 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
def has_source_context(self):
return FIELD_DETAILS in self._attrs
def set_source_context(self, context):
def set_source_context(self, context: Optional[pctx.ParserContext]):
if not context:
return
start = context.start_point
end = context.end_point
ex.replace_context(self, context)
@ -163,17 +166,14 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
def set_position(
self,
line: Optional[int] = None,
column: Optional[int] = None,
start: Optional[int] = None,
end: Optional[int] = None,
column: int,
line: int,
start: int,
end: Optional[int],
):
self.set_linecol(line, column)
if start is not None:
self._attrs[FIELD_POSITION_START] = str(start)
end = end or start
if end is not None:
self._attrs[FIELD_POSITION_END] = str(end)
self._attrs[FIELD_POSITION_START] = str(start)
self._attrs[FIELD_POSITION_END] = str(end or start)
@property
def line(self):

View file

@ -195,7 +195,6 @@ class BaseSyntaxTest(BaseDocTest):
markup.dump(inast)
# make sure that the AST has context
#
context.ContextValidator().visit(inast)
processed_src = self.ast_to_source(inast)

View file

@ -723,7 +723,7 @@ class EQLFunctionDirective(BaseEQLDirective):
from edb.edgeql import codegen as ql_gen
from edb.edgeql import qltypes
parser = edgeql_parser.EdgeQLBlockParser()
parser = edgeql_parser.EdgeQLBlockSpec().get_parser()
try:
astnode = parser.parse(
f'create function {sig} using SQL function "xxx";')[0]
@ -796,7 +796,7 @@ class EQLConstraintDirective(BaseEQLDirective):
from edb.edgeql import ast as ql_ast
from edb.edgeql import codegen as ql_gen
parser = edgeql_parser.EdgeQLBlockParser()
parser = edgeql_parser.EdgeQLBlockSpec().get_parser()
try:
astnode = parser.parse(
f'create abstract constraint {sig};')[0]

View file

@ -72,4 +72,5 @@ from . import wipe # noqa
from . import gen_test_dumps # noqa
from . import gen_sql_introspection # noqa
from . import gen_rust_ast # noqa
from . import parser_demo # noqa
from .profiling import cli as prof_cli # noqa

299
edb/tools/parser_demo.py Normal file
View file

@ -0,0 +1,299 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2020-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import *
from edb.edgeql import ast as qlast
from edb.edgeql import tokenizer
from edb.edgeql.parser import parser as qlparser
import edb._edgeql_parser as ql_parser
from edb.tools.edb import edbcommands
@edbcommands.command("parser-demo")
def main():
for q in QUERIES:
sdl = q.startswith('sdl')
if sdl:
q = q[3:]
try:
# s = tokenizer.NormalizedSource.from_string(q)
source = tokenizer.Source.from_string(q)
except BaseException as e:
print('Error during tokenization:')
print(e)
continue
if sdl:
spec = qlparser.EdgeSDLSpec()
else:
spec = qlparser.EdgeQLBlockSpec()
parser = spec.get_parser()
parser.filename = None
parser.source = source
parser_name = spec.__class__.__name__
result, productions = ql_parser.parse(parser_name, source.tokens())
print('-' * 30)
print()
for index, error in enumerate(result.errors()):
message, span = error
(start, end) = tokenizer.inflate_span(source.text(), span)
print(f'Error [{index+1}/{len(result.errors())}]:')
print(
'\n'.join(
source.text().splitlines()[(start.line - 1) : end.line]
)
)
print(
' ' * (start.column - 1)
+ '^'
+ '-' * (end.column - start.column - 1)
+ ' '
+ message
)
print()
if result.out():
try:
ast = parser._cst_to_ast(result.out(), productions).val
except BaseException:
ast = None
if ast:
print('Recovered AST:')
if isinstance(ast, list):
for x in ast:
x.dump_edgeql()
elif isinstance(ast, qlast.Base):
ast.dump_edgeql()
else:
print(ast)
QUERIES = [
'''
select 1
''',
'''
select User { name, email } filter .name = 'Sully'
''',
'''
SELECT {354.32,
35400000000000.32,
35400000000000000000.32,
3.5432e20,
3.5432e+20,
3.5432e-20,
3.543_2e-20,
354.32e-20,
2_354.32e-20,
0e-999
}
''',
'''
with module cards
for g in (group Card by .element) union (for gi in 0 union (
element := g.key.element,
cst := sum(g.elements.cost + gi),
))
''',
'''
select '10 seconds'
''',
'''SELECT (User.id, User { name := ''',
'''SELECT (false, }]})''',
'''
SELECT User { name, last_name }
WITH u := User SELECT u;
''',
'''
SELECT (false, true false])
''',
'''
for c Card union c.hello
''',
'''
SELECT User id, name }
''',
'''
CREATE TYPE cfg::TestSessionConfig EXTENDING cfg::ConfigObject {
CREATE REQUIRED PROPERTY name -> std::str {
CREATE CONSTRAINT std::exclusive;
}
};
''',
'''
CREATE FUNCTION
std::_gen_series(
`start`: std::int64,
stop: std::int64
) -> SET OF std::int64
{
SET volatility := 'Immutable';
USING SQL FUNCTION 'generate_series';
};
''',
'''
select b"04e3b";
''',
'''
select User { intersect };
''',
'''
create module __std__;
''',
'''
create type Hello {
create property intersect -> str;
create property `__std__` -> str;
};
''',
'''
SELECT
count(
schema::Module
FILTER NOT .builtin AND NOT .name = "default"
) + count(
schema::Object
FILTER .name LIKE "default::%"
) > 0
''',
'''sdl
module test {
function len1(a: str b: str) -> std::str {
using SQL function 'length1'
}
''',
'''
SELECT len('');
''',
'''
SELECT __std__::len({'hello', 'world'});
''',
'''sdl
module test {
alias FooBaz := [1 2];
};
''',
'''
SEL ECT 1
''',
'''
SELECT (
foo: 1,
bar := 3
);
''',
'''
SELECT (
foo: (
bar: 42
)
);
''',
'''
SELECT count(FOR X IN {Foo} UNION X);
''',
'''
SELECT some_agg(User.name) OVER (ORDER BY User.age ASC);
SELECT some_agg(User.name) OVER (
PARTITION BY strlen(User.name)
ORDER BY User.age ASC);
SELECT some_agg(User.name) OVER (
PARTITION BY User.email, User.age
ORDER BY User.age ASC);
SELECT some_agg(User.name) OVER (
PARTITION BY User.email, User.age
ORDER BY User.age ASC THEN User.name ASC);
''',
'''
SELECT Issue{
name,
related_to *-1,
};
''',
'''
SELECT __type__;
''',
'''
SELECT Issue{
name,
related_to *,
};
''',
'''
SELECT Foo {(bar)};
''',
'''
SELECT Foo.__source__;
''',
'''
SELECT Foo.bar@__type__;
''',
'''
SELECT Foo {
__type__.name
};
''',
'''
SELECT INTROSPECT tuple<int64>;
''',
'''
CREATE FUNCTION std::strlen(string: std::str = '1', abc: std::str)
-> std::int64 {};
''',
'''
SELECT Obj.n + random()
''',
'''
CREATE MIGRATION { ;;; CREATE TYPE Foo ;;; CREATE TYPE Bar ;;; };
''',
'''
SELECT (User IS (Named, Text));
''',
'''sdl
module test {
scalar type foobar {
index prop on (__source__);
};
};
''',
'''
INSERT Foo FILTER Foo.bar = 42;
''',
'''sdl
module test {
function some_func($`(`: str = ) ) -> std::str {
using edgeql function 'some_other_func';
}
};
''',
'''
SELECT (a := 1, foo);
''',
'''
CREATE MODULE `__std__`;
''',
]
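A minimal sketch of how these inputs could be driven end to end, assuming the parse-and-report logic above lives in a function named main(query); both that name and the entry-point wrapper are assumptions for illustration, not something shown in this file:

if __name__ == '__main__':
    # Hypothetical driver: 'main' is an assumed name for the routine that
    # tokenizes a query, runs the recovering parser, and prints the report.
    for q in QUERIES:
        main(q)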

@ -4336,7 +4336,7 @@ class TestEdgeQLDDL(tb.DDLTestCase):
async def test_edgeql_ddl_function_20(self):
with self.assertRaisesRegex(
edgedb.EdgeQLSyntaxError,
r"Unexpected ';'"):
"Unexpected ';'"):
await self.con.execute(r'''
CREATE FUNCTION ddlf_20(f: int64) -> int64
@ -12337,7 +12337,7 @@ type default::Foo {
with self.assertRaisesRegex(
edgedb.SchemaDefinitionError,
r"possibly more than one element returned by the index expression",
_line=4, _col=34
_line=4, _col=38
):
await self.con.execute(r"""
CREATE TYPE Foo {
@ -12350,7 +12350,7 @@ type default::Foo {
with self.assertRaisesRegex(
edgedb.SchemaDefinitionError,
r"possibly more than one element returned by the index expression",
_line=5, _col=34
_line=5, _col=38
):
await self.con.execute(r"""
CREATE TYPE Foo {
@ -12364,7 +12364,7 @@ type default::Foo {
with self.assertRaisesRegex(
edgedb.SchemaDefinitionError,
r"possibly more than one element returned by the index expression",
_line=5, _col=34
_line=5, _col=38
):
await self.con.execute(r"""
CREATE TYPE Foo {
@ -12770,7 +12770,7 @@ CREATE MIGRATION m14i24uhm6przo3bpl2lqndphuomfrtq3qdjaqdg6fza7h6m7tlbra
# work, and there is a commented bit below to test that.
async with self.assertRaisesRegexTx(
edgedb.QueryError,
"Unexpected keyword 'global'"):
"Unexpected keyword 'GLOBAL'"):
await self.con.execute('''
CREATE MIGRATION
{

@ -175,7 +175,7 @@ class TestEdgeQLExplain(tb.QueryTestCase):
"contexts": [
{
"buffer_idx": 0,
"end": 116,
"end": 115,
"start": 74,
}
],
@ -278,7 +278,7 @@ class TestEdgeQLExplain(tb.QueryTestCase):
"contexts": [
{
"buffer_idx": 0,
"end": 174,
"end": 173,
"start": 134,
},
],

@ -7238,7 +7238,7 @@ aa \
edgedb.QueryError,
r'possibly more than one element returned by an expression '
r'where only singletons are allowed',
_position=29):
_position=35):
await self.con.execute('''\
SELECT Issue LIMIT LogEntry.spent_time;
@ -7249,7 +7249,7 @@ aa \
edgedb.QueryError,
r'possibly more than one element returned by an expression '
r'where only singletons are allowed',
_position=29):
_position=36):
await self.con.execute('''\
SELECT Issue OFFSET LogEntry.spent_time;
@ -7695,7 +7695,7 @@ aa \
async def test_edgeql_expr_error_after_extraction_01(self):
with self.assertRaisesRegex(
edgedb.QueryError,
"Unexpected \"'1'\""):
"Unexpected ''1''"):
await self.con.query("""
SELECT '''1''';

@ -450,7 +450,7 @@ class TestInsert(tb.QueryTestCase):
async def test_edgeql_insert_nested_07(self):
with self.assertRaisesRegex(
edgedb.EdgeQLSyntaxError,
"Unexpected 'Subordinate'"):
r"Missing '\{'"):
await self.con.execute('''
INSERT InsertTest {
subordinates: Subordinate {

@ -54,7 +54,7 @@ class TestEdgeQLIRScopeTree(tb.BaseEdgeQLCompilerTest):
@tb.must_fail(errors.QueryError,
"reference to 'User.name' changes the interpretation",
line=3, col=9)
line=3, col=16)
def test_edgeql_ir_scope_tree_bad_01(self):
"""
SELECT User.deck
@ -63,7 +63,7 @@ class TestEdgeQLIRScopeTree(tb.BaseEdgeQLCompilerTest):
@tb.must_fail(errors.QueryError,
"reference to 'User' changes the interpretation",
line=3, col=9)
line=3, col=16)
def test_edgeql_ir_scope_tree_bad_02(self):
"""
SELECT User.deck

@ -2227,7 +2227,7 @@ class TestEdgeQLSelect(tb.QueryTestCase):
edgedb.QueryError,
"cannot redefine the cardinality of link 'related_to': it is "
"defined as 'multi' in the base object type 'default::Issue'",
_position=73,
_position=74,
):
await self.con.execute("""
SELECT Issue {
@ -2253,7 +2253,7 @@ class TestEdgeQLSelect(tb.QueryTestCase):
edgedb.QueryError,
"cannot redefine link 'status' as optional: it is "
"defined as required in the base object type 'default::Issue'",
_position=71,
_position=72,
):
await self.con.execute("""
SELECT Issue {

File diff suppressed because it is too large
@ -2228,7 +2228,7 @@ class TestUpdate(tb.QueryTestCase):
edgedb.QueryError,
"cannot update link 'readonly_tag': "
"it is declared as read-only",
_position=147,
_position=148,
):
await self.con.execute(r'''
UPDATE UpdateTest

@ -1711,7 +1711,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
with self.assertRaisesRegex(
edgedb.QueryError,
"can not take cross product of volatile operation",
_position=36):
_position=37):
await self.con.execute(
r"""
SELECT {1,2} + (FOR x in {1,2,3} UNION (x*random()))
@ -1722,7 +1722,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
with self.assertRaisesRegex(
edgedb.QueryError,
"can not take cross product of volatile operation",
_position=36):
_position=37):
await self.con.execute(
r"""
SELECT ({1,2}, (INSERT Obj { n := 100 }))
@ -1733,7 +1733,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
with self.assertRaisesRegex(
edgedb.QueryError,
"can not take cross product of volatile operation",
_position=64):
_position=65):
await self.con.execute(
r"""
SELECT ({1,2},

@ -396,7 +396,7 @@ class TestSchema(tb.BaseSchemaLoadTest):
@tb.must_fail(errors.InvalidPropertyTargetError,
"invalid property type: expected a scalar type, "
"or a scalar collection, got object type 'test::Object'",
position=73)
position=74)
def test_schema_bad_prop_02(self):
"""
type Object {
@ -1433,7 +1433,7 @@ class TestSchema(tb.BaseSchemaLoadTest):
@tb.must_fail(errors.SchemaDefinitionError,
"missing value for required property",
line=9, col=42)
line=10, col=25)
def test_schema_rewrite_missing_required_01(self):
"""
type Project {

@ -36,7 +36,7 @@ class SchemaSyntaxTest(tb.BaseSyntaxTest):
@classmethod
def get_parser(cls):
return ql_parser.EdgeSDLParser()
return ql_parser.EdgeSDLSpec().get_parser()
class TestEdgeSchemaParser(SchemaSyntaxTest):
@ -265,8 +265,8 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, "Unexpected keyword 'Commit'",
line=3, col=18)
@tb.must_fail(errors.EdgeQLSyntaxError, "Missing identifier",
line=3, col=17)
def test_eschema_syntax_type_11(self):
"""
module test {
@ -748,7 +748,7 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Expected 'ON', but got 'prop' instead", line=4, col=23)
r"Missing ':='", line=4, col=22)
def test_eschema_syntax_index_03(self):
"""
module test {
@ -757,6 +757,8 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
};
};
"""
# XXX: error recovery quality regression
# Expected 'ON', but got 'prop' instead
def test_eschema_syntax_index_04(self):
"""
@ -876,8 +878,8 @@ type LogEntry extending OwnedObject, Text {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected 'scalar'",
line=4, col=9)
r"Missing ';'",
line=2, col=55)
def test_eschema_syntax_ws_03(self):
"""
scalar type test::newScalarType0 extending str#:
@ -966,7 +968,7 @@ type LogEntry extending OwnedObject, Text {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'final'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'FINAL'",
line=3, col=13)
def test_eschema_syntax_scalar_07(self):
"""
@ -1043,7 +1045,7 @@ type LogEntry extending OwnedObject, Text {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected 'delegated'",
r"Unexpected keyword 'DELEGATED'",
line=3, col=13)
def test_eschema_syntax_constraint_02(self):
"""
@ -1112,7 +1114,7 @@ type LogEntry extending OwnedObject, Text {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected 'constraint'",
r"Unexpected keyword 'CONSTRAINT'",
line=4, col=26)
def test_eschema_syntax_constraint_07(self):
"""
@ -1135,7 +1137,7 @@ type LogEntry extending OwnedObject, Text {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'constraint'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'CONSTRAINT'",
line=3, col=13)
def test_eschema_syntax_constraint_09(self):
"""
@ -1198,7 +1200,7 @@ abstract property test::foo {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'property'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'PROPERTY'",
line=3, col=13)
def test_eschema_syntax_property_05(self):
"""
@ -1410,7 +1412,7 @@ abstract property test::foo {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'link'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'LINK'",
line=3, col=13)
def test_eschema_syntax_link_11(self):
"""
@ -1626,7 +1628,7 @@ abstract property test::foo {
def test_eschema_syntax_function_12(self):
"""
module test {
function some_func($`(`: str = ) ) -> std::str {
function some_func($`(`: str = () ) -> std::str {
using edgeql function 'some_other_func';
}
};
@ -1770,10 +1772,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+b',
hint=r"It appears that a ',' is missing in a list of "
r"arguments before 'b'",
line=3, col=34)
r"Missing ','",
line=3, col=33)
def test_eschema_syntax_function_21(self):
"""
module test {
@ -1834,10 +1834,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+baz',
hint=r"It appears that a ',' is missing in a shape "
r"before 'baz'",
line=5, col=17)
r"Missing ','",
line=4, col=25)
def test_eschema_syntax_alias_04(self):
"""
module test {
@ -1850,10 +1848,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+2',
hint=r"It appears that a ',' is missing in a tuple "
r"before '2'",
line=3, col=32)
r"Missing ','",
line=3, col=31)
def test_eschema_syntax_alias_05(self):
"""
module test {
@ -1862,10 +1858,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+2',
hint=r"It appears that a ',' is missing in an array "
r"before '2'",
line=3, col=32)
r"Missing ','",
line=3, col=31)
def test_eschema_syntax_alias_06(self):
"""
module test {
@ -1948,7 +1942,7 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected keyword 'extending'", line=3, col=46)
r"Unexpected keyword 'EXTENDING'", line=3, col=46)
def test_eschema_syntax_annotation_14(self):
"""
module test {
@ -1956,7 +1950,7 @@ abstract property test::foo {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'annotation'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Missing keyword 'ABSTRACT'",
line=2, col=1)
def test_eschema_syntax_annotation_15(self):
"""

@ -109,11 +109,11 @@ class TestServerProto(tb.QueryTestCase):
await self.con.query('select syntax error')
with self.assertRaisesRegex(edgedb.EdgeQLSyntaxError,
'Unexpected end of line'):
r"Missing '\)'"):
await self.con.query('select (')
with self.assertRaisesRegex(edgedb.EdgeQLSyntaxError,
'Unexpected end of line'):
r"Missing '\)'"):
await self.con.query_json('select (')
for _ in range(10):