Parser error recovery (#5693)

Co-authored-by: Michael J. Sullivan <sully@msully.net>
Aljaž Mur Eržen 2023-07-12 05:54:41 +02:00 committed by GitHub
parent 84d7875481
commit 6f6b4cd117
43 changed files with 2604 additions and 2087 deletions

Cargo.lock (generated), 142 lines changed

@ -11,6 +11,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "append-only-vec"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5608767d94038891df4c7bb82f6b1beb55fe3d204735985e20de329bc35d5fee"
[[package]]
name = "ascii"
version = "0.9.3"
@ -38,6 +44,7 @@ dependencies = [
"num-bigint 0.4.3",
"num-integer",
"num-traits",
"serde",
]
[[package]]
@ -66,9 +73,9 @@ dependencies = [
[[package]]
name = "bumpalo"
version = "3.12.2"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "byteorder"
@ -203,13 +210,18 @@ dependencies = [
name = "edgeql-parser"
version = "0.1.0"
dependencies = [
"append-only-vec",
"base32",
"bigdecimal",
"bumpalo",
"cpython",
"edgeql-parser-derive",
"indexmap",
"memchr",
"num-bigint 0.3.3",
"phf",
"serde",
"serde_json",
"sha2",
"snafu",
"thiserror",
@ -236,7 +248,11 @@ dependencies = [
"cpython",
"edgedb-protocol",
"edgeql-parser",
"indexmap",
"num-bigint 0.4.3",
"rmp-serde",
"serde",
"serde_json",
]
[[package]]
@ -298,9 +314,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "log"
@ -328,6 +344,18 @@ dependencies = [
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6f7833f2cbf2360a6cfd58cd41a53aa7a90bd4c202f5b1c7dd2ed73c57b2c3"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
"serde",
]
[[package]]
name = "num-bigint"
version = "0.4.3"
@ -379,6 +407,48 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
[[package]]
name = "phf"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
dependencies = [
"phf_macros",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "phf_shared"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676"
dependencies = [
"siphasher",
]
[[package]]
name = "pretty_assertions"
version = "1.3.0"
@ -419,6 +489,21 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "regex"
version = "1.8.1"
@ -436,6 +521,28 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
[[package]]
name = "rmp"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44519172358fd6d58656c86ab8e7fbc9e1490c3e8f14d35ed78ca0dd07403c9f"
dependencies = [
"byteorder",
"num-traits",
"paste",
]
[[package]]
name = "rmp-serde"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5b13be192e0220b8afb7222aa5813cb62cc269ebb5cac346ca6487681d2913e"
dependencies = [
"byteorder",
"rmp",
"serde",
]
[[package]]
name = "ryu"
version = "1.0.13"
@ -468,6 +575,7 @@ version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
@ -484,6 +592,12 @@ dependencies = [
"digest",
]
[[package]]
name = "siphasher"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "snafu"
version = "0.7.4"
@ -601,9 +715,9 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
[[package]]
name = "wasm-bindgen"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"serde",
@ -613,9 +727,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
@ -628,9 +742,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -638,9 +752,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
@ -651,9 +765,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.86"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "winapi"


@ -25,23 +25,14 @@ import logging
import os
import sys
import types
import re
import parsing
from edb.common.exceptions import add_context, get_context
from edb.common import context as pctx
from edb.edgeql import tokenizer
from edb.errors import EdgeQLSyntaxError
from edb import _edgeql_parser as ql_parser
if TYPE_CHECKING:
from edb.edgeql.parser.grammar import rust_lexer
from edb.common import context as pctx, debug
ParserContext = pctx.ParserContext
logger = logging.getLogger('edb.common.parsing')
TRAILING_WS_IN_CONTINUATION = re.compile(r'\\ \s+\n')
class ParserSpecIncompatibleError(Exception):
@ -131,17 +122,6 @@ def inline(argument_index: int):
return decorator
def make_inlining_func(arg_index: int):
"""Makes a parser production handler which simply inlines an argument."""
# TODO: remove this when Rust parser is merged
def wrapper(obj, *args, **kwargs):
obj.val = args[arg_index].val
return obj
return wrapper
class NontermMeta(type):
def __new__(mcls, name, bases, dct):
result = super().__new__(mcls, name, bases, dct)
@ -171,13 +151,7 @@ class NontermMeta(type):
attr = lambda self, *args, meth=attr: meth(self, *args)
attr.__doc__ = doc
if inline_index is not None:
# TODO: remove this when Rust parser is merged
a = make_inlining_func(inline_index)
else:
a = attr
a = pctx.has_context(a)
a = pctx.has_context(attr)
a.__doc__ = attr.__doc__
a.inline_index = inline_index
@ -308,70 +282,14 @@ class Precedence(parsing.Precedence, assoc='fail', metaclass=PrecedenceMeta):
pass
class ParserError(Exception):
def __init__(
self, msg=None, *, hint=None, details=None, token=None, line=None,
col=None, expr=None, context=None):
if msg is None:
msg = 'syntax error at or near "%s"' % token
super().__init__(msg, hint=hint, details=details)
self.token = token
if line is not None:
self.line = line
if col is not None:
self.col = col
self.expr = expr
if context:
add_context(self, context)
if line is None and col is None:
self.line = context.start.line
self.col = context.start.column
@property
def context(self):
try:
return get_context(self, pctx.ParserContext)
except LookupError:
return None
def _derive_hint(
input: str,
message: str,
position: Tuple[int, int, int],
) -> Optional[str]:
_, _, off = position
if message == r"invalid string literal: invalid escape sequence '\ '":
if TRAILING_WS_IN_CONTINUATION.search(input[off:]):
return "consider removing trailing whitespace"
return None
class Parser:
class ParserSpec:
parser_spec: ClassVar[parsing.Spec | None]
lexer: Optional[rust_lexer.EdgeQLLexer]
def __init__(self, **parser_data):
self.lexer = None
self.parser = None
self.parser_data = parser_data
def cleanup(self):
self.__class__.parser_spec = None
self.__class__.lexer_spec = None
self.lexer = None
self.parser = None
def get_debug(self):
return False
def get_exception(self, native_err, context, token=None):
if not isinstance(native_err, ParserError):
return ParserError(native_err.args[0],
context=context, token=token)
else:
return native_err
return debug.flags.edgeql_parser
def get_parser_spec_module(self) -> types.ModuleType:
raise NotImplementedError
@ -421,96 +339,3 @@ class Parser:
return os.path.join(
os.path.dirname(mod.__file__),
mod.__name__.rpartition('.')[2] + '.' + type)
def get_lexer(self):
"""Return an initialized lexer.
The lexer must implement 'setinputstr' and 'token' methods.
A lexer derived from edb.common.lexer.Lexer will satisfy these
criteria.
"""
raise NotImplementedError
def reset_parser(
self,
input: Union[str, tokenizer.Source],
filename: Optional[str]=None
):
if not self.parser:
self.lexer = self.get_lexer()
self.parser = parsing.Lr(self.get_parser_spec())
self.parser.parser_data = self.parser_data
self.parser.verbose = self.get_debug()
self.parser.reset()
assert self.lexer
self.lexer.setinputstr(input, filename=filename)
def convert_lex_token(self, mod: Any, tok: ql_parser.Token) -> Token:
token_cls = mod.TokenMeta.for_lex_token(tok.kind())
return token_cls(tok.text(), tok.value(), self.context(tok))
def parse(
self,
input: Union[str, tokenizer.Source],
filename: Optional[str] = None
):
try:
self.reset_parser(input, filename=filename)
assert self.lexer
mod = self.get_parser_spec_module()
while tok := self.lexer.token():
token = self.convert_lex_token(mod, tok)
if token is None:
continue
self.parser.token(token)
self.parser.eoi()
except ql_parser.TokenizerError as e:
message, position = e.args
assert self.lexer
hint = _derive_hint(self.lexer.inputstr, message, position)
raise EdgeQLSyntaxError(
message, context=self.context(pos=position), hint=hint
) from e
except parsing.UnexpectedToken as e:
raise self.get_exception(
e, context=self.context(tok), token=tok
) from e
except ParserError as e:
raise self.get_exception(e, context=e.context) from e
return self.parser.start[0].val
def context(self, tok=None, pos: Optional[Tuple[int, int, int]] = None):
lex = self.lexer
assert lex
name = lex.filename if lex.filename else '<string>'
if tok is None:
if pos is None:
pos = lex.end_of_input
context = pctx.ParserContext(
name=name, buffer=lex.inputstr,
start=pos[2], end=pos[2])
else:
context = pctx.ParserContext(
name=name, buffer=lex.inputstr,
start=tok.start()[2],
end=tok.end()[2])
return context
def line_col_from_char_offset(source, position):
line = source[:position].count('\n') + 1
col = source.rfind('\n', 0, position)
col = position if col == -1 else position - col
return line, col


@ -8,7 +8,8 @@ edition = "2021"
[dependencies]
base32 = "0.4.0"
bigdecimal = "0.3.0"
bigdecimal = { version = "0.3.0", features = ["serde"] }
num-bigint = { version = "0.3.0", features = ["serde"] }
sha2 = "0.10.2"
snafu = "0.7.0"
memchr = "2.5.0"
@ -21,10 +22,14 @@ unicode-width = "0.1.8"
edgeql-parser-derive = { path = "edgeql-parser-derive", optional = true }
cpython = { version = "0.7.0", optional = true }
indexmap = "1.9.3"
serde_json = {version="1.0", features=["preserve_order"]}
bumpalo = {version="3.13.0", features=["collections"]}
phf = { version = "0.11.1", features = ["macros"] }
append-only-vec = "0.1.2"
[features]
default = []
wasm-lexer = ["wasm-bindgen", "serde"]
python = ["cpython", "edgeql-parser-derive"]
python = ["cpython", "serde", "edgeql-parser-derive"]
[lib]


@ -7,11 +7,15 @@ rust-version = "1.59"
edition = "2021"
[dependencies]
edgeql-parser = {path = ".."}
edgeql-parser = {path = "..", features=["serde"]}
bytes = "1.0.1"
num-bigint = "0.4.3"
bigdecimal = "0.3.0"
blake2 = "0.10.4"
serde = {version="1.0", features=["derive"]}
serde_json = "1.0"
rmp-serde = "1.1.1"
indexmap = "1.9.3"
[dependencies.edgedb-protocol]
git = "https://github.com/edgedb/edgedb-rust"


@ -1,65 +1,92 @@
use cpython::{PyObject, ToPyObject, Python, PyErr, PythonObject, PyType};
use cpython::exc::Exception;
use crate::cpython::PythonObjectWithTypeObject;
use cpython::exc::Exception;
use cpython::{
PyClone, PyErr, PyList, PyObject, PyResult, PyType, Python, PythonObject, ToPyObject,
};
use edgeql_parser::tokenizer::Error;
// can't use py_exception macro because that fails on dotted module name
pub struct TokenizerError(PyObject);
pub struct SyntaxError(PyObject);
pyobject_newtype!(TokenizerError);
pyobject_newtype!(SyntaxError);
impl TokenizerError {
impl SyntaxError {
pub fn new<T: ToPyObject>(py: Python, args: T) -> PyErr {
PyErr::new::<TokenizerError, T>(py, args)
PyErr::new::<SyntaxError, T>(py, args)
}
}
impl cpython::PythonObjectWithCheckedDowncast for TokenizerError {
impl cpython::PythonObjectWithCheckedDowncast for SyntaxError {
#[inline]
fn downcast_from(py: Python, obj: PyObject)
-> Result<TokenizerError, cpython::PythonObjectDowncastError>
{
if TokenizerError::type_object(py).is_instance(py, &obj) {
fn downcast_from(
py: Python,
obj: PyObject,
) -> Result<SyntaxError, cpython::PythonObjectDowncastError> {
if SyntaxError::type_object(py).is_instance(py, &obj) {
Ok(unsafe { PythonObject::unchecked_downcast_from(obj) })
} else {
Err(cpython::PythonObjectDowncastError::new(py,
"TokenizerError",
TokenizerError::type_object(py),
Err(cpython::PythonObjectDowncastError::new(
py,
"SyntaxError",
SyntaxError::type_object(py),
))
}
}
#[inline]
fn downcast_borrow_from<'a, 'p>(py: Python<'p>, obj: &'a PyObject)
-> Result<&'a TokenizerError, cpython::PythonObjectDowncastError<'p>>
{
if TokenizerError::type_object(py).is_instance(py, obj) {
fn downcast_borrow_from<'a, 'p>(
py: Python<'p>,
obj: &'a PyObject,
) -> Result<&'a SyntaxError, cpython::PythonObjectDowncastError<'p>> {
if SyntaxError::type_object(py).is_instance(py, obj) {
Ok(unsafe { PythonObject::unchecked_downcast_borrow_from(obj) })
} else {
Err(cpython::PythonObjectDowncastError::new(py,
"TokenizerError",
TokenizerError::type_object(py),
Err(cpython::PythonObjectDowncastError::new(
py,
"SyntaxError",
SyntaxError::type_object(py),
))
}
}
}
impl cpython::PythonObjectWithTypeObject for TokenizerError {
impl cpython::PythonObjectWithTypeObject for SyntaxError {
#[inline]
fn type_object(py: Python) -> PyType {
unsafe {
static mut TYPE_OBJECT: *mut cpython::_detail::ffi::PyTypeObject
= 0 as *mut cpython::_detail::ffi::PyTypeObject;
static mut TYPE_OBJECT: *mut cpython::_detail::ffi::PyTypeObject =
0 as *mut cpython::_detail::ffi::PyTypeObject;
if TYPE_OBJECT.is_null() {
TYPE_OBJECT = PyErr::new_type(
py,
"edb._edgeql_parser.TokenizerError",
"edb._edgeql_parser.SyntaxError",
Some(PythonObject::into_object(py.get_type::<Exception>())),
None).as_type_ptr();
None,
)
.as_type_ptr();
}
PyType::from_type_ptr(py, TYPE_OBJECT)
}
}
}
py_class!(pub class ParserResult |py| {
data _out: PyObject;
data _errors: PyList;
def out(&self) -> PyResult<PyObject> {
Ok(self._out(py).clone_ref(py))
}
def errors(&self) -> PyResult<PyList> {
Ok(self._errors(py).clone_ref(py))
}
});
pub fn parser_error_into_tuple(py: Python, error: Error) -> PyObject {
(error.message, (error.span.start, error.span.end))
.into_py_object(py)
.into_object()
}


@ -5,8 +5,7 @@ use cpython::exc::RuntimeError;
use edgeql_parser::hash;
use crate::errors::TokenizerError;
use crate::pynormalize::py_pos;
use crate::errors::SyntaxError;
py_class!(pub class Hasher |py| {
@ -26,7 +25,7 @@ py_class!(pub class Hasher |py| {
hasher.add_source(&text)
.map_err(|e| match e {
hash::Error::Tokenizer(msg, pos) => {
TokenizerError::new(py, (msg, py_pos(py, &pos)))
SyntaxError::new(py, (msg, (pos.offset, py.None())))
}
})?;
Ok(py.None())


@ -1,20 +1,22 @@
#[macro_use]
extern crate cpython;
use cpython::PyString;
use cpython::{PyObject, PyString};
mod errors;
mod hash;
mod keywords;
pub mod normalize;
mod parser;
mod position;
mod pynormalize;
mod tokenizer;
use errors::TokenizerError;
use errors::{SyntaxError, ParserResult};
use parser::{parse, CSTNode, Production};
use position::{offset_of_line, SourcePoint};
use pynormalize::normalize;
use tokenizer::{get_unpickle_fn, tokenize, Token};
use tokenizer::{get_fn_unpickle_token, tokenize, OpaqueToken};
py_module_initializer!(
_edgeql_parser,
@ -22,6 +24,7 @@ py_module_initializer!(
PyInit__edgeql_parser,
|py, m| {
tokenizer::init_module(py);
parser::init_module();
let keywords = keywords::get_keywords(py)?;
m.add(
py,
@ -30,9 +33,10 @@ py_module_initializer!(
)?;
m.add(py, "tokenize", py_fn!(py, tokenize(data: &PyString)))?;
m.add(py, "_unpickle_token", get_unpickle_fn(py))?;
m.add(py, "Token", py.get_type::<Token>())?;
m.add(py, "TokenizerError", py.get_type::<TokenizerError>())?;
m.add(py, "_unpickle_token", get_fn_unpickle_token(py))?;
m.add(py, "Token", py.get_type::<OpaqueToken>())?;
m.add(py, "SyntaxError", py.get_type::<SyntaxError>())?;
m.add(py, "ParserResult", py.get_type::<ParserResult>())?;
m.add(py, "Entry", py.get_type::<pynormalize::Entry>())?;
m.add(py, "SourcePoint", py.get_type::<SourcePoint>())?;
m.add(py, "normalize", py_fn!(py, normalize(query: &PyString)))?;
@ -46,6 +50,13 @@ py_module_initializer!(
m.add(py, "partial_reserved_keywords", keywords.partial)?;
m.add(py, "future_reserved_keywords", keywords.future)?;
m.add(py, "current_reserved_keywords", keywords.current)?;
m.add(
py,
"parse",
py_fn!(py, parse(parser_name: &PyString, data: PyObject)),
)?;
m.add(py, "CSTNode", py.get_type::<CSTNode>())?;
m.add(py, "Production", py.get_type::<Production>())?;
Ok(())
}
);


@ -1,7 +1,9 @@
use std::borrow::Cow;
use std::collections::BTreeSet;
use edgeql_parser::tokenizer::{Kind, Tokenizer, Token, Value};
use edgeql_parser::keywords::Keyword;
use edgeql_parser::position::{Pos, Span};
use edgeql_parser::tokenizer::{Kind, Token, Tokenizer, Value};
use blake2::{Blake2b512, Digest};
@ -10,76 +12,29 @@ pub struct Variable {
pub value: Value,
}
pub struct Entry<'a> {
pub struct Entry {
pub processed_source: String,
pub hash: [u8; 64],
pub tokens: Vec<Token<'a>>,
pub tokens: Vec<Token<'static>>,
pub variables: Vec<Vec<Variable>>,
pub end_pos: Pos,
pub named_args: bool,
pub first_arg: Option<usize>,
}
#[derive(Debug)]
pub enum Error {
Tokenizer(String, Pos),
Tokenizer(String, u64),
Assertion(String, Pos),
}
fn push_var<'x>(res: &mut Vec<Token<'x>>, module: &'x str, typ: &'x str,
var: String, span: Span)
{
res.push(Token {kind: Kind::OpenParen, text: "(".into(), span, value: None});
res.push(Token {kind: Kind::Less, text: "<".into(), span, value: None});
res.push(Token {kind: Kind::Ident, text: module.into(), span, value: None});
res.push(Token {kind: Kind::Namespace, text: "::".into(), span, value: None});
res.push(Token {kind: Kind::Ident, text: typ.into(), span,
value: Some(Value::String(typ.to_string())),
});
res.push(Token {kind: Kind::Greater, text: ">".into(), span, value: None});
res.push(Token {kind: Kind::Argument, text: var.into(), span, value: None});
res.push(Token {kind: Kind::CloseParen, text: ")".into(), span, value: None});
}
fn scan_vars<'x, 'y: 'x, I>(tokens: I) -> Option<(bool, usize)>
where I: IntoIterator<Item=&'x Token<'y>>,
{
let mut max_visited = None::<usize>;
let mut names = BTreeSet::new();
for t in tokens {
if t.kind == Kind::Argument {
if let Ok(v) = t.text[1..].parse() {
if max_visited.map(|old| v > old).unwrap_or(true) {
max_visited = Some(v);
}
} else {
names.insert(&t.text[..]);
}
}
}
if names.is_empty() {
let next = max_visited.map(|x| x.checked_add(1)).unwrap_or(Some(0))?;
Some((false, next))
} else if max_visited.is_some() {
return None // mixed arguments
} else {
Some((true, names.len()))
}
}
fn hash(text: &str) -> [u8; 64] {
let mut result = [0u8; 64];
result.copy_from_slice(&Blake2b512::new_with_prefix(text.as_bytes())
.finalize());
return result;
}
pub fn normalize(text: &str) -> Result<Entry, Error> {
let mut token_stream = Tokenizer::new(text).validated_values();
let tokens = (&mut token_stream)
let tokens = Tokenizer::new(text)
.validated_values()
.with_eof()
.map(|x| x.map(|t| t.cloned()))
.collect::<Result<Vec<_>, _>>()
.map_err(|e| Error::Tokenizer(e.message, e.span.start))?;
let end_pos = token_stream.current_pos();
let (named_args, var_idx) = match scan_vars(&tokens) {
Some(pair) => pair,
None => {
@ -90,7 +45,6 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
processed_source,
tokens,
variables: Vec::new(),
end_pos,
named_args: false,
first_arg: None,
});
@ -110,7 +64,7 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
}
};
let mut last_was_set = false;
for (idx, tok) in tokens.iter().enumerate() {
for tok in &tokens {
let mut is_set = false;
match tok.kind {
Kind::IntConst
@ -120,69 +74,64 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
// Don't replace 'LIMIT 1' as a special case
&& (tok.text != "1"
|| !matches!(rewritten_tokens.last(),
Some(Token { kind: Kind::Keyword, ref text, .. })
if text.eq_ignore_ascii_case("LIMIT")))
Some(Token { kind: Kind::Keyword(Keyword("limit")), .. })))
&& tok.text != "9223372036854775808"
=> {
push_var(&mut rewritten_tokens, "__std__", "int64",
rewritten_tokens.extend(arg_type_cast( "__std__", "int64",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::FloatConst => {
push_var(&mut rewritten_tokens, "__std__", "float64",
rewritten_tokens.extend(arg_type_cast( "__std__", "float64",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::BigIntConst => {
push_var(&mut rewritten_tokens, "__std__", "bigint",
rewritten_tokens.extend(arg_type_cast( "__std__", "bigint",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::DecimalConst => {
push_var(&mut rewritten_tokens, "__std__", "decimal",
rewritten_tokens.extend(arg_type_cast( "__std__", "decimal",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::Str => {
push_var(&mut rewritten_tokens, "__std__", "str",
rewritten_tokens.extend(arg_type_cast( "__std__", "str",
next_var(),
tok.span);
tok.span));
variables.push(Variable {
value: tok.value.clone().unwrap(),
});
continue;
}
Kind::Keyword
if (matches!(&(&tok.text[..].to_uppercase())[..],
"CONFIGURE"|"CREATE"|"ALTER"|"DROP"|"START"|"ANALYZE")
|| (last_was_set &&
matches!(&(&tok.text[..].to_uppercase())[..],
"GLOBAL"))
)
=> {
Kind::Keyword(Keyword(kw))
if (
matches!(kw, "configure"|"create"|"alter"|"drop"|"start"|"analyze")
|| (last_was_set && kw == "global")
) => {
let processed_source = serialize_tokens(&tokens);
return Ok(Entry {
hash: hash(&processed_source),
processed_source,
tokens,
variables: Vec::new(),
end_pos,
named_args: false,
first_arg: None,
});
@ -192,14 +141,11 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
// because the only statements with internal semis are DDL
// statements, which we don't support anyway.
Kind::Semicolon => {
if idx + 1 < tokens.len() {
all_variables.push(variables);
variables = Vec::new();
}
all_variables.push(variables);
variables = Vec::new();
rewritten_tokens.push(tok.clone());
}
Kind::Keyword
if (matches!(&(&tok.text[..].to_uppercase())[..], "SET")) => {
Kind::Keyword(Keyword("set")) => {
is_set = true;
rewritten_tokens.push(tok.clone());
}
@ -214,75 +160,39 @@ pub fn normalize(text: &str) -> Result<Entry, Error> {
hash: hash(&processed_source),
processed_source,
named_args,
first_arg: if counter <= var_idx { None } else { Some(var_idx) },
first_arg: if counter <= var_idx {
None
} else {
Some(var_idx)
},
tokens: rewritten_tokens,
variables: all_variables,
end_pos,
});
}
fn is_operator(token: &Token) -> bool {
use edgeql_parser::tokenizer::Kind::*;
match token.kind {
| Assign
| SubAssign
| AddAssign
| Arrow
| Coalesce
| Namespace
| DoubleSplat
| BackwardLink
| FloorDiv
| Concat
| GreaterEq
| LessEq
| NotEq
| NotDistinctFrom
| DistinctFrom
| Comma
| OpenParen
| CloseParen
| OpenBracket
| CloseBracket
| OpenBrace
| CloseBrace
| Dot
| Semicolon
| Colon
| Add
| Sub
| Mul
| Div
| Modulo
| Pow
| Less
| Greater
| Eq
| Ampersand
| Pipe
| At
=> true,
| DecimalConst
| FloatConst
| IntConst
| BigIntConst
| BinStr
| Argument
| Str
| BacktickName
| Keyword
| Ident
| Substitution
=> false,
Assign | SubAssign | AddAssign | Arrow | Coalesce | Namespace | DoubleSplat
| BackwardLink | FloorDiv | Concat | GreaterEq | LessEq | NotEq | NotDistinctFrom
| DistinctFrom | Comma | OpenParen | CloseParen | OpenBracket | CloseBracket
| OpenBrace | CloseBrace | Dot | Semicolon | Colon | Add | Sub | Mul | Div | Modulo
| Pow | Less | Greater | Eq | Ampersand | Pipe | At => true,
DecimalConst | FloatConst | IntConst | BigIntConst | BinStr | Argument | Str
| BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon => false,
}
}
fn serialize_tokens(tokens: &[Token<'_>]) -> String {
fn serialize_tokens(tokens: &[Token]) -> String {
use edgeql_parser::tokenizer::Kind::Argument;
let mut buf = String::new();
let mut needs_space = false;
for token in tokens {
if matches!(token.kind, Kind::EOF | Kind::EOI) {
break;
}
if needs_space && !is_operator(token) && token.kind != Argument {
buf.push(' ');
}
@ -292,12 +202,78 @@ fn serialize_tokens(tokens: &[Token<'_>]) -> String {
return buf;
}
fn scan_vars<'x, 'y: 'x, I>(tokens: I) -> Option<(bool, usize)>
where
I: IntoIterator<Item = &'x Token<'x>>,
{
let mut max_visited = None::<usize>;
let mut names = BTreeSet::new();
for t in tokens {
if t.kind == Kind::Argument {
if let Ok(v) = t.text[1..].parse() {
if max_visited.map(|old| v > old).unwrap_or(true) {
max_visited = Some(v);
}
} else {
names.insert(&t.text[..]);
}
}
}
if names.is_empty() {
let next = max_visited.map(|x| x.checked_add(1)).unwrap_or(Some(0))?;
Some((false, next))
} else if max_visited.is_some() {
return None; // mixed arguments
} else {
Some((true, names.len()))
}
}
fn hash(text: &str) -> [u8; 64] {
let mut result = [0u8; 64];
result.copy_from_slice(&Blake2b512::new_with_prefix(text.as_bytes()).finalize());
return result;
}
/// Produces tokens corresponding to (<module::typ>$var)
fn arg_type_cast(
module: &'static str,
typ: &'static str,
var: String,
span: Span,
) -> [Token<'static>; 8] {
fn tk(kind: Kind, text: Cow<'_, str>, span: Span) -> Token {
let value = if kind == Kind::Ident {
Some(Value::String(text.to_string()))
} else {
None
};
Token {
kind,
text,
value,
span,
}
}
[
tk(Kind::OpenParen, "(".into(), span),
tk(Kind::Less, "<".into(), span),
tk(Kind::Ident, module.into(), span),
tk(Kind::Namespace, "::".into(), span),
tk(Kind::Ident, typ.into(), span),
tk(Kind::Greater, ">".into(), span),
tk(Kind::Argument, var.into(), span),
tk(Kind::CloseParen, ")".into(), span),
]
}
#[cfg(test)]
mod test {
use super::scan_vars;
use edgeql_parser::tokenizer::{Token, Tokenizer};
fn tokenize<'x>(s: &'x str) -> Vec<Token<'x>> {
fn tokenize<'x>(s: &'x str) -> Vec<Token> {
let mut r = Vec::new();
let mut s = Tokenizer::new(s);
loop {
@ -328,8 +304,10 @@ mod test {
assert_eq!(scan_vars(&tokenize("$a")).unwrap(), (true, 1));
assert_eq!(scan_vars(&tokenize("$b $c $d")).unwrap(), (true, 3));
assert_eq!(scan_vars(&tokenize("$b $c $b")).unwrap(), (true, 2));
assert_eq!(scan_vars(&tokenize("$a $b $b $a $c $xx")).unwrap(),
(true, 4));
assert_eq!(
scan_vars(&tokenize("$a $b $b $a $c $xx")).unwrap(),
(true, 4)
);
}
#[test]
@ -339,5 +317,4 @@ mod test {
assert_eq!(scan_vars(&tokenize("$b $c $100")), None);
assert_eq!(scan_vars(&tokenize("$10 $xx $yy")), None);
}
}


@ -0,0 +1,163 @@
use std::collections::HashMap;
use cpython::{
ObjectProtocol, PyClone, PyInt, PyList, PyObject, PyResult, PyString, PyTuple, Python,
PythonObject, PythonObjectWithCheckedDowncast, ToPyObject,
};
use edgeql_parser::parser;
use crate::errors::{parser_error_into_tuple, ParserResult};
use crate::pynormalize::value_to_py_object;
use crate::tokenizer::OpaqueToken;
pub fn parse(py: Python, parser_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
let (spec, productions) = load_spec(py, parser_name.to_string(py)?.as_ref())?;
let tokens = downcast_tokens(py, tokens)?;
let context = parser::Context::new(spec);
let (cst, errors) = parser::parse(&tokens, &context);
let cst = cst.map(|c| to_py_cst(c, py)).transpose()?;
let errors = errors
.into_iter()
.map(|e| parser_error_into_tuple(py, e))
.collect::<Vec<_>>();
let errors = PyList::new(py, &errors);
let res = ParserResult::create_instance(py, cst.into_py_object(py), errors)?;
Ok((res, productions).into_py_object(py))
}
py_class!(pub class CSTNode |py| {
data _production: PyObject;
data _terminal: PyObject;
def production(&self) -> PyResult<PyObject> {
Ok(self._production(py).clone_ref(py))
}
def terminal(&self) -> PyResult<PyObject> {
Ok(self._terminal(py).clone_ref(py))
}
});
py_class!(pub class Production |py| {
data _id: PyInt;
data _args: PyList;
def id(&self) -> PyResult<PyInt> {
Ok(self._id(py).clone_ref(py))
}
def args(&self) -> PyResult<PyList> {
Ok(self._args(py).clone_ref(py))
}
});
py_class!(pub class Terminal |py| {
data _text: PyString;
data _value: PyObject;
data _start: u64;
data _end: u64;
def text(&self) -> PyResult<PyString> {
Ok(self._text(py).clone_ref(py))
}
def value(&self) -> PyResult<PyObject> {
Ok(self._value(py).clone_ref(py))
}
def start(&self) -> PyResult<u64> {
Ok(*self._start(py))
}
def end(&self) -> PyResult<u64> {
Ok(*self._end(py))
}
});
static mut PARSER_SPECS: Option<HashMap<String, (parser::Spec, PyObject)>> = None;
pub fn init_module() {
unsafe {
PARSER_SPECS = Some(HashMap::new());
}
}
fn downcast_tokens<'a>(py: Python, token_list: PyObject) -> PyResult<Vec<parser::Terminal>> {
let tokens = PyList::downcast_from(py, token_list)?;
let mut buf = Vec::with_capacity(tokens.len(py));
for token in tokens.iter(py) {
let token = OpaqueToken::downcast_from(py, token)?;
let token = token.inner(py);
buf.push(parser::Terminal::from_token(token));
}
Ok(buf)
}
fn load_spec(py: Python, parser_name: &str) -> PyResult<&'static (parser::Spec, PyObject)> {
let parser_specs = unsafe { PARSER_SPECS.as_mut().unwrap() };
if !parser_specs.contains_key(parser_name) {
let parser_mod = py.import("edb.edgeql.parser.parser")?;
let process_spec = py.import("edb.edgeql.parser")?.get(py, "process_spec")?;
let parser_cls = parser_mod.get(py, parser_name)?;
let parser = parser_cls.call(py, PyTuple::new(py, &[]), None)?;
let res = process_spec.call(py, (parser,), None)?;
let res = PyTuple::downcast_from(py, res)?;
let spec_json = PyString::downcast_from(py, res.get_item(py, 0))?;
let spec_json = spec_json.to_string(py).unwrap();
let productions = res.get_item(py, 1);
let spec = parser::Spec::from_json(&spec_json).unwrap();
parser_specs.insert(parser_name.to_string(), (spec, productions));
}
Ok(unsafe { PARSER_SPECS.as_ref().unwrap().get(parser_name).unwrap() })
}
fn to_py_cst<'a>(cst: &'a parser::CSTNode<'a>, py: Python) -> PyResult<CSTNode> {
match cst {
parser::CSTNode::Empty => CSTNode::create_instance(py, py.None(), py.None()),
parser::CSTNode::Terminal(token) => CSTNode::create_instance(
py,
py.None(),
Terminal::create_instance(
py,
token.text.to_py_object(py),
if let Some(val) = &token.value {
value_to_py_object(py, val)?
} else {
py.None()
},
token.span.start,
token.span.end,
)?
.into_object(),
),
parser::CSTNode::Production(prod) => CSTNode::create_instance(
py,
Production::create_instance(
py,
prod.id.into_py_object(py),
PyList::new(
py,
prod.args
.iter()
.map(|a| to_py_cst(a, py).map(|x| x.into_object()))
.collect::<PyResult<Vec<_>>>()?
.as_slice(),
),
)?
.into_object(),
py.None(),
),
}
}
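
For orientation, here is a minimal sketch of driving the new pure-Rust parser directly, outside the Python binding above. It is not part of the diff; it assumes a grammar spec already serialized to JSON (the same data `load_spec` feeds to `Spec::from_json`) and it simplifies lifetimes and error handling:

```rust
use edgeql_parser::parser::{self, Context, Spec};
use edgeql_parser::tokenizer::Tokenizer;

fn parse_snippet(spec_json: &str, source: &str) {
    // Grammar tables exported from the Python grammar (assumed to be available).
    let spec = Spec::from_json(spec_json).expect("valid spec JSON");
    let ctx = Context::new(&spec);

    // Tokenize and convert to parser terminals, keeping the trailing EOF token.
    let terminals: Vec<parser::Terminal> = Tokenizer::new(source)
        .validated_values()
        .with_eof()
        .map(|res| res.map(|tok| parser::Terminal::from_token(tok.cloned())))
        .collect::<Result<_, _>>()
        .expect("no tokenizer errors");

    // The parser returns a CST (when recovery succeeds) plus any recovered errors.
    let (cst, errors) = parser::parse(&terminals, &ctx);
    for error in &errors {
        eprintln!("syntax error: {}", error.message);
    }
    if let Some(root) = cst {
        // Walk the concrete syntax tree here.
        let _ = root;
    }
}
```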


@ -1,19 +1,19 @@
use std::convert::TryFrom;
use bigdecimal::Num;
use cpython::exc::AssertionError;
use cpython::{PyBytes, PyErr, PyInt, PyTuple, PythonObject, ToPyObject};
use cpython::{PyBytes, PyErr, PyInt, PythonObject, ToPyObject};
use cpython::{PyClone, PyDict, PyList, PyResult, PyString, Python};
use cpython::{PyFloat, PyObject};
use bytes::{BufMut, Bytes, BytesMut};
use edgedb_protocol::codec;
use edgedb_protocol::model::{BigInt, Decimal};
use edgeql_parser::position::Pos;
use edgeql_parser::tokenizer::Value;
use crate::errors::TokenizerError;
use crate::errors::SyntaxError;
use crate::normalize::{normalize as _normalize, Error, Variable};
use crate::tokenizer::convert_tokens;
use crate::tokenizer::tokens_to_py;
py_class!(pub class Entry |py| {
data _key: PyBytes;
@ -60,10 +60,6 @@ py_class!(pub class Entry |py| {
}
});
pub fn py_pos(py: Python, pos: &Pos) -> PyTuple {
(pos.line, pos.column, pos.offset).to_py_object(py)
}
pub fn serialize_extra(variables: &[Variable]) -> Result<Bytes, String> {
use edgedb_protocol::codec::Codec;
use edgedb_protocol::value::Value as P;
@ -91,8 +87,15 @@ pub fn serialize_extra(variables: &[Variable]) -> Result<Bytes, String> {
.map_err(|e| format!("float cannot be encoded: {}", e))?;
}
Value::BigInt(ref v) => {
let val = BigInt::try_from(v.clone())
.map_err(|e| format!("bigint cannot be encoded: {}", e))?;
// Two different BigInt implementations are in play here.
// We have to use bigdecimal::num_bigint::BigInt because it can parse with radix 16.
let val = bigdecimal::num_bigint::BigInt::from_str_radix(v, 16)
.map_err(|e| format!("bigint cannot be encoded: {}", e))
.and_then(|x| {
BigInt::try_from(x).map_err(|e| format!("bigint cannot be encoded: {}", e))
})?;
codec::BigInt
.encode(&mut buf, &P::BigInt(val))
.map_err(|e| format!("bigint cannot be encoded: {}", e))?;
@ -145,7 +148,7 @@ pub fn normalize(py: Python<'_>, text: &PyString) -> PyResult<Entry> {
py,
/* key: */ PyBytes::new(py, &entry.hash[..]),
/* processed_source: */ entry.processed_source,
/* tokens: */ convert_tokens(py, entry.tokens, entry.end_pos)?,
/* tokens: */ tokens_to_py(py, entry.tokens)?,
/* extra_blobs: */ blobs,
/* extra_named: */ entry.named_args,
/* first_extra: */ entry.first_arg,
@ -154,7 +157,7 @@ pub fn normalize(py: Python<'_>, text: &PyString) -> PyResult<Entry> {
)?)
}
Err(Error::Tokenizer(msg, pos)) => {
return Err(TokenizerError::new(py, (msg, py_pos(py, &pos))))
return Err(SyntaxError::new(py, (msg, (pos, py.None()))))
}
Err(Error::Assertion(msg, pos)) => {
return Err(PyErr::new::<AssertionError, _>(
@ -170,10 +173,9 @@ pub fn value_to_py_object(py: Python, val: &Value) -> PyResult<PyObject> {
Value::Int(v) => v.to_py_object(py).into_object(),
Value::String(v) => v.to_py_object(py).into_object(),
Value::Float(v) => v.to_py_object(py).into_object(),
Value::BigInt(v) => {
py.get_type::<PyInt>()
.call(py, (v.to_str_radix(16), 16.to_py_object(py)), None)?
}
Value::BigInt(v) => py
.get_type::<PyInt>()
.call(py, (v, 16.to_py_object(py)), None)?,
Value::Decimal(v) => py.get_type::<PyFloat>().call(py, (v.to_string(),), None)?,
Value::Bytes(v) => PyBytes::new(py, v).into_object(),
})
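
Since `Value::BigInt` now carries its digits as a radix-16 string rather than a numeric type, both code paths above reparse that string. A tiny illustrative sketch of the convention, not part of the diff:

```rust
use bigdecimal::num_bigint::BigInt;
use bigdecimal::Num; // brings `from_str_radix` into scope

fn decode_bigint(hex_digits: &str) -> BigInt {
    // Big integer literals are carried as hexadecimal strings,
    // e.g. the literal `23n` is stored as "17".
    BigInt::from_str_radix(hex_digits, 16).expect("valid hex digits")
}

fn main() {
    assert_eq!(decode_bigint("17"), BigInt::from(23));
}
```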


@ -1,572 +1,94 @@
use std::collections::HashMap;
use cpython::{PyBytes, PyClone, PyResult, PyString, Python, PythonObject};
use cpython::{PyList, PyObject, PyTuple, ToPyObject};
use cpython::{PyString, PyResult, Python, PyClone, PythonObject};
use cpython::{PyTuple, PyList, PyObject, ToPyObject, ObjectProtocol};
use cpython::{FromPyObject};
use edgeql_parser::tokenizer::{Token, Tokenizer};
use edgeql_parser::tokenizer::{Kind, is_keyword, Tokenizer, Token as PToken};
use edgeql_parser::tokenizer::{MAX_KEYWORD_LENGTH};
use edgeql_parser::position::Pos;
use edgeql_parser::keywords::{PARTIAL_RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
use edgeql_parser::keywords::{CURRENT_RESERVED_KEYWORDS};
use edgeql_parser::keywords::{FUTURE_RESERVED_KEYWORDS};
use crate::errors::{parser_error_into_tuple, ParserResult};
use crate::errors::TokenizerError;
use crate::pynormalize::{py_pos, value_to_py_object};
pub fn tokenize(py: Python, s: &PyString) -> PyResult<ParserResult> {
let data = s.to_string(py)?;
static mut TOKENS: Option<Tokens> = None;
let mut token_stream = Tokenizer::new(&data[..]).validated_values().with_eof();
let mut tokens: Vec<_> = Vec::new();
let mut errors: Vec<_> = Vec::new();
fn rs_pos(py: Python, value: &PyObject) -> PyResult<Pos> {
let (line, column, offset) = FromPyObject::extract(py, value)?;
Ok(Pos { line, column, offset })
for res in &mut token_stream {
match res {
Ok(token) => tokens.push(token),
Err(e) => {
errors.push(parser_error_into_tuple(py, e));
// TODO: fix tokenizer to skip bad tokens and continue
break;
}
}
}
let tokens = tokens_to_py(py, tokens)?;
let errors = PyList::new(py, errors.as_slice()).to_py_object(py);
ParserResult::create_instance(py, tokens.into_object(), errors)
}
py_class!(pub class Token |py| {
data _kind: PyString;
data _text: PyString;
data _value: PyObject;
data _start: Pos;
data _end: Pos;
def kind(&self) -> PyResult<PyString> {
Ok(self._kind(py).clone_ref(py))
}
def text(&self) -> PyResult<PyString> {
Ok(self._text(py).clone_ref(py))
}
def value(&self) -> PyResult<PyObject> {
Ok(self._value(py).clone_ref(py))
}
def start(&self) -> PyResult<PyTuple> {
Ok(py_pos(py, self._start(py)))
}
def end(&self) -> PyResult<PyTuple> {
Ok(py_pos(py, self._end(py)))
}
// An opaque wrapper around [edgeql_parser::tokenizer::Token].
// Supports Python pickle serialization.
py_class!(pub class OpaqueToken |py| {
data _inner: Token<'static>;
def __repr__(&self) -> PyResult<PyString> {
let val = self._value(py);
let s = if *val == py.None() {
format!("<Token {}>", self._kind(py).to_string(py)?)
} else {
format!("<Token {} {}>",
self._kind(py).to_string(py)?,
val.repr(py)?.to_string(py)?)
};
Ok(PyString::new(py, &s))
Ok(PyString::new(py, &self._inner(py).to_string()))
}
def __reduce__(&self) -> PyResult<PyTuple> {
let data: Vec<u8> = rmp_serde::to_vec(self._inner(py)).unwrap().to_vec();
return Ok((
get_unpickle_fn(py),
get_fn_unpickle_token(py),
(
self._kind(py),
self._text(py),
self._value(py),
py_pos(py, self._start(py)),
py_pos(py, self._end(py)),
PyBytes::new(py, &data),
),
).to_py_object(py))
}
});
pub struct Tokens {
ident: PyString,
argument: PyString,
eof: PyString,
empty: PyString,
substitution: PyString,
named_only: PyString,
named_only_val: PyString,
set_annotation: PyString,
set_annotation_val: PyString,
set_type: PyString,
set_type_val: PyString,
extension_package: PyString,
extension_package_val: PyString,
order_by: PyString,
order_by_val: PyString,
dot: PyString,
backward_link: PyString,
open_bracket: PyString,
close_bracket: PyString,
open_paren: PyString,
close_paren: PyString,
open_brace: PyString,
close_brace: PyString,
namespace: PyString,
double_splat: PyString,
coalesce: PyString,
colon: PyString,
semicolon: PyString,
comma: PyString,
add: PyString,
concat: PyString,
sub: PyString,
mul: PyString,
div: PyString,
floor_div: PyString,
modulo: PyString,
pow: PyString,
less: PyString,
greater: PyString,
eq: PyString,
ampersand: PyString,
pipe: PyString,
at: PyString,
iconst: PyString,
niconst: PyString,
fconst: PyString,
nfconst: PyString,
bconst: PyString,
sconst: PyString,
greater_eq: PyString,
less_eq: PyString,
not_eq: PyString,
distinct_from: PyString,
not_distinct_from: PyString,
assign: PyString,
add_assign: PyString,
sub_assign: PyString,
arrow: PyString,
keywords: HashMap<String, TokenInfo>,
unpickle_token: PyObject,
}
struct Cache {
keyword_buf: String,
}
pub struct TokenInfo {
pub kind: Kind,
pub name: PyString,
pub value: Option<PyString>,
}
pub fn init_module(py: Python) {
unsafe {
TOKENS = Some(Tokens::new(py))
}
}
pub fn _unpickle_token(py: Python,
kind: &PyString, text: &PyString, value: &PyObject,
start: &PyObject, end: &PyObject)
-> PyResult<Token>
{
// TODO(tailhook) We might some strings from Tokens structure
// (i.e. internning them).
// But if we're storing a collection of tokens
// they will store the tokens only once, so it
// doesn't seem to help that much.
Token::create_instance(py,
kind.clone_ref(py),
text.clone_ref(py),
value.clone_ref(py),
rs_pos(py, start)?,
rs_pos(py, end)?)
}
pub fn tokenize(py: Python, s: &PyString) -> PyResult<PyList> {
let data = s.to_string(py)?;
let mut token_stream = Tokenizer::new(&data[..]).validated_values();
let rust_tokens: Vec<_> = py.allow_threads(|| {
(&mut token_stream).collect::<Result<_, _>>()
}).map_err(|e| {
TokenizerError::new(py, (e.message, py_pos(py, &e.span.start)))
})?;
return convert_tokens(py, rust_tokens, token_stream.current_pos());
}
pub fn convert_tokens(py: Python, rust_tokens: Vec<PToken<'_>>,
end_pos: Pos)
-> PyResult<PyList>
{
let tokens = unsafe { TOKENS.as_ref().expect("module initialized") };
let mut cache = Cache {
keyword_buf: String::with_capacity(MAX_KEYWORD_LENGTH),
};
pub fn tokens_to_py(py: Python, rust_tokens: Vec<Token>) -> PyResult<PyList> {
let mut buf = Vec::with_capacity(rust_tokens.len());
for tok in rust_tokens {
let (kind, text) = get_token_kind_and_name(py, tokens, &mut cache, &tok);
let py_tok = OpaqueToken::create_instance(py, tok.cloned())?.into_object();
let value = tok.value.as_ref()
.map(|v| value_to_py_object(py, v)).transpose()?
.unwrap_or_else(|| py.None());
let py_tok = Token::create_instance(
py, kind, text, value, tok.span.start, tok.span.end
)?;
buf.push(py_tok.into_object());
buf.push(py_tok);
}
buf.push(Token::create_instance(
py,
tokens.eof.clone_ref(py),
tokens.empty.clone_ref(py),
py.None(),
end_pos,
end_pos
)?.into_object()
);
Ok(PyList::new(py, &buf[..]))
}
impl Tokens {
pub fn new(py: Python) -> Tokens {
let mut res = Tokens {
ident: PyString::new(py, "IDENT"),
argument: PyString::new(py, "ARGUMENT"),
eof: PyString::new(py, "EOF"),
empty: PyString::new(py, ""),
substitution: PyString::new(py, "SUBSTITUTION"),
named_only: PyString::new(py, "NAMEDONLY"),
named_only_val: PyString::new(py, "NAMED ONLY"),
set_annotation: PyString::new(py, "SETANNOTATION"),
set_annotation_val: PyString::new(py, "SET ANNOTATION"),
set_type: PyString::new(py, "SETTYPE"),
set_type_val: PyString::new(py, "SET TYPE"),
extension_package: PyString::new(py, "EXTENSIONPACKAGE"),
extension_package_val: PyString::new(py, "EXTENSION PACKAGE"),
order_by: PyString::new(py, "ORDERBY"),
order_by_val: PyString::new(py, "ORDER BY"),
/// To support pickle serialization of OpaqueTokens, we need to provide a
/// deserialization function in __reduce__ methods.
/// This function must not be inlined and must be globally accessible.
/// To achieve this, we expose it as part of the module definition
/// (`_unpickle_token`) and save a reference to it in `FN_UNPICKLE_TOKEN`.
///
/// A bit hacky, but it works.
static mut FN_UNPICKLE_TOKEN: Option<PyObject> = None;
dot: PyString::new(py, "."),
backward_link: PyString::new(py, ".<"),
open_bracket: PyString::new(py, "["),
close_bracket: PyString::new(py, "]"),
open_paren: PyString::new(py, "("),
close_paren: PyString::new(py, ")"),
open_brace: PyString::new(py, "{"),
close_brace: PyString::new(py, "}"),
namespace: PyString::new(py, "::"),
double_splat: PyString::new(py, "**"),
coalesce: PyString::new(py, "??"),
colon: PyString::new(py, ":"),
semicolon: PyString::new(py, ";"),
comma: PyString::new(py, ","),
add: PyString::new(py, "+"),
concat: PyString::new(py, "++"),
sub: PyString::new(py, "-"),
mul: PyString::new(py, "*"),
div: PyString::new(py, "/"),
floor_div: PyString::new(py, "//"),
modulo: PyString::new(py, "%"),
pow: PyString::new(py, "^"),
less: PyString::new(py, "<"),
greater: PyString::new(py, ">"),
eq: PyString::new(py, "="),
ampersand: PyString::new(py, "&"),
pipe: PyString::new(py, "|"),
at: PyString::new(py, "@"),
iconst: PyString::new(py, "ICONST"),
niconst: PyString::new(py, "NICONST"),
fconst: PyString::new(py, "FCONST"),
nfconst: PyString::new(py, "NFCONST"),
bconst: PyString::new(py, "BCONST"),
sconst: PyString::new(py, "SCONST"),
// as OP
greater_eq: PyString::new(py, ">="),
less_eq: PyString::new(py, "<="),
not_eq: PyString::new(py, "!="),
distinct_from: PyString::new(py, "?!="),
not_distinct_from: PyString::new(py, "?="),
assign: PyString::new(py, ":="),
add_assign: PyString::new(py, "+="),
sub_assign: PyString::new(py, "-="),
arrow: PyString::new(py, "->"),
keywords: HashMap::new(),
unpickle_token: py_fn!(py, _unpickle_token(
kind: &PyString, text: &PyString, value: &PyObject,
start: &PyObject, end: &PyObject)),
};
// 'EOF'
for kw in UNRESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
for kw in PARTIAL_RESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
for kw in CURRENT_RESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
for kw in FUTURE_RESERVED_KEYWORDS.iter() {
res.add_kw(py, kw);
}
return res;
}
fn add_kw(&mut self, py: Python, name: &str) {
let py_name = PyString::new(py, &name.to_ascii_uppercase());
let tok_name = if name.starts_with("__") && name.ends_with("__") {
format!("DUNDER{}", name[2..name.len()-2].to_ascii_uppercase())
.to_py_object(py)
} else {
py_name.clone_ref(py)
};
self.keywords.insert(name.into(), TokenInfo {
kind: if is_keyword(name) { Kind::Keyword } else { Kind::Ident },
name: tok_name,
value: None,
});
pub fn init_module(py: Python) {
unsafe {
FN_UNPICKLE_TOKEN = Some(py_fn!(py, _unpickle_token(bytes: &PyBytes)));
}
}
fn get_token_kind_and_name(
py: Python,
tokens: &Tokens,
cache: &mut Cache,
token: &PToken,
) -> (PyString, PyString) {
use Kind::*;
let text = &token.text[..];
match token.kind {
Assign => (
tokens.assign.clone_ref(py),
tokens.assign.clone_ref(py),
),
SubAssign => (
tokens.sub_assign.clone_ref(py),
tokens.sub_assign.clone_ref(py),
),
AddAssign => (
tokens.add_assign.clone_ref(py),
tokens.add_assign.clone_ref(py),
),
Arrow => (
tokens.arrow.clone_ref(py),
tokens.arrow.clone_ref(py),
),
Coalesce => (
tokens.coalesce.clone_ref(py),
tokens.coalesce.clone_ref(py),
),
Namespace => (
tokens.namespace.clone_ref(py),
tokens.namespace.clone_ref(py),
),
DoubleSplat => (
tokens.double_splat.clone_ref(py),
tokens.double_splat.clone_ref(py),
),
BackwardLink => (
tokens.backward_link.clone_ref(py),
tokens.backward_link.clone_ref(py),
),
FloorDiv => (
tokens.floor_div.clone_ref(py),
tokens.floor_div.clone_ref(py),
),
Concat => (
tokens.concat.clone_ref(py),
tokens.concat.clone_ref(py),
),
GreaterEq => (
tokens.greater_eq.clone_ref(py),
tokens.greater_eq.clone_ref(py),
),
LessEq => (
tokens.less_eq.clone_ref(py),
tokens.less_eq.clone_ref(py),
),
NotEq => (
tokens.not_eq.clone_ref(py),
tokens.not_eq.clone_ref(py),
),
NotDistinctFrom => (
tokens.not_distinct_from.clone_ref(py),
tokens.not_distinct_from.clone_ref(py),
),
DistinctFrom => (
tokens.distinct_from.clone_ref(py),
tokens.distinct_from.clone_ref(py),
),
Comma => (
tokens.comma.clone_ref(py),
tokens.comma.clone_ref(py),
),
OpenParen => (
tokens.open_paren.clone_ref(py),
tokens.open_paren.clone_ref(py),
),
CloseParen => (
tokens.close_paren.clone_ref(py),
tokens.close_paren.clone_ref(py),
),
OpenBracket => (
tokens.open_bracket.clone_ref(py),
tokens.open_bracket.clone_ref(py),
),
CloseBracket => (
tokens.close_bracket.clone_ref(py),
tokens.close_bracket.clone_ref(py),
),
OpenBrace => (
tokens.open_brace.clone_ref(py),
tokens.open_brace.clone_ref(py),
),
CloseBrace => (
tokens.close_brace.clone_ref(py),
tokens.close_brace.clone_ref(py),
),
Dot => (
tokens.dot.clone_ref(py),
tokens.dot.clone_ref(py),
),
Semicolon => (
tokens.semicolon.clone_ref(py),
tokens.semicolon.clone_ref(py),
),
Colon => (
tokens.colon.clone_ref(py),
tokens.colon.clone_ref(py),
),
Add => (
tokens.add.clone_ref(py),
tokens.add.clone_ref(py),
),
Sub => (
tokens.sub.clone_ref(py),
tokens.sub.clone_ref(py),
),
Mul => (
tokens.mul.clone_ref(py),
tokens.mul.clone_ref(py),
),
Div => (
tokens.div.clone_ref(py),
tokens.div.clone_ref(py),
),
Modulo => (
tokens.modulo.clone_ref(py),
tokens.modulo.clone_ref(py),
),
Pow => (
tokens.pow.clone_ref(py),
tokens.pow.clone_ref(py),
),
Less => (
tokens.less.clone_ref(py),
tokens.less.clone_ref(py),
),
Greater => (
tokens.greater.clone_ref(py),
tokens.greater.clone_ref(py),
),
Eq => (
tokens.eq.clone_ref(py),
tokens.eq.clone_ref(py),
),
Ampersand => (
tokens.ampersand.clone_ref(py),
tokens.ampersand.clone_ref(py),
),
Pipe => (
tokens.pipe.clone_ref(py),
tokens.pipe.clone_ref(py),
),
At => (
tokens.at.clone_ref(py),
tokens.at.clone_ref(py),
),
Argument => (
tokens.argument.clone_ref(py),
PyString::new(py, text),
),
DecimalConst => (
tokens.nfconst.clone_ref(py),
PyString::new(py, text),
),
FloatConst => (
tokens.fconst.clone_ref(py),
PyString::new(py, text),
),
IntConst => (
tokens.iconst.clone_ref(py),
PyString::new(py, text),
),
BigIntConst => (
tokens.niconst.clone_ref(py),
PyString::new(py, text),
),
BinStr => (
tokens.bconst.clone_ref(py),
PyString::new(py, text),
),
Str => (
tokens.sconst.clone_ref(py),
PyString::new(py, text),
),
BacktickName => (
tokens.ident.clone_ref(py),
PyString::new(py, text),
),
Ident | Keyword => match text {
"named only" => (
tokens.named_only.clone_ref(py),
tokens.named_only_val.clone_ref(py),
),
"set annotation" => (
tokens.set_annotation.clone_ref(py),
tokens.set_annotation_val.clone_ref(py),
),
"set type" => (
tokens.set_type.clone_ref(py),
tokens.set_type_val.clone_ref(py),
),
"extension package" => {
(
tokens.extension_package.clone_ref(py),
tokens.extension_package_val.clone_ref(py),
)},
"order by" => (
tokens.order_by.clone_ref(py),
tokens.order_by_val.clone_ref(py),
),
pub fn _unpickle_token(py: Python, bytes: &PyBytes) -> PyResult<OpaqueToken> {
let token = rmp_serde::from_slice(bytes.data(py)).unwrap();
OpaqueToken::create_instance(py, token)
}
_ => {
if text.len() > MAX_KEYWORD_LENGTH {
(
tokens.ident.clone_ref(py),
PyString::new(py, text),
)
} else {
cache.keyword_buf.clear();
cache.keyword_buf.push_str(text);
cache.keyword_buf.make_ascii_lowercase();
pub fn get_fn_unpickle_token(py: Python) -> PyObject {
let py_function = unsafe { FN_UNPICKLE_TOKEN.as_ref().expect("module initialized") };
return py_function.clone_ref(py);
}
let kind = match tokens.keywords.get(&cache.keyword_buf) {
Some(keyword) => {
debug_assert_eq!(keyword.kind, token.kind);
keyword.name.clone_ref(py)
}
None => {
debug_assert_eq!(Kind::Ident, token.kind);
tokens.ident.clone_ref(py)
}
};
(kind, PyString::new(py, text))
}
},
}
Substitution => (
tokens.substitution.clone_ref(py),
PyString::new(py, text),
),
impl OpaqueToken {
pub(super) fn inner(&self, py: Python) -> Token {
self._inner(py).clone()
}
}
pub fn get_unpickle_fn(py: Python) -> PyObject {
let tokens = unsafe { TOKENS.as_ref().expect("module initialized") };
return tokens.unpickle_token.clone_ref(py);
}
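
A minimal sketch of the MessagePack round-trip that `OpaqueToken.__reduce__` and `_unpickle_token` rely on, using the same `rmp_serde` calls as above. It is not part of the diff, assumes the `serde` feature of `edgeql-parser`, and simplifies error handling:

```rust
use edgeql_parser::tokenizer::{Token, Tokenizer};

fn roundtrip_first_token(source: &str) -> Token<'static> {
    // Take the first token and detach it from the source buffer.
    let tok: Token<'static> = Tokenizer::new(source)
        .validated_values()
        .next()
        .expect("source is not empty")
        .expect("no tokenizer error")
        .cloned();

    // The same serialize/deserialize pair used by __reduce__ / _unpickle_token.
    let bytes = rmp_serde::to_vec(&tok).expect("token serializes");
    rmp_serde::from_slice(&bytes).expect("token deserializes")
}

fn main() {
    let token = roundtrip_first_token("select 1");
    assert_eq!(token.text, "select");
}
```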


@ -1,5 +1,6 @@
use edgeql_rust::normalize::{normalize, Variable};
use edgeql_parser::tokenizer::{Value as Value};
use edgeql_parser::tokenizer::Value;
use num_bigint::BigInt;
#[test]
@ -80,10 +81,10 @@ fn test_bigint() {
"SELECT(<__std__::bigint>$0)+(<__std__::bigint>$1)");
assert_eq!(entry.variables, vec![vec![
Variable {
value: Value::BigInt(1.into()),
value: Value::BigInt("1".into()),
},
Variable {
value: Value::BigInt(23.into()),
value: Value::BigInt(BigInt::from(23).to_str_radix(16)),
}
]]);
}
@ -97,10 +98,10 @@ fn test_bigint_exponent() {
"SELECT(<__std__::bigint>$0)+(<__std__::bigint>$1)");
assert_eq!(entry.variables, vec![vec![
Variable {
value: Value::BigInt(10000000000u64.into()),
value: Value::BigInt(BigInt::from(10000000000u64).to_str_radix(16)),
},
Variable {
value: Value::BigInt(230000000000000u64.into()),
value: Value::BigInt(BigInt::from(230000000000000u64).to_str_radix(16)),
}
]]);
}
@ -203,6 +204,7 @@ fn test_script() {
value: Value::Int(2),
}
],
vec![]
]);
}
@ -227,5 +229,6 @@ fn test_script_with_args() {
value: Value::Int(2),
}
],
vec![]
]);
}


@ -1,4 +1,4 @@
use crate::position::Pos;
use crate::position::{Pos, InflatedPos};
use crate::tokenizer::{Kind, self};
/// Error of expression checking
@ -79,10 +79,12 @@ pub fn check(text: &str) -> Result<(), Error> {
}
};
let pos = token.span.start;
let pos = InflatedPos::from_offset(text.as_bytes(), pos).unwrap().deflate();
empty = false;
match token.kind {
Comma | Semicolon if brackets.is_empty() => {
return Err(UnexpectedToken(token.text.to_string(), pos));
return Err(UnexpectedToken(token.text.into(), pos));
}
OpenParen | OpenBracket | OpenBrace => {
brackets.push((token.kind, pos));


@ -3,7 +3,7 @@ use std::fmt::{self, Write};
use std::error::Error;
use std::char;
use crate::tokenizer::is_keyword;
use crate::keywords;
/// Error returned from `unquote_string` function
///
@ -23,7 +23,7 @@ pub struct UnquoteError(String);
pub fn quote_name(s: &str) -> Cow<str> {
if s.chars().all(|c| c.is_alphanumeric() || c == '_') {
let lower = s.to_ascii_lowercase();
if !is_keyword(&lower) {
if keywords::lookup(&lower).is_none() {
return s.into();
}
}


@ -1,4 +1,6 @@
pub const UNRESERVED_KEYWORDS: &[&str] = &[
use phf::phf_set;
pub const UNRESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
"abort",
"abstract",
"access",
@ -103,20 +105,11 @@ pub const UNRESERVED_KEYWORDS: &[&str] = &[
"version",
"view",
"write",
];
);
pub const PARTIAL_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!("except", "intersect", "union",);
pub const PARTIAL_RESERVED_KEYWORDS: &[&str] = &[
// Keep in sync with `tokenizer::is_keyword`
"except",
"intersect",
"union",
// Keep in sync with `tokenizer::is_keyword`
];
pub const FUTURE_RESERVED_KEYWORDS: &[&str] = &[
// Keep in sync with `tokenizer::is_keyword`
pub const FUTURE_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
"anyarray",
"begin",
"case",
@ -147,18 +140,15 @@ pub const FUTURE_RESERVED_KEYWORDS: &[&str] = &[
"when",
"window",
"never",
// Keep in sync with `tokenizer::is_keyword`
];
);
pub const CURRENT_RESERVED_KEYWORDS: &[&str] = &[
// Keep in sync with `tokenizer::is_keyword`
pub const CURRENT_RESERVED_KEYWORDS: phf::Set<&str> = phf_set!(
"__source__",
"__subject__",
"__type__",
"__std__",
"__edgedbsys__",
"__edgedbtpl__",
"__std__",
"__new__",
"__old__",
"__specified__",
@ -207,5 +197,38 @@ pub const CURRENT_RESERVED_KEYWORDS: &[&str] = &[
"update",
"variadic",
"with",
// Keep in sync with `tokenizer::is_keyword`
];
);
pub const COMBINED_KEYWORDS: phf::Set<&str> = phf_set!(
"named only",
"set annotation",
"set type",
"extension package",
"order by",
);
pub fn lookup(s: &str) -> Option<Keyword> {
None.or_else(|| PARTIAL_RESERVED_KEYWORDS.get_key(s))
.or_else(|| FUTURE_RESERVED_KEYWORDS.get_key(s))
.or_else(|| CURRENT_RESERVED_KEYWORDS.get_key(s))
.map(|x| Keyword(x))
}
pub fn lookup_all(s: &str) -> Option<Keyword> {
lookup(s).or_else(|| {
None.or_else(|| COMBINED_KEYWORDS.get_key(s))
.or_else(|| UNRESERVED_KEYWORDS.get_key(s))
.map(|x| Keyword(x))
})
}
/// This is required for the serde deserializer of Token to work correctly.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Keyword(pub &'static str);
impl From<Keyword> for &'static str {
fn from(value: Keyword) -> Self {
value.0
}
}
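
A small sketch, not part of the diff, showing how the two lookup helpers defined above differ based on the sets they consult: `lookup` only sees reserved keywords, while `lookup_all` also covers combined and unreserved ones.

```rust
use edgeql_parser::keywords::{lookup, lookup_all, Keyword};

fn main() {
    // "union" is partial-reserved, so both helpers find it.
    assert_eq!(lookup("union"), Some(Keyword("union")));

    // "abort" is only in UNRESERVED_KEYWORDS, so plain `lookup` misses it.
    assert_eq!(lookup("abort"), None);
    assert_eq!(lookup_all("abort"), Some(Keyword("abort")));

    // Combined keywords such as "order by" are only known to `lookup_all`.
    assert_eq!(lookup_all("order by"), Some(Keyword("order by")));

    // Anything else is not a keyword at all.
    assert_eq!(lookup_all("users"), None);
}
```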


@ -5,8 +5,9 @@ pub mod helpers;
#[cfg(feature = "python")]
pub mod into_python;
pub mod keywords;
pub mod parser;
pub mod position;
pub mod preparser;
pub mod schema_file;
pub mod tokenizer;
pub mod validation;
pub mod validation;


@ -0,0 +1,590 @@
use append_only_vec::AppendOnlyVec;
use indexmap::IndexMap;
use crate::helpers::quote_name;
use crate::keywords::Keyword;
use crate::position::Span;
use crate::tokenizer::{Error, Kind, Token, Value};
pub struct Context<'s> {
spec: &'s Spec,
arena: bumpalo::Bump,
terminal_arena: AppendOnlyVec<Terminal>,
}
impl<'s> Context<'s> {
pub fn new(spec: &'s Spec) -> Self {
Context {
spec,
arena: bumpalo::Bump::new(),
terminal_arena: AppendOnlyVec::new(),
}
}
}
pub fn parse<'a>(input: &'a [Terminal], ctx: &'a Context) -> (Option<&'a CSTNode<'a>>, Vec<Error>) {
let stack_top = ctx.arena.alloc(StackNode {
parent: None,
state: 0,
value: CSTNode::Empty,
});
let initial_track = Parser {
stack_top,
error_cost: 0,
node_count: 0,
can_recover: true,
errors: Vec::new(),
};
// append EOI
let end = input.last().map(|t| t.span.end).unwrap_or_default();
let eoi = ctx.alloc_terminal(Terminal {
kind: Kind::EOI,
span: Span { start: end, end },
text: "".to_string(),
value: None,
});
let input = input.iter().chain(Some(eoi));
let mut parsers = vec![initial_track];
let mut prev_span: Option<Span> = None;
let mut new_parsers = Vec::with_capacity(parsers.len() + 5);
for token in input {
while let Some(mut parser) = parsers.pop() {
let res = parser.act(ctx, token);
if res.is_ok() {
// base case: ok
parser.node_successful();
new_parsers.push(parser);
} else {
// error: try to recover
let gap_span = {
let prev_end = prev_span.map(|p| p.end).unwrap_or(token.span.start);
Span {
start: prev_end,
end: token.span.start,
}
};
// option 1: inject a token
let possible_actions = &ctx.spec.actions[parser.stack_top.state];
for token_kind in possible_actions.keys() {
let mut inject = parser.clone();
let injection = new_token_for_injection(token_kind, ctx);
let cost = error_cost(token_kind);
let error = Error::new(format!("Missing {injection}")).with_span(gap_span);
inject.push_error(error, cost);
if inject.error_cost <= ERROR_COST_INJECT_MAX {
// println!(" --> [inject {injection}]");
if inject.act(ctx, injection).is_ok() {
// insert into parsers, to retry the original token
parsers.push(inject);
}
}
}
// option 2: skip the token
let mut skip = parser;
let error = Error::new(format!("Unexpected {token}")).with_span(token.span);
skip.push_error(error, ERROR_COST_SKIP);
if token.kind == Kind::EOF {
// extra penalty
skip.error_cost += ERROR_COST_INJECT_MAX;
skip.can_recover = false;
};
// println!(" --> [skip]");
// insert into new_parsers, so the token is skipped
new_parsers.push(skip);
}
}
// has any parser recovered?
if new_parsers.len() > 1 {
let recovered = new_parsers.iter().position(Parser::has_recovered);
if let Some(recovered) = recovered {
let mut recovered = new_parsers.swap_remove(recovered);
recovered.error_cost = 0;
new_parsers.clear();
new_parsers.push(recovered);
}
}
// prune: keep only the PARSER_COUNT_MAX best parsers
if new_parsers.len() > PARSER_COUNT_MAX {
new_parsers.sort_by_key(Parser::adjusted_cost);
new_parsers.drain(PARSER_COUNT_MAX..);
}
assert!(parsers.is_empty());
std::mem::swap(&mut parsers, &mut new_parsers);
prev_span = Some(token.span);
}
// there will always be a parser left,
// since we always allow a token to be skipped
let mut parser = parsers.into_iter().min_by_key(|p| p.error_cost).unwrap();
parser.finish();
let node = if parser.can_recover {
Some(&parser.stack_top.value)
} else {
None
};
(node, parser.errors)
}
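// A hedged sketch of how this entry point might be driven (illustrative only,
// not part of this commit). `Spec::from_json` requires the `serde` feature and
// a grammar spec serialized by the Python `process_spec` helper; `terminals`
// would come out of the tokenizer/validator pipeline.
#[cfg(feature = "serde")]
#[allow(dead_code)]
fn parse_with_recovery(spec_json: &str, terminals: &[Terminal]) -> Vec<Error> {
    let spec = Spec::from_json(spec_json).expect("valid grammar spec");
    let ctx = Context::new(&spec);
    let (cst, errors) = parse(terminals, &ctx);
    // `cst` is Some even for malformed input as long as recovery succeeded;
    // `errors` lists every token that had to be injected or skipped.
    if let Some(root) = cst {
        let _ = root; // a real caller would now convert the CST into an AST
    }
    errors
}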
impl<'s> Context<'s> {
fn alloc_terminal(&self, t: Terminal) -> &'_ Terminal {
let idx = self.terminal_arena.push(t);
&self.terminal_arena[idx]
}
}
fn new_token_for_injection<'a>(kind: &Kind, ctx: &'a Context) -> &'a Terminal {
ctx.alloc_terminal(Terminal {
kind: kind.clone(),
text: kind.text().unwrap_or_default().to_string(),
value: match kind {
Kind::Keyword(Keyword(kw)) => Some(Value::String(kw.to_string())),
Kind::Ident => Some(Value::String("my_name".to_string())),
_ => None,
},
span: Span::default(),
})
}
pub struct Spec {
pub actions: Vec<IndexMap<Kind, Action>>,
pub goto: Vec<IndexMap<String, usize>>,
pub start: String,
pub inlines: IndexMap<usize, u8>,
}
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
pub enum Action {
Shift(usize),
Reduce(Reduce),
}
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Reduce {
/// Index of the production in the associated production array
pub production_id: usize,
pub non_term: String,
/// Number of arguments
pub cnt: usize,
}
/// A node of the CST (concrete syntax tree).
///
/// Warning: this is allocated in the bumpalo arena, which never runs Drop.
/// Any type that allocates with the global allocator (such as String or Vec)
/// must be dropped manually. This is why Terminal is stored in a separate
/// vec arena that does run Drop.
#[derive(Debug, Clone, Copy)]
pub enum CSTNode<'a> {
Empty,
Terminal(&'a Terminal),
Production(Production<'a>),
}
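// A minimal sketch of the warning above (illustrative only, not part of this
// commit): bumpalo hands out arena memory but never runs Drop, so a String
// allocated directly in the bump arena would leak its heap buffer. This is
// why Terminal, which owns a String, lives in `Context::terminal_arena`
// (an `AppendOnlyVec` that does run Drop) and is only referenced from here.
#[allow(dead_code)]
fn bump_arena_never_drops(bump: &bumpalo::Bump) {
    let n: &mut u64 = bump.alloc(41); // fine: u64 has no destructor to run
    *n += 1;
    debug_assert_eq!(*n, 42);
    // Deliberately avoided: `bump.alloc(String::from("..."))` would leak.
}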
#[derive(Clone, Debug)]
pub struct Terminal {
pub kind: Kind,
pub text: String,
pub value: Option<Value>,
pub span: Span,
}
#[derive(Debug, Clone, Copy)]
pub struct Production<'a> {
pub id: usize,
pub args: &'a [CSTNode<'a>],
}
struct StackNode<'p> {
parent: Option<&'p StackNode<'p>>,
state: usize,
value: CSTNode<'p>,
}
#[derive(Clone)]
struct Parser<'s> {
stack_top: &'s StackNode<'s>,
/// sum of cost of every error recovery action
error_cost: u16,
/// number of nodes pushed to stack since last error
node_count: u16,
can_recover: bool,
errors: Vec<Error>,
}
impl<'s> Parser<'s> {
fn act(&mut self, ctx: &'s Context, token: &'s Terminal) -> Result<(), ()> {
// self.print_stack();
// println!("INPUT: {}", token.text);
loop {
// find next action
let Some(action) = ctx.spec.actions[self.stack_top.state].get(&token.kind) else {
return Err(());
};
match action {
Action::Shift(next) => {
// println!(" --> [shift {next}]");
// push on stack
self.push_on_stack(ctx, *next, CSTNode::Terminal(token));
return Ok(());
}
Action::Reduce(reduce) => {
self.reduce(ctx, reduce);
}
}
}
}
fn reduce(&mut self, ctx: &'s Context, reduce: &'s Reduce) {
let args = ctx.arena.alloc_slice_fill_with(reduce.cnt, |_| {
let v = self.stack_top.value;
self.stack_top = self.stack_top.parent.unwrap();
v
});
args.reverse();
let value = CSTNode::Production(Production {
id: reduce.production_id,
args,
});
let nstate = self.stack_top.state;
let next = *ctx.spec.goto[nstate].get(&reduce.non_term).unwrap();
// inline (if there is an inlining rule)
let mut value = value;
if let CSTNode::Production(production) = value {
if let Some(inline_position) = ctx.spec.inlines.get(&production.id) {
// inline rule found
let args = production.args;
let span = get_span_of_nodes(&args);
value = args[*inline_position as usize];
extend_span(&mut value, span, ctx);
} else {
// place back
value = CSTNode::Production(production);
}
}
self.push_on_stack(ctx, next, value);
// println!(
// " --> [reduce {} ::= ({} popped) at {}/{}]",
// production, cnt, state, nstate
// );
// self.print_stack();
}
pub fn push_on_stack(&mut self, ctx: &'s Context, state: usize, value: CSTNode<'s>) {
let node = StackNode {
parent: Some(self.stack_top),
state,
value,
};
self.stack_top = ctx.arena.alloc(node);
}
pub fn finish(&mut self) {
debug_assert!(matches!(
&self.stack_top.value,
CSTNode::Terminal(Terminal {
kind: Kind::EOI,
..
})
));
self.stack_top = self.stack_top.parent.unwrap();
// self.print_stack();
// println!(" --> accept");
#[cfg(debug_assertions)]
{
let first = self.stack_top.parent.unwrap();
assert!(
matches!(&first.value, CSTNode::Empty),
"expected 'Empty' found {:?}",
first.value
);
}
}
#[cfg(never)]
fn print_stack(&self) {
let prefix = "STACK: ";
let mut stack = Vec::new();
let mut node = Some(self.stack_top);
while let Some(n) = node {
stack.push(n);
node = n.parent.clone();
}
stack.reverse();
let names = stack
.iter()
.map(|s| format!("{:?}", s.value))
.collect::<Vec<_>>();
let mut states = format!("{:6}", ' ');
for (index, node) in stack.iter().enumerate() {
let name_width = names[index].chars().count();
states += &format!(" {:<width$}", node.state, width = name_width);
}
println!("{}{}", prefix, names.join(" "));
println!("{}", states);
}
fn push_error(&mut self, error: Error, cost: u16) {
self.errors.push(error);
self.error_cost += cost;
self.node_count = 0;
}
fn node_successful(&mut self) {
self.node_count += 1;
}
/// Error cost, reduced by a function of the number of nodes parsed
/// successfully since the last error.
fn adjusted_cost(&self) -> u16 {
let x = self.node_count.saturating_sub(3);
self.error_cost.saturating_sub(x * x)
}
fn has_recovered(&self) -> bool {
self.can_recover && self.adjusted_cost() == 0
}
}
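// A worked example of the pruning heuristic above (illustrative only, not
// part of this commit): a parser that accumulated an error cost of 10 and
// then shifted 5 nodes without further errors has an adjusted cost of
// 10 - (5 - 3)^2 = 6; after 7 clean nodes the cost saturates to 0 and, if
// `can_recover` is still set, `has_recovered` reports true.
#[test]
fn adjusted_cost_example() {
    let adjusted = |error_cost: u16, node_count: u16| -> u16 {
        let x = node_count.saturating_sub(3);
        error_cost.saturating_sub(x * x)
    };
    assert_eq!(adjusted(10, 5), 6);
    assert_eq!(adjusted(10, 7), 0);
}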
fn get_span_of_nodes(args: &[CSTNode]) -> Option<Span> {
let start = args.iter().find_map(|x| match x {
CSTNode::Terminal(t) => Some(t.span.start),
_ => None,
})?;
let end = args.iter().rev().find_map(|x| match x {
CSTNode::Terminal(t) => Some(t.span.end),
_ => None,
})?;
Some(Span { start, end })
}
fn extend_span<'a>(value: &mut CSTNode<'a>, span: Option<Span>, ctx: &'a Context) {
let Some(span) = span else {
return;
};
let CSTNode::Terminal(terminal) = value else {
return
};
let mut new_term = terminal.clone();
if span.start < new_term.span.start {
new_term.span.start = span.start;
}
if span.end > new_term.span.end {
new_term.span.end = span.end;
}
*terminal = ctx.alloc_terminal(new_term);
}
const PARSER_COUNT_MAX: usize = 10;
const ERROR_COST_INJECT_MAX: u16 = 15;
const ERROR_COST_SKIP: u16 = 3;
fn error_cost(kind: &Kind) -> u16 {
use Kind::*;
match kind {
Ident => 9,
Substitution => 8,
Keyword(_) => 10,
Dot => 5,
OpenBrace | OpenBracket | OpenParen => 5,
CloseBrace | CloseBracket | CloseParen => 1,
Namespace => 10,
Semicolon | Comma | Colon => 2,
Eq => 5,
At => 6,
IntConst => 8,
Assign | Arrow => 5,
_ => 100, // forbidden
}
}
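// Illustrative comparison of the constants above (not part of this commit):
// closing brackets are the cheapest tokens to inject, so for an input like
// `select (1 + 2` a candidate that injects the missing `)` (cost 1) tends to
// beat one that skips the offending token (cost 3) once the parsers are
// pruned by adjusted cost.
#[test]
fn prefers_cheap_injection() {
    assert!(error_cost(&Kind::CloseParen) < ERROR_COST_SKIP);
    assert!(error_cost(&Kind::CloseParen) <= ERROR_COST_INJECT_MAX);
}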
impl std::fmt::Display for Terminal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.text.is_empty() {
return write!(f, "{}", self.kind.user_friendly_text().unwrap_or_default());
}
match self.kind {
Kind::Ident => write!(f, "'{}'", &quote_name(&self.text)),
Kind::Keyword(Keyword(kw)) => write!(f, "keyword '{}'", kw.to_ascii_uppercase()),
_ => write!(f, "'{}'", self.text),
}
}
}
impl<'a> Default for CSTNode<'a> {
fn default() -> Self {
CSTNode::Empty
}
}
impl Terminal {
pub fn from_token(token: Token) -> Self {
Terminal {
kind: token.kind,
text: token.text.into(),
value: token.value,
span: token.span,
}
}
}
#[cfg(feature = "serde")]
impl Spec {
pub fn from_json(j_spec: &str) -> Result<Spec, String> {
#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct SpecJson {
pub actions: Vec<Vec<(String, Action)>>,
pub goto: Vec<Vec<(String, usize)>>,
pub start: String,
pub inlines: Vec<(usize, u8)>,
}
let v = serde_json::from_str::<SpecJson>(j_spec).map_err(|e| e.to_string())?;
let actions = v
.actions
.into_iter()
.map(|x| x.into_iter().map(|(k, a)| (get_token_kind(&k), a)))
.map(IndexMap::from_iter)
.collect();
let goto = v.goto.into_iter().map(IndexMap::from_iter).collect();
let inlines = IndexMap::from_iter(v.inlines);
Ok(Spec {
actions,
goto,
start: v.start,
inlines,
})
}
}
#[cfg(feature = "serde")]
fn get_token_kind(token_name: &str) -> Kind {
use Kind::*;
match token_name {
"+" => Add,
"&" => Ampersand,
"@" => At,
".<" => BackwardLink,
"}" => CloseBrace,
"]" => CloseBracket,
")" => CloseParen,
"??" => Coalesce,
":" => Colon,
"," => Comma,
"++" => Concat,
"/" => Div,
"." => Dot,
"**" => DoubleSplat,
"=" => Eq,
"//" => FloorDiv,
"%" => Modulo,
"*" => Mul,
"::" => Namespace,
"{" => OpenBrace,
"[" => OpenBracket,
"(" => OpenParen,
"|" => Pipe,
"^" => Pow,
";" => Semicolon,
"-" => Sub,
"?!=" => DistinctFrom,
">=" => GreaterEq,
"<=" => LessEq,
"?=" => NotDistinctFrom,
"!=" => NotEq,
"<" => Less,
">" => Greater,
"IDENT" => Ident,
"EOF" => EOF,
"<$>" => EOI,
"<e>" => Epsilon,
"BCONST" => BinStr,
"FCONST" => FloatConst,
"ICONST" => IntConst,
"NFCONST" => DecimalConst,
"NICONST" => BigIntConst,
"SCONST" => Str,
"+=" => AddAssign,
"->" => Arrow,
":=" => Assign,
"-=" => SubAssign,
"ARGUMENT" => Argument,
"SUBSTITUTION" => Substitution,
_ => {
let mut token_name = token_name.to_lowercase();
if let Some(rem) = token_name.strip_prefix("dunder") {
token_name = format!("__{rem}__");
}
let kw = crate::keywords::lookup_all(&token_name)
.unwrap_or_else(|| panic!("unknown keyword {token_name}"));
Keyword(kw)
}
}
}

View file

@ -4,13 +4,24 @@ use std::str::{from_utf8, Utf8Error};
use unicode_width::UnicodeWidthStr;
/// Span of an element in source code
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Span {
pub start: Pos,
pub end: Pos,
/// Byte offset in the original file
///
/// Technically you can read a > 4 GiB file on a 32-bit machine, so it
/// may not fit in usize
pub start: u64,
/// Byte offset in the original file
///
/// Technically you can read a > 4 GiB file on a 32-bit machine, so it
/// may not fit in usize
pub end: u64,
}
/// Original position of element in source code
#[derive(PartialOrd, Ord, PartialEq, Eq, Clone, Copy, Default, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Pos {
/// One-based line number
pub line: usize,
@ -88,6 +99,11 @@ fn new_lines_in_fragment(data: &[u8]) -> u64 {
impl InflatedPos {
pub fn from_offset(data: &[u8], offset: u64) -> Result<InflatedPos, InflatingError> {
let res = Self::from_offsets(data, &[offset as usize])?;
Ok(res.into_iter().next().unwrap())
}
pub fn from_offsets(data: &[u8], offsets: &[usize])
-> Result<Vec<InflatedPos>, InflatingError>
{
@ -125,6 +141,14 @@ impl InflatedPos {
}
return Ok(result);
}
pub fn deflate(self) -> Pos {
Pos {
line: self.line as usize + 1,
column: self.column as usize + 1,
offset: self.offset,
}
}
}
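// A minimal sketch of the helpers added above (illustrative only, not part of
// this commit): `from_offset` inflates a byte offset into a zero-based
// line/column pair and `deflate` converts that into the one-based `Pos`
// used elsewhere in the crate.
#[test]
fn offset_round_trip_example() {
    let src = b"select\n1";
    let pos = InflatedPos::from_offset(src, 7).unwrap().deflate();
    assert_eq!((pos.line, pos.column, pos.offset), (2, 1, 7));
}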
#[cfg(test)]

File diff suppressed because it is too large

View file

@ -4,11 +4,13 @@ use bigdecimal::num_bigint::ToBigInt;
use bigdecimal::BigDecimal;
use crate::helpers::{unquote_bytes, unquote_string};
use crate::position::Pos;
use crate::keywords::Keyword;
use crate::position::{Pos, Span};
use crate::tokenizer::{Error, Kind, Token, Tokenizer, Value, MAX_KEYWORD_LENGTH};
/// Applies additional validation to the tokens.
/// Combines multi-word keywords into single tokens.
/// Remaps a few token kinds.
pub struct Validator<'a> {
pub inner: Tokenizer<'a>,
@ -30,12 +32,14 @@ impl<'a> Iterator for Validator<'a> {
Err(e) => return Some(Err(Error::new(e).with_span(token.span))),
};
if let Some(text) = self.combine_multi_word_keywords(&token) {
token.kind = Kind::Keyword;
token.text = text.into();
if let Some(keyword) = self.combine_multi_word_keywords(&token) {
token.text = keyword.into();
token.kind = Kind::Keyword(Keyword(keyword));
self.peeked = None;
}
token.kind = remap_kind(token.kind);
Some(Ok(token))
}
}
@ -49,6 +53,13 @@ impl<'a> Validator<'a> {
}
}
pub fn with_eof(self) -> WithEof<'a> {
WithEof {
inner: self,
emitted: false,
}
}
/// Mimics the behavior of [std::iter::Peekable]. We could use that, but it
/// hides access to the underlying iterator.
fn next_inner(&mut self) -> Option<Result<Token<'a>, Error>> {
@ -61,7 +72,7 @@ impl<'a> Validator<'a> {
/// Mimics the behavior of [std::iter::Peekable]. We could use that, but it
/// hides access to the underlying iterator.
fn peek(&mut self) -> &Option<Result<Token<'a>, Error>> {
fn peek(&mut self) -> &Option<Result<Token, Error>> {
if self.peeked.is_none() {
self.peeked = Some(self.inner.next());
}
@ -73,8 +84,8 @@ impl<'a> Validator<'a> {
self.inner.current_pos()
}
fn combine_multi_word_keywords(&mut self, token: &Token) -> Option<&'static str> {
if !matches!(token.kind, Kind::Ident | Kind::Keyword) {
fn combine_multi_word_keywords(&mut self, token: &Token<'a>) -> Option<&'static str> {
if !matches!(token.kind, Kind::Ident | Kind::Keyword(_)) {
return None;
}
let text = &token.text;
@ -115,19 +126,19 @@ impl<'a> Validator<'a> {
return None;
}
fn peek_keyword(&mut self, kw: &str) -> bool {
fn peek_keyword(&mut self, kw: &'static str) -> bool {
self.peek()
.as_ref()
.and_then(|res| res.as_ref().ok())
.map(|t| {
(t.kind == Kind::Ident || t.kind == Kind::Keyword)
&& t.text.eq_ignore_ascii_case(kw)
t.kind == Kind::Keyword(Keyword(kw))
|| (t.kind == Kind::Ident && t.text.eq_ignore_ascii_case(kw))
})
.unwrap_or(false)
}
}
pub fn parse_value(token: &Token<'_>) -> Result<Option<Value>, String> {
pub fn parse_value(token: &Token) -> Result<Option<Value>, String> {
use Kind::*;
let text = &token.text;
let string_value = match token.kind {
@ -175,33 +186,69 @@ pub fn parse_value(token: &Token<'_>) -> Result<Option<Value>, String> {
// Python has no problem of representing such a positive
// value, though.
return u64::from_str(&text.replace("_", ""))
.map(|x| Value::Int(x as i64))
.map(Some)
.map(|x| Some(Value::Int(x as i64)))
.map_err(|e| format!("error reading int: {}", e));
}
BigIntConst => {
let dec = text[..text.len() - 1]
return text[..text.len() - 1]
.replace("_", "")
.parse::<BigDecimal>()
.map_err(|e| format!("error reading bigint: {}", e))?;
// this conversion to decimal and back to string
// fixes thing like `1e2n` which we support for bigints
return Ok(Some(Value::BigInt(
dec.to_bigint()
.ok_or_else(|| "number is not integer".to_string())?,
)));
.map_err(|e| format!("error reading bigint: {}", e))
// this conversion to decimal and back to string
// fixes things like `1e2n`, which we support for bigints
.and_then(|x| {
x.to_bigint()
.ok_or_else(|| "number is not integer".to_string())
})
.map(|x| Some(Value::BigInt(x.to_str_radix(16))));
}
BinStr => {
return unquote_bytes(text).map(Value::Bytes).map(Some);
}
Str => unquote_string(text)
.map_err(|s| s.to_string())?
.to_string(),
Str => unquote_string(text).map_err(|s| s.to_string())?.to_string(),
BacktickName => text[1..text.len() - 1].replace("``", "`"),
Ident | Keyword => text.to_string(),
Ident | Keyword(_) => text.to_string(),
Substitution => text[2..text.len() - 1].to_string(),
_ => return Ok(None),
};
Ok(Some(Value::String(string_value)))
}
fn remap_kind(kind: Kind) -> Kind {
match kind {
Kind::BacktickName => Kind::Ident,
kind => kind,
}
}
pub struct WithEof<'a> {
inner: Validator<'a>,
emitted: bool,
}
impl<'a> Iterator for WithEof<'a> {
type Item = Result<Token<'a>, Error>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(next) = self.inner.next() {
Some(next)
} else if !self.emitted {
self.emitted = true;
let pos = self.inner.current_pos().offset;
Some(Ok(Token {
kind: Kind::EOF,
text: "".into(),
value: None,
span: Span {
start: pos,
end: pos,
},
}))
} else {
None
}
}
}

View file

@ -39,6 +39,10 @@ fn tok_err(s: &str) -> String {
panic!("No error, where error expected");
}
fn keyword(kw: &'static str) -> Kind {
Keyword(edgeql_parser::keywords::Keyword(kw))
}
#[test]
fn whitespace_and_comments() {
assert_eq!(tok_str("# hello { world }"), &[] as &[&str]);
@ -64,9 +68,9 @@ fn idents() {
#[test]
fn keywords() {
assert_eq!(tok_str("SELECT a"), ["SELECT", "a"]);
assert_eq!(tok_typ("SELECT a"), [Keyword, Ident]);
assert_eq!(tok_typ("SELECT a"), [keyword("select"), Ident]);
assert_eq!(tok_str("with Select"), ["with", "Select"]);
assert_eq!(tok_typ("with Select"), [Keyword, Keyword]);
assert_eq!(tok_typ("with Select"), [keyword("with"), keyword("select")]);
}
#[test]
@ -375,92 +379,98 @@ fn decimal() {
#[test]
fn numbers_from_py() {
assert_eq!(tok_str("SELECT 3.5432;"), ["SELECT", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT 3.5432;"), [Keyword, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT +3.5432;"), ["SELECT", "+", "3.5432", ";"]);
assert_eq!(tok_str("SELECT 3.5432;"),
["SELECT", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT 3.5432;"),
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT +3.5432;"),
["SELECT", "+", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT +3.5432;"),
[Keyword, Add, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT -3.5432;"), ["SELECT", "-", "3.5432", ";"]);
[keyword("select"), Add, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT -3.5432;"),
["SELECT", "-", "3.5432", ";"]);
assert_eq!(tok_typ("SELECT -3.5432;"),
[Keyword, Sub, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32;"), ["SELECT", "354.32", ";"]);
assert_eq!(tok_typ("SELECT 354.32;"), [Keyword, FloatConst, Semicolon]);
[keyword("select"), Sub, FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32;"),
["SELECT", "354.32", ";"]);
assert_eq!(tok_typ("SELECT 354.32;"),
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 35400000000000.32;"),
["SELECT", "35400000000000.32", ";"]);
assert_eq!(tok_typ("SELECT 35400000000000.32;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 35400000000000000000.32;"),
["SELECT", "35400000000000000000.32", ";"]);
assert_eq!(tok_typ("SELECT 35400000000000000000.32;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e20;"),
["SELECT", "3.5432e20", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e+20;"),
["SELECT", "3.5432e+20", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e+20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e-20;"),
["SELECT", "3.5432e-20", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e-20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32e-20;"),
["SELECT", "354.32e-20", ";"]);
assert_eq!(tok_typ("SELECT 354.32e-20;"),
[Keyword, FloatConst, Semicolon]);
[keyword("select"), FloatConst, Semicolon]);
assert_eq!(tok_str("SELECT -0n;"),
["SELECT", "-", "0n", ";"]);
assert_eq!(tok_typ("SELECT -0n;"),
[Keyword, Sub, BigIntConst, Semicolon]);
[keyword("select"), Sub, BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT 0n;"),
["SELECT", "0n", ";"]);
assert_eq!(tok_typ("SELECT 0n;"),
[Keyword, BigIntConst, Semicolon]);
[keyword("select"), BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT 1n;"),
["SELECT", "1n", ";"]);
assert_eq!(tok_typ("SELECT 1n;"),
[Keyword, BigIntConst, Semicolon]);
[keyword("select"), BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT -1n;"),
["SELECT", "-", "1n", ";"]);
assert_eq!(tok_typ("SELECT -1n;"),
[Keyword, Sub, BigIntConst, Semicolon]);
[keyword("select"), Sub, BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT 100000n;"),
["SELECT", "100000n", ";"]);
assert_eq!(tok_typ("SELECT 100000n;"),
[Keyword, BigIntConst, Semicolon]);
[keyword("select"), BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT -100000n;"),
["SELECT", "-", "100000n", ";"]);
assert_eq!(tok_typ("SELECT -100000n;"),
[Keyword, Sub, BigIntConst, Semicolon]);
[keyword("select"), Sub, BigIntConst, Semicolon]);
assert_eq!(tok_str("SELECT -354.32n;"),
["SELECT", "-", "354.32n", ";"]);
assert_eq!(tok_typ("SELECT -354.32n;"),
[Keyword, Sub, DecimalConst, Semicolon]);
[keyword("select"), Sub, DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 35400000000000.32n;"),
["SELECT", "35400000000000.32n", ";"]);
assert_eq!(tok_typ("SELECT 35400000000000.32n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT -35400000000000000000.32n;"),
["SELECT", "-", "35400000000000000000.32n", ";"]);
assert_eq!(tok_typ("SELECT -35400000000000000000.32n;"),
[Keyword, Sub, DecimalConst, Semicolon]);
[keyword("select"), Sub, DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e20n;"),
["SELECT", "3.5432e20n", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e20n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT -3.5432e+20n;"),
["SELECT", "-", "3.5432e+20n", ";"]);
assert_eq!(tok_typ("SELECT -3.5432e+20n;"),
[Keyword, Sub, DecimalConst, Semicolon]);
[keyword("select"), Sub, DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 3.5432e-20n;"),
["SELECT", "3.5432e-20n", ";"]);
assert_eq!(tok_typ("SELECT 3.5432e-20n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
assert_eq!(tok_str("SELECT 354.32e-20n;"),
["SELECT", "354.32e-20n", ";"]);
assert_eq!(tok_typ("SELECT 354.32e-20n;"),
[Keyword, DecimalConst, Semicolon]);
[keyword("select"), DecimalConst, Semicolon]);
}
#[test]
@ -598,7 +608,7 @@ fn strings() {
assert_eq!(tok_str(r#" rb'hello' "#), [r#"rb'hello'"#]);
assert_eq!(tok_typ(r#" rb'hello' "#), [BinStr]);
assert_eq!(tok_str(r#" `hello` "#), [r#"`hello`"#]);
assert_eq!(tok_typ(r#" `hello` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `hello` "#), [Ident]);
assert_eq!(tok_str(r#" "hello""#), [r#""hello""#]);
assert_eq!(tok_typ(r#" "hello""#), [Str]);
@ -617,7 +627,7 @@ fn strings() {
assert_eq!(tok_str(r#" rb'hello'"#), [r#"rb'hello'"#]);
assert_eq!(tok_typ(r#" rb'hello'"#), [BinStr]);
assert_eq!(tok_str(r#" `hello`"#), [r#"`hello`"#]);
assert_eq!(tok_typ(r#" `hello`"#), [BacktickName]);
assert_eq!(tok_typ(r#" `hello`"#), [Ident]);
assert_eq!(tok_str(r#" "h\"ello" "#), [r#""h\"ello""#]);
assert_eq!(tok_typ(r#" "h\"ello" "#), [Str]);
@ -636,9 +646,9 @@ fn strings() {
assert_eq!(tok_str(r#" rb'hello\' "#), [r#"rb'hello\'"#]);
assert_eq!(tok_typ(r#" rb'hello\' "#), [BinStr]);
assert_eq!(tok_str(r#" `hello\` "#), [r#"`hello\`"#]);
assert_eq!(tok_typ(r#" `hello\` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `hello\` "#), [Ident]);
assert_eq!(tok_str(r#" `hel``lo` "#), [r#"`hel``lo`"#]);
assert_eq!(tok_typ(r#" `hel``lo` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `hel``lo` "#), [Ident]);
assert_eq!(tok_str(r#" "h'el`lo" "#), [r#""h'el`lo""#]);
assert_eq!(tok_typ(r#" "h'el`lo" "#), [Str]);
@ -657,7 +667,7 @@ fn strings() {
assert_eq!(tok_str(r#" rb'h"el`lo' "#), [r#"rb'h"el`lo'"#]);
assert_eq!(tok_typ(r#" rb'h"el`lo' "#), [BinStr]);
assert_eq!(tok_str(r#" `h'el"lo` "#), [r#"`h'el"lo`"#]);
assert_eq!(tok_typ(r#" `h'el"lo\` "#), [BacktickName]);
assert_eq!(tok_typ(r#" `h'el"lo\` "#), [Ident]);
assert_eq!(tok_str(" \"hel\nlo\" "), ["\"hel\nlo\""]);
assert_eq!(tok_typ(" \"hel\nlo\" "), [Str]);
@ -676,7 +686,7 @@ fn strings() {
assert_eq!(tok_str(" rb'hel\nlo' "), ["rb'hel\nlo'"]);
assert_eq!(tok_str(" br'hel\nlo' "), ["br'hel\nlo'"]);
assert_eq!(tok_str(" `hel\nlo` "), ["`hel\nlo`"]);
assert_eq!(tok_typ(" `hel\nlo` "), [BacktickName]);
assert_eq!(tok_typ(" `hel\nlo` "), [Ident]);
assert_eq!(tok_err(r#""hello"#),
"unterminated string, quoted by `\"`");
@ -762,15 +772,15 @@ fn test_dollar() {
assert_eq!(tok_str("select $$ something $$; x"),
["select", "$$ something $$", ";", "x"]);
assert_eq!(tok_typ("select $$ something $$; x"),
[Keyword, Str, Semicolon, Ident]);
[keyword("select"), Str, Semicolon, Ident]);
assert_eq!(tok_str("select $a$ ; $b$ ; $b$ ; $a$; x"),
["select", "$a$ ; $b$ ; $b$ ; $a$", ";", "x"]);
assert_eq!(tok_typ("select $a$ ; $b$ ; $b$ ; $a$; x"),
[Keyword, Str, Semicolon, Ident]);
[keyword("select"), Str, Semicolon, Ident]);
assert_eq!(tok_str("select $a$ ; $b$ ; $a$; x"),
["select", "$a$ ; $b$ ; $a$", ";", "x"]);
assert_eq!(tok_typ("select $a$ ; $b$ ; $a$; x"),
[Keyword, Str, Semicolon, Ident]);
[keyword("select"), Str, Semicolon, Ident]);
assert_eq!(tok_err("select $$ ; $ab$ test;"),
"unterminated string started with $$");
assert_eq!(tok_err("select $a$ ; $$ test;"),
@ -782,24 +792,24 @@ fn test_dollar() {
assert_eq!(tok_str("select $a$a$ ; $a$ test;"),
["select", "$a$a$ ; $a$", "test", ";"]);
assert_eq!(tok_typ("select $a$a$ ; $a$ test;"),
[Keyword, Str, Ident, Semicolon]);
[keyword("select"), Str, Ident, Semicolon]);
assert_eq!(tok_str("select $a+b; $b test; $a+b; $b ;"),
["select", "$a", "+", "b", ";", "$b", "test",
";", "$a", "+", "b", ";", "$b", ";"]);
assert_eq!(tok_typ("select $a+b; $b test; $a+b; $b ;"),
[Keyword, Argument, Add, Ident, Semicolon, Argument, Ident,
[keyword("select"), Argument, Add, Ident, Semicolon, Argument, Ident,
Semicolon, Argument, Add, Ident, Semicolon, Argument, Semicolon]);
assert_eq!(tok_str("select $def x$y test; $def x$y"),
["select", "$def", "x", "$y", "test",
";", "$def", "x", "$y"]);
assert_eq!(tok_typ("select $def x$y test; $def x$y"),
[Keyword, Argument, Ident, Argument, Ident,
[keyword("select"), Argument, Ident, Argument, Ident,
Semicolon, Argument, Ident, Argument]);
assert_eq!(tok_str("select $`x``y` + $0 + $`zz` + $1.2 + $фыва"),
["select", "$`x``y`", "+", "$0", "+", "$`zz`", "+", "$1", ".", "2",
"+", "$фыва"]);
assert_eq!(tok_typ("select $`x``y` + $0 + $`zz` + $1.2 + $фыва"),
[Keyword, Argument, Add, Argument, Add, Argument,
[keyword("select"), Argument, Add, Argument, Add, Argument,
Add, Argument, Dot, IntConst, Add, Argument]);
assert_eq!(tok_err(r#"$-"#),
"bare $ is not allowed");
@ -831,11 +841,11 @@ fn test_substitution() {
assert_eq!(tok_str("SELECT \\(expr);"),
["SELECT", "\\(expr)", ";"]);
assert_eq!(tok_typ("SELECT \\(expr);"),
[Keyword, Substitution, Semicolon]);
[keyword("select"), Substitution, Semicolon]);
assert_eq!(tok_str("SELECT \\(other_Name1);"),
["SELECT", "\\(other_Name1)", ";"]);
assert_eq!(tok_typ("SELECT \\(other_Name1);"),
[Keyword, Substitution, Semicolon]);
[keyword("select"), Substitution, Semicolon]);
assert_eq!(tok_err("SELECT \\(some-name);"),
"only alphanumerics are allowed in \\(name) token");
assert_eq!(tok_err("SELECT \\(some_name"),

View file

@ -21,6 +21,7 @@ from __future__ import annotations
from typing import *
import multiprocessing
import json
from edb import errors
from edb.common import parsing
@ -29,7 +30,7 @@ from . import parser as qlparser
from .. import ast as qlast
from .. import tokenizer as qltokenizer
EdgeQLParserBase = qlparser.EdgeQLParserBase
EdgeQLParserBase = qlparser.EdgeQLParserSpec
def append_module_aliases(tree, aliases):
@ -48,11 +49,9 @@ def append_module_aliases(tree, aliases):
def parse_fragment(
source: Union[qltokenizer.Source, str],
filename: Optional[str]=None,
filename: Optional[str] = None,
) -> qlast.Expr:
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLExpressionParser()
parser = qlparser.EdgeQLExpressionSpec().get_parser()
res = parser.parse(source, filename=filename)
assert isinstance(res, qlast.Expr)
return res
@ -60,11 +59,9 @@ def parse_fragment(
def parse_single(
source: Union[qltokenizer.Source, str],
filename: Optional[str]=None,
filename: Optional[str] = None,
) -> qlast.Statement:
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLSingleParser()
parser = qlparser.EdgeQLSingleSpec().get_parser()
res = parser.parse(source, filename=filename)
assert isinstance(res, (qlast.Query | qlast.Command))
return res
@ -106,9 +103,7 @@ def parse_command(
def parse_block(source: Union[qltokenizer.Source, str]) -> List[qlast.Base]:
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLBlockParser()
parser = qlparser.EdgeQLBlockSpec().get_parser()
return parser.parse(source)
@ -122,9 +117,8 @@ def parse_migration_body_block(
# where the source contexts don't matter anyway.
source = '{' + source + '}'
tsource = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLMigrationBodyParser()
return parser.parse(tsource)
parser = qlparser.EdgeQLMigrationBodySpec().get_parser()
return parser.parse(source)
def parse_extension_package_body_block(
@ -137,31 +131,30 @@ def parse_extension_package_body_block(
# where the source contexts don't matter anyway.
source = '{' + source + '}'
tsource = qltokenizer.Source.from_string(source)
parser = qlparser.EdgeQLExtensionPackageBodyParser()
return parser.parse(tsource)
parser = qlparser.EdgeQLExtensionPackageBodySpec().get_parser()
return parser.parse(source)
def parse_sdl(expr: str):
parser = qlparser.EdgeSDLParser()
parser = qlparser.EdgeSDLSpec().get_parser()
return parser.parse(expr)
def _load_parser(parser: qlparser.EdgeQLParserBase) -> None:
def _load_parser(parser: qlparser.EdgeQLParserSpec) -> None:
parser.get_parser_spec(allow_rebuild=True)
def preload(
allow_rebuild: bool = True,
paralellize: bool = False,
parsers: Optional[List[qlparser.EdgeQLParserBase]] = None,
parsers: Optional[List[qlparser.EdgeQLParserSpec]] = None,
) -> None:
if parsers is None:
parsers = [
qlparser.EdgeQLBlockParser(),
qlparser.EdgeQLSingleParser(),
qlparser.EdgeQLExpressionParser(),
qlparser.EdgeSDLParser(),
qlparser.EdgeQLBlockSpec(),
qlparser.EdgeQLSingleSpec(),
qlparser.EdgeQLExpressionSpec(),
qlparser.EdgeSDLSpec(),
]
if not paralellize:
@ -188,3 +181,73 @@ def preload(
pool.map(_load_parser, parsers_to_rebuild)
preload(parsers=parsers, allow_rebuild=False)
def process_spec(parser: parsing.ParserSpec) -> Tuple[str, List[Any]]:
# Converts a ParserSpec into JSON. Called from the edgeql-parser Rust crate.
spec = parser.get_parser_spec()
assert spec.pureLR
token_map: Dict[str, str] = {
v._token: c for (_, c), v in parsing.TokenMeta.token_map.items()
}
# productions
productions: List[Any] = []
production_ids: Dict[Any, int] = {}
inlines: List[Tuple[int, int]] = []
def get_production_id(prod: Any) -> int:
if prod in production_ids:
return production_ids[prod]
id = len(productions)
productions.append(prod)
production_ids[prod] = id
inline = getattr(prod.method, 'inline_index', None)
if inline is not None:
assert isinstance(inline, int)
inlines.append((id, inline))
return id
actions = []
for st_actions in spec.actions():
out_st_actions = []
for tok, acts in st_actions.items():
act = cast(Any, acts[0])
str_tok = token_map.get(str(tok), str(tok))
if 'ShiftAction' in str(type(act)):
action_obj: Any = int(act.nextState)
else:
prod = act.production
action_obj = {
'production_id': get_production_id(prod),
'non_term': str(prod.lhs),
'cnt': len(prod.rhs),
}
out_st_actions.append((str_tok, action_obj))
actions.append(out_st_actions)
# goto
goto = []
for st_goto in spec.goto():
out_goto = []
for nterm, action in st_goto.items():
out_goto.append((str(nterm), action))
goto.append(out_goto)
res = {
'actions': actions,
'goto': goto,
'start': str(spec.start_sym()),
'inlines': inlines,
}
res_json = json.dumps(res)
return (res_json, productions)

View file

@ -1,53 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2020-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from typing import *
from collections import deque
from edb.edgeql import tokenizer
from edb import _edgeql_parser as ql_parser
class EdgeQLLexer:
inputstr: str
tokens: Optional[Deque[ql_parser.Token]]
filename: Optional[str]
end_of_input: Tuple[int, int, int]
def __init__(self):
self.filename = None # TODO
def setinputstr(
self,
source: Union[str, tokenizer.Source],
filename: Optional[str]=None,
) -> None:
if isinstance(source, str):
source = tokenizer.Source.from_string(source)
self.inputstr = source.text()
self.filename = filename
self.tokens = deque(source.tokens())
self.end_of_input = self.tokens[-1].end()
def token(self) -> ql_parser.Token:
if self.tokens:
return self.tokens.popleft()

View file

@ -174,23 +174,23 @@ class T_PIPE(Token, lextoken='|'):
pass
class T_NAMEDONLY(Token):
class T_NAMEDONLY(Token, lextoken='named only'):
pass
class T_SETANNOTATION(Token):
class T_SETANNOTATION(Token, lextoken='set annotation'):
pass
class T_SETTYPE(Token):
class T_SETTYPE(Token, lextoken='set type'):
pass
class T_EXTENSIONPACKAGE(Token):
class T_EXTENSIONPACKAGE(Token, lextoken='extension package'):
pass
class T_ORDERBY(Token):
class T_ORDERBY(Token, lextoken='order by'):
pass

View file

@ -18,297 +18,176 @@
from __future__ import annotations
from typing import *
from edb import errors
from edb.common import debug, parsing
from edb.common import context as pctx
from edb.common.english import add_a as a
from edb.common import parsing
from .grammar import rust_lexer, tokens
from .grammar import expressions as gr_exprs
from .grammar import commondl as gr_commondl
from .grammar import keywords as gr_keywords
from .. import tokenizer
import edb._edgeql_parser as ql_parser
class EdgeQLParserBase(parsing.Parser):
def get_debug(self):
return debug.flags.edgeql_parser
def get_exception(self, native_err, context, token=None):
msg = native_err.args[0]
details = None
hint = None
if isinstance(native_err, errors.EdgeQLSyntaxError):
return native_err
else:
if msg.startswith('Unexpected token: '):
token = token or getattr(native_err, 'token', None)
token_kind = token.kind()
ltok = self.parser._stack[-1][0]
is_reserved = (
token.text().lower()
in gr_keywords.by_type[gr_keywords.RESERVED_KEYWORD]
)
# Look at the parsing stack and use tokens and
# non-terminals to infer the parser rule when the
# error occurred.
i, rule = self._get_rule()
if not token or token_kind == 'EOF':
msg = 'Unexpected end of line'
elif (
rule == 'shape' and
token_kind == 'IDENT' and
isinstance(ltok, parsing.Nonterm)
):
# Make sure that the previous element in the stack
# is some kind of Nonterminal, because if it's
# not, this is probably not an issue of a missing
# COMMA.
hint = (f"It appears that a ',' is missing in {a(rule)} "
f"before {token.text()!r}")
elif (
rule == 'list of arguments' and
# The stack is like <NodeName> LPAREN <AnyIdentifier>
i == 1 and
isinstance(ltok, (gr_exprs.AnyIdentifier,
tokens.T_WITH,
tokens.T_SELECT,
tokens.T_FOR,
tokens.T_INSERT,
tokens.T_UPDATE,
tokens.T_DELETE))
):
hint = ("Missing parentheses around statement used "
"as an expression")
# We want the error context correspond to the
# statement keyword
context = ltok.context
token = None
elif (
rule == 'array slice' and
# The offending token was something that could
# make an expression
token_kind in {'IDENT', 'ICONST'} and
not isinstance(ltok, tokens.T_COLON)
):
hint = (f"It appears that a ':' is missing in {a(rule)} "
f"before {token.text()!r}")
elif (
rule in {'list of arguments', 'tuple', 'array'} and
# The offending token was something that could
# make an expression
token_kind in {
'IDENT', 'TRUE', 'FALSE',
'ICONST', 'FCONST', 'NICONST', 'NFCONST',
'BCONST', 'SCONST',
} and
not isinstance(ltok, tokens.T_COMMA)
):
hint = (f"It appears that a ',' is missing in {a(rule)} "
f"before {token.text()!r}")
elif (
rule == 'definition' and
token_kind == 'IDENT'
):
# Something went wrong in a definition, so check
# if the last successful token is a keyword.
if (
isinstance(ltok, gr_exprs.Identifier) and
ltok.val.upper() == 'INDEX'
):
msg = (f"Expected 'ON', but got {token.text()!r} "
f"instead")
else:
msg = f'Unexpected {token.text()!r}'
elif rule == 'for iterator':
msg = ("Missing parentheses around complex expression in "
"a FOR iterator clause")
if i > 0:
context = pctx.merge_context([
self.parser._stack[-i][0].context, context,
])
token = None
elif hasattr(token, 'val'):
msg = f'Unexpected {token.val!r}'
elif token_kind == 'NL':
msg = 'Unexpected end of line'
elif token.text() == "explain":
msg = f'Unexpected keyword {token.text()!r}'
hint = f'Use `analyze` to show query performance details'
elif is_reserved and not isinstance(ltok, gr_exprs.Expr):
# Another token followed by a reserved keyword:
# likely an attempt to use keyword as identifier
msg = f'Unexpected keyword {token.text()!r}'
details = (
f'Token {token.text()!r} is a reserved keyword and'
f' cannot be used as an identifier'
)
hint = (
f'Use a different identifier or quote the name with'
f' backticks: `{token.text()}`'
)
else:
msg = f'Unexpected {token.text()!r}'
return errors.EdgeQLSyntaxError(
msg, details=details, hint=hint, context=context, token=token)
def _get_rule(self):
ltok = self.parser._stack[-1][0]
# Look at the parsing stack and use tokens and non-terminals
# to infer the parser rule when the error occurred.
rule = ''
def _matches_for(i):
return (
len(self.parser._stack) >= i + 3
and isinstance(self.parser._stack[-3 - i][0], tokens.T_FOR)
and isinstance(
self.parser._stack[-2 - i][0], gr_exprs.Identifier)
and isinstance(self.parser._stack[-1 - i][0], tokens.T_IN)
)
# Check if we're in the `FOR x IN <bad_token>` situation
if (
len(self.parser._stack) >= 4
and isinstance(self.parser._stack[-2][0], tokens.T_RANGBRACKET)
and isinstance(self.parser._stack[-3][0], gr_exprs.FullTypeExpr)
and isinstance(self.parser._stack[-4][0], tokens.T_LANGBRACKET)
and _matches_for(4)
):
return 4, 'for iterator'
if (
len(self.parser._stack) >= 2
and isinstance(self.parser._stack[-2][0], gr_exprs.AtomicExpr)
and _matches_for(2)
):
return 2, 'for iterator'
if (
len(self.parser._stack) >= 1
and isinstance(self.parser._stack[-1][0], gr_exprs.BaseAtomicExpr)
and _matches_for(1)
):
return 1, 'for iterator'
if _matches_for(0):
return 0, 'for iterator'
# If the last valid token was a closing brace/parent/bracket,
# so we need to find a match for it before deciding what rule
# context we're in.
need_match = isinstance(ltok, (tokens.T_RBRACE,
tokens.T_RPAREN,
tokens.T_RBRACKET))
nextel = None
for i, (el, _) in enumerate(reversed(self.parser._stack)):
if isinstance(el, tokens.Token):
# We'll need the element right before "{", "[", or "(".
prevel = self.parser._stack[-2 - i][0]
if isinstance(el, tokens.T_LBRACE):
if need_match and isinstance(ltok,
tokens.T_RBRACE):
# This is matched, while we're looking
# for unmatched braces.
need_match = False
continue
elif isinstance(prevel, gr_commondl.OptExtending):
# This is some SDL/DDL
rule = 'definition'
elif (
isinstance(prevel, gr_exprs.Expr) or
(
isinstance(prevel, tokens.T_COLON) and
isinstance(self.parser._stack[-3 - i][0],
gr_exprs.ShapePointer)
)
):
# This is some kind of shape.
rule = 'shape'
break
elif isinstance(el, tokens.T_LPAREN):
if need_match and isinstance(ltok,
tokens.T_RPAREN):
# This is matched, while we're looking
# for unmatched parentheses.
need_match = False
continue
elif isinstance(prevel, gr_exprs.NodeName):
rule = 'list of arguments'
elif isinstance(nextel, (tokens.T_FOR,
tokens.T_SELECT,
tokens.T_UPDATE,
tokens.T_DELETE,
tokens.T_INSERT,
tokens.T_FOR)):
# A parenthesized subquery expression,
# we should leave the error as is.
break
else:
rule = 'tuple'
break
elif isinstance(el, tokens.T_LBRACKET):
if need_match and isinstance(ltok,
tokens.T_RBRACKET):
# This is matched, while we're looking
# for unmatched brackets.
need_match = False
continue
# This is either an array literal or
# array index.
elif isinstance(prevel, gr_exprs.Expr):
rule = 'array slice'
else:
rule = 'array'
break
# Also keep track of the element right after current.
nextel = el
return i, rule
def get_lexer(self):
return rust_lexer.EdgeQLLexer()
class EdgeQLParserSpec(parsing.ParserSpec):
def get_parser(self):
return EdgeQLParser(self)
class EdgeQLSingleParser(EdgeQLParserBase):
class EdgeQLSingleSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import single
return single
class EdgeQLExpressionParser(EdgeQLParserBase):
class EdgeQLExpressionSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import fragment
return fragment
class EdgeQLBlockParser(EdgeQLParserBase):
class EdgeQLBlockSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import block
return block
class EdgeQLMigrationBodyParser(EdgeQLParserBase):
class EdgeQLMigrationBodySpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import migration_body
return migration_body
class EdgeQLExtensionPackageBodyParser(EdgeQLParserBase):
class EdgeQLExtensionPackageBodySpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import extension_package_body
return extension_package_body
class EdgeSDLParser(EdgeQLParserBase):
class EdgeSDLSpec(EdgeQLParserSpec):
def get_parser_spec_module(self):
from .grammar import sdldocument
return sdldocument
class EdgeQLParser:
spec: EdgeQLParserSpec
filename: Optional[str]
source: tokenizer.Source
def __init__(self, p: EdgeQLParserSpec):
self.spec = p
self.filename = None
mod = self.spec.get_parser_spec_module()
self.token_map = {}
for (_, token), cls in mod.TokenMeta.token_map.items():
self.token_map[token] = cls
def get_parser_spec(self, allow_rebuild=False):
return self.spec.get_parser_spec(allow_rebuild=allow_rebuild)
def parse(
self,
source: Union[str, tokenizer.Source],
filename: Optional[str] = None,
):
if isinstance(source, str):
source = tokenizer.Source.from_string(source)
self.filename = filename
self.source = source
parser_name = self.spec.__class__.__name__
result, productions = ql_parser.parse(parser_name, source.tokens())
if len(result.errors()) > 0:
# TODO: emit multiple errors
# Heuristic to pick the error:
# - first encountered,
# - Unexpected before Missing,
# - original order.
errs: List[Tuple[str, Tuple[int, Optional[int]]]] = result.errors()
errs.sort(key=lambda e: (e[1][0], -ord(e[0][1])))
error = errs[0]
message, span = error
position = tokenizer.inflate_position(source.text(), span)
raise errors.EdgeQLSyntaxError(message, position=position)
return self._cst_to_ast(result.out(), productions).val
def _cst_to_ast(
self, cst: ql_parser.CSTNode, productions: List[Callable]
) -> Any:
# Converts CST into AST by calling methods from the grammar classes.
#
# This function was originally written as a simple recursion.
# Then I had to unfold it, because it was hitting the recursion limit.
# The stack here contains all remaining things to do:
# - a CST node means the node has to be processed and pushed onto the
# result stack,
# - a production means that all args of the production have been processed
# and are ready to be passed to the production method. The result is
# then pushed onto the result stack.
stack: List[ql_parser.CSTNode | ql_parser.Production] = [cst]
result: List[Any] = []
while len(stack) > 0:
node = stack.pop()
if isinstance(node, ql_parser.CSTNode):
# this would be the body of the original recursive function
if terminal := node.terminal():
# Terminal is simple: just convert to parsing.Token
context = parsing.ParserContext(
name=self.filename,
buffer=self.source.text(),
start=terminal.start(),
end=terminal.end(),
)
result.append(
parsing.Token(
terminal.text(), terminal.value(), context
)
)
elif production := node.production():
# Production needs to first process all args, then
# call the appropriate method.
# (this is all in reverse, because stacks)
stack.append(production)
args = list(production.args())
args.reverse()
stack.extend(args)
else:
raise NotImplementedError(node)
elif isinstance(node, ql_parser.Production):
# production args are done, get them out of result stack
len_args = len(node.args())
split_at = len(result) - len_args
args = result[split_at:]
result = result[0:split_at]
# find correct method to call
production_id = node.id()
production = productions[production_id]
sym = production.lhs.nontermType()
assert len(args) == len(production.rhs)
production.method(sym, *args)
# push into result stack
result.append(sym)
return result.pop()

View file

@ -31,7 +31,6 @@ TRAILING_WS_IN_CONTINUATION = re.compile(r'\\ \s+\n')
class Source:
def __init__(self, text: str, tokens: List[ql_parser.Token]) -> None:
self._cache_key = hashlib.blake2b(text.encode('utf-8')).digest()
self._text = text
@ -67,7 +66,6 @@ class Source:
class NormalizedSource(Source):
def __init__(self, normalized: ql_parser.Entry, text: str) -> None:
self._text = text
self._cache_key = normalized.key()
@ -103,32 +101,70 @@ class NormalizedSource(Source):
return cls(_normalize(text), text)
def inflate_span(
source: str, span: Tuple[int, Optional[int]]
) -> Tuple[ql_parser.SourcePoint, ql_parser.SourcePoint]:
(start, end) = span
source_bytes = source.encode('utf-8')
[start_sp] = ql_parser.SourcePoint.from_offsets(source_bytes, [start])
if end is not None:
[end_sp] = ql_parser.SourcePoint.from_offsets(source_bytes, [end])
else:
end_sp = None
return (start_sp, end_sp)
def inflate_position(
source: str, span: Tuple[int, Optional[int]]
) -> Tuple[int, int, int, Optional[int]]:
(start, end) = inflate_span(source, span)
return (
start.column,
start.line,
start.offset,
end.offset if end else None,
)
def _tokenize(eql: str) -> List[ql_parser.Token]:
try:
return ql_parser.tokenize(eql)
except ql_parser.TokenizerError as e:
message, position = e.args
result = ql_parser.tokenize(eql)
if len(result.errors()) > 0:
# TODO: emit multiple errors
error = result.errors()[0]
message, span = error
position = inflate_position(eql, span)
hint = _derive_hint(eql, message, position)
raise errors.EdgeQLSyntaxError(
message, position=position, hint=hint) from e
raise errors.EdgeQLSyntaxError(message, position=position, hint=hint)
return result.out()
def _normalize(eql: str) -> ql_parser.Entry:
try:
return ql_parser.normalize(eql)
except ql_parser.TokenizerError as e:
message, position = e.args
except ql_parser.SyntaxError as e:
message, span = e.args
position = inflate_position(eql, span)
hint = _derive_hint(eql, message, position)
raise errors.EdgeQLSyntaxError(
message, position=position, hint=hint) from e
message, position=position, hint=hint
) from e
def _derive_hint(
input: str,
message: str,
position: Tuple[int, int, int],
position: Tuple[int, int, int, Optional[int]],
) -> Optional[str]:
_, _, off = position
_, _, off, _ = position
if message.endswith(
r"invalid string literal: invalid escape sequence '\ '"
):

View file

@ -90,7 +90,7 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
hint: Optional[str] = None,
details: Optional[str] = None,
context=None,
position: Optional[tuple[Optional[int], ...]] = None,
position: Optional[tuple[int, int, int, int | None]] = None,
filename: Optional[str] = None,
token=None,
pgext_code: Optional[str] = None,
@ -125,7 +125,7 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
def set_filename(self, filename):
self._attrs[FIELD_FILENAME] = filename
def set_linecol(self, line, col):
def set_linecol(self, line: Optional[int], col: Optional[int]):
if line is not None:
self._attrs[FIELD_LINE_START] = str(line)
if col is not None:
@ -143,7 +143,10 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
def has_source_context(self):
return FIELD_DETAILS in self._attrs
def set_source_context(self, context):
def set_source_context(self, context: Optional[pctx.ParserContext]):
if not context:
return
start = context.start_point
end = context.end_point
ex.replace_context(self, context)
@ -163,17 +166,14 @@ class EdgeDBError(Exception, metaclass=EdgeDBErrorMeta):
def set_position(
self,
line: Optional[int] = None,
column: Optional[int] = None,
start: Optional[int] = None,
end: Optional[int] = None,
column: int,
line: int,
start: int,
end: Optional[int],
):
self.set_linecol(line, column)
if start is not None:
self._attrs[FIELD_POSITION_START] = str(start)
end = end or start
if end is not None:
self._attrs[FIELD_POSITION_END] = str(end)
self._attrs[FIELD_POSITION_START] = str(start)
self._attrs[FIELD_POSITION_END] = str(end or start)
@property
def line(self):

View file

@ -195,7 +195,6 @@ class BaseSyntaxTest(BaseDocTest):
markup.dump(inast)
# make sure that the AST has context
#
context.ContextValidator().visit(inast)
processed_src = self.ast_to_source(inast)

View file

@ -723,7 +723,7 @@ class EQLFunctionDirective(BaseEQLDirective):
from edb.edgeql import codegen as ql_gen
from edb.edgeql import qltypes
parser = edgeql_parser.EdgeQLBlockParser()
parser = edgeql_parser.EdgeQLBlockSpec().get_parser()
try:
astnode = parser.parse(
f'create function {sig} using SQL function "xxx";')[0]
@ -796,7 +796,7 @@ class EQLConstraintDirective(BaseEQLDirective):
from edb.edgeql import ast as ql_ast
from edb.edgeql import codegen as ql_gen
parser = edgeql_parser.EdgeQLBlockParser()
parser = edgeql_parser.EdgeQLBlockSpec().get_parser()
try:
astnode = parser.parse(
f'create abstract constraint {sig};')[0]

View file

@ -72,4 +72,5 @@ from . import wipe # noqa
from . import gen_test_dumps # noqa
from . import gen_sql_introspection # noqa
from . import gen_rust_ast # noqa
from . import parser_demo # noqa
from .profiling import cli as prof_cli # noqa

299
edb/tools/parser_demo.py Normal file
View file

@ -0,0 +1,299 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2020-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import *
from edb.edgeql import ast as qlast
from edb.edgeql import tokenizer
from edb.edgeql.parser import parser as qlparser
import edb._edgeql_parser as ql_parser
from edb.tools.edb import edbcommands
@edbcommands.command("parser-demo")
def main():
for q in QUERIES:
sdl = q.startswith('sdl')
if sdl:
q = q[3:]
try:
# s = tokenizer.NormalizedSource.from_string(q)
source = tokenizer.Source.from_string(q)
except BaseException as e:
print('Error during tokenization:')
print(e)
continue
if sdl:
spec = qlparser.EdgeSDLSpec()
else:
spec = qlparser.EdgeQLBlockSpec()
parser = spec.get_parser()
parser.filename = None
parser.source = source
parser_name = spec.__class__.__name__
result, productions = ql_parser.parse(parser_name, source.tokens())
print('-' * 30)
print()
for index, error in enumerate(result.errors()):
message, span = error
(start, end) = tokenizer.inflate_span(source.text(), span)
print(f'Error [{index+1}/{len(result.errors())}]:')
print(
'\n'.join(
source.text().splitlines()[(start.line - 1) : end.line]
)
)
print(
' ' * (start.column - 1)
+ '^'
+ '-' * (end.column - start.column - 1)
+ ' '
+ message
)
print()
if result.out():
try:
ast = parser._cst_to_ast(result.out(), productions).val
except BaseException:
ast = None
if ast:
print('Recovered AST:')
if isinstance(ast, list):
for x in ast:
x.dump_edgeql()
elif isinstance(ast, qlast.Base):
ast.dump_edgeql()
else:
print(ast)
QUERIES = [
'''
select 1
''',
'''
select User { name, email } filter .name = 'Sully'
''',
'''
SELECT {354.32,
35400000000000.32,
35400000000000000000.32,
3.5432e20,
3.5432e+20,
3.5432e-20,
3.543_2e-20,
354.32e-20,
2_354.32e-20,
0e-999
}
''',
'''
with module cards
for g in (group Card by .element) union (for gi in 0 union (
element := g.key.element,
cst := sum(g.elements.cost + gi),
))
''',
'''
select '10 seconds'
''',
'''SELECT (User.id, User { name := ''',
'''SELECT (false, }]})''',
'''
SELECT User { name, last_name }
WITH u := User SELECT u;
''',
'''
SELECT (false, true false])
''',
'''
for c Card union c.hello
''',
'''
SELECT User id, name }
''',
'''
CREATE TYPE cfg::TestSessionConfig EXTENDING cfg::ConfigObject {
CREATE REQUIRED PROPERTY name -> std::str {
CREATE CONSTRAINT std::exclusive;
}
};
''',
'''
CREATE FUNCTION
std::_gen_series(
`start`: std::int64,
stop: std::int64
) -> SET OF std::int64
{
SET volatility := 'Immutable';
USING SQL FUNCTION 'generate_series';
};
''',
'''
select b"04e3b";
''',
'''
select User { intersect };
''',
'''
create module __std__;
''',
'''
create type Hello {
create property intersect -> str;
create property `__std__` -> str;
};
''',
'''
SELECT
count(
schema::Module
FILTER NOT .builtin AND NOT .name = "default"
) + count(
schema::Object
FILTER .name LIKE "default::%"
) > 0
''',
'''sdl
module test {
function len1(a: str b: str) -> std::str {
using SQL function 'length1'
}
''',
'''
SELECT len('');
''',
'''
SELECT __std__::len({'hello', 'world'});
''',
'''sdl
module test {
alias FooBaz := [1 2];
};
''',
'''
SEL ECT 1
''',
'''
SELECT (
foo: 1,
bar := 3
);
''',
'''
SELECT (
foo: (
bar: 42
)
);
''',
'''
SELECT count(FOR X IN {Foo} UNION X);
''',
'''
SELECT some_agg(User.name) OVER (ORDER BY User.age ASC);
SELECT some_agg(User.name) OVER (
PARTITION BY strlen(User.name)
ORDER BY User.age ASC);
SELECT some_agg(User.name) OVER (
PARTITION BY User.email, User.age
ORDER BY User.age ASC);
SELECT some_agg(User.name) OVER (
PARTITION BY User.email, User.age
ORDER BY User.age ASC THEN User.name ASC);
''',
'''
SELECT Issue{
name,
related_to *-1,
};
''',
'''
SELECT __type__;
''',
'''
SELECT Issue{
name,
related_to *,
};
''',
'''
SELECT Foo {(bar)};
''',
'''
SELECT Foo.__source__;
''',
'''
SELECT Foo.bar@__type__;
''',
'''
SELECT Foo {
__type__.name
};
''',
'''
SELECT INTROSPECT tuple<int64>;
''',
'''
CREATE FUNCTION std::strlen(string: std::str = '1', abc: std::str)
-> std::int64 {};
''',
'''
SELECT Obj.n + random()
''',
'''
CREATE MIGRATION { ;;; CREATE TYPE Foo ;;; CREATE TYPE Bar ;;; };
''',
'''
SELECT (User IS (Named, Text));
''',
'''sdl
module test {
scalar type foobar {
index prop on (__source__);
};
};
''',
'''
INSERT Foo FILTER Foo.bar = 42;
''',
'''sdl
module test {
function some_func($`(`: str = ) ) -> std::str {
using edgeql function 'some_other_func';
}
};
''',
'''
SELECT (a := 1, foo);
''',
'''
CREATE MODULE `__std__`;
''',
]
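A minimal sketch of how these inputs could be driven end to end, assuming the parse-and-report logic above lives in a function named main(query); both that name and the entry-point wrapper are assumptions for illustration, not something shown in this file:

if __name__ == '__main__':
    # Hypothetical driver: 'main' is an assumed name for the routine that
    # tokenizes a query, runs the recovering parser, and prints the report.
    for q in QUERIES:
        main(q)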

@ -4336,7 +4336,7 @@ class TestEdgeQLDDL(tb.DDLTestCase):
async def test_edgeql_ddl_function_20(self):
with self.assertRaisesRegex(
edgedb.EdgeQLSyntaxError,
r"Unexpected ';'"):
"Unexpected ';'"):
await self.con.execute(r'''
CREATE FUNCTION ddlf_20(f: int64) -> int64
@ -12337,7 +12337,7 @@ type default::Foo {
with self.assertRaisesRegex(
edgedb.SchemaDefinitionError,
r"possibly more than one element returned by the index expression",
_line=4, _col=34
_line=4, _col=38
):
await self.con.execute(r"""
CREATE TYPE Foo {
@ -12350,7 +12350,7 @@ type default::Foo {
with self.assertRaisesRegex(
edgedb.SchemaDefinitionError,
r"possibly more than one element returned by the index expression",
_line=5, _col=34
_line=5, _col=38
):
await self.con.execute(r"""
CREATE TYPE Foo {
@ -12364,7 +12364,7 @@ type default::Foo {
with self.assertRaisesRegex(
edgedb.SchemaDefinitionError,
r"possibly more than one element returned by the index expression",
_line=5, _col=34
_line=5, _col=38
):
await self.con.execute(r"""
CREATE TYPE Foo {
@ -12770,7 +12770,7 @@ CREATE MIGRATION m14i24uhm6przo3bpl2lqndphuomfrtq3qdjaqdg6fza7h6m7tlbra
# work, and there is a commented bit below to test that.
async with self.assertRaisesRegexTx(
edgedb.QueryError,
"Unexpected keyword 'global'"):
"Unexpected keyword 'GLOBAL'"):
await self.con.execute('''
CREATE MIGRATION
{

@ -175,7 +175,7 @@ class TestEdgeQLExplain(tb.QueryTestCase):
"contexts": [
{
"buffer_idx": 0,
"end": 116,
"end": 115,
"start": 74,
}
],
@ -278,7 +278,7 @@ class TestEdgeQLExplain(tb.QueryTestCase):
"contexts": [
{
"buffer_idx": 0,
"end": 174,
"end": 173,
"start": 134,
},
],

@ -7238,7 +7238,7 @@ aa \
edgedb.QueryError,
r'possibly more than one element returned by an expression '
r'where only singletons are allowed',
_position=29):
_position=35):
await self.con.execute('''\
SELECT Issue LIMIT LogEntry.spent_time;
@ -7249,7 +7249,7 @@ aa \
edgedb.QueryError,
r'possibly more than one element returned by an expression '
r'where only singletons are allowed',
_position=29):
_position=36):
await self.con.execute('''\
SELECT Issue OFFSET LogEntry.spent_time;
@ -7695,7 +7695,7 @@ aa \
async def test_edgeql_expr_error_after_extraction_01(self):
with self.assertRaisesRegex(
edgedb.QueryError,
"Unexpected \"'1'\""):
"Unexpected ''1''"):
await self.con.query("""
SELECT '''1''';

@ -450,7 +450,7 @@ class TestInsert(tb.QueryTestCase):
async def test_edgeql_insert_nested_07(self):
with self.assertRaisesRegex(
edgedb.EdgeQLSyntaxError,
"Unexpected 'Subordinate'"):
r"Missing '\{'"):
await self.con.execute('''
INSERT InsertTest {
subordinates: Subordinate {

@ -54,7 +54,7 @@ class TestEdgeQLIRScopeTree(tb.BaseEdgeQLCompilerTest):
@tb.must_fail(errors.QueryError,
"reference to 'User.name' changes the interpretation",
line=3, col=9)
line=3, col=16)
def test_edgeql_ir_scope_tree_bad_01(self):
"""
SELECT User.deck
@ -63,7 +63,7 @@ class TestEdgeQLIRScopeTree(tb.BaseEdgeQLCompilerTest):
@tb.must_fail(errors.QueryError,
"reference to 'User' changes the interpretation",
line=3, col=9)
line=3, col=16)
def test_edgeql_ir_scope_tree_bad_02(self):
"""
SELECT User.deck

@ -2227,7 +2227,7 @@ class TestEdgeQLSelect(tb.QueryTestCase):
edgedb.QueryError,
"cannot redefine the cardinality of link 'related_to': it is "
"defined as 'multi' in the base object type 'default::Issue'",
_position=73,
_position=74,
):
await self.con.execute("""
SELECT Issue {
@ -2253,7 +2253,7 @@ class TestEdgeQLSelect(tb.QueryTestCase):
edgedb.QueryError,
"cannot redefine link 'status' as optional: it is "
"defined as required in the base object type 'default::Issue'",
_position=71,
_position=72,
):
await self.con.execute("""
SELECT Issue {

File diff suppressed because it is too large
@ -2228,7 +2228,7 @@ class TestUpdate(tb.QueryTestCase):
edgedb.QueryError,
"cannot update link 'readonly_tag': "
"it is declared as read-only",
_position=147,
_position=148,
):
await self.con.execute(r'''
UPDATE UpdateTest

@ -1711,7 +1711,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
with self.assertRaisesRegex(
edgedb.QueryError,
"can not take cross product of volatile operation",
_position=36):
_position=37):
await self.con.execute(
r"""
SELECT {1,2} + (FOR x in {1,2,3} UNION (x*random()))
@ -1722,7 +1722,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
with self.assertRaisesRegex(
edgedb.QueryError,
"can not take cross product of volatile operation",
_position=36):
_position=37):
await self.con.execute(
r"""
SELECT ({1,2}, (INSERT Obj { n := 100 }))
@ -1733,7 +1733,7 @@ class TestEdgeQLVolatility(tb.QueryTestCase):
with self.assertRaisesRegex(
edgedb.QueryError,
"can not take cross product of volatile operation",
_position=64):
_position=65):
await self.con.execute(
r"""
SELECT ({1,2},

@ -396,7 +396,7 @@ class TestSchema(tb.BaseSchemaLoadTest):
@tb.must_fail(errors.InvalidPropertyTargetError,
"invalid property type: expected a scalar type, "
"or a scalar collection, got object type 'test::Object'",
position=73)
position=74)
def test_schema_bad_prop_02(self):
"""
type Object {
@ -1433,7 +1433,7 @@ class TestSchema(tb.BaseSchemaLoadTest):
@tb.must_fail(errors.SchemaDefinitionError,
"missing value for required property",
line=9, col=42)
line=10, col=25)
def test_schema_rewrite_missing_required_01(self):
"""
type Project {

@ -36,7 +36,7 @@ class SchemaSyntaxTest(tb.BaseSyntaxTest):
@classmethod
def get_parser(cls):
return ql_parser.EdgeSDLParser()
return ql_parser.EdgeSDLSpec().get_parser()
class TestEdgeSchemaParser(SchemaSyntaxTest):
@ -265,8 +265,8 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, "Unexpected keyword 'Commit'",
line=3, col=18)
@tb.must_fail(errors.EdgeQLSyntaxError, "Missing identifier",
line=3, col=17)
def test_eschema_syntax_type_11(self):
"""
module test {
@ -748,7 +748,7 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Expected 'ON', but got 'prop' instead", line=4, col=23)
r"Missing ':='", line=4, col=22)
def test_eschema_syntax_index_03(self):
"""
module test {
@ -757,6 +757,8 @@ class TestEdgeSchemaParser(SchemaSyntaxTest):
};
};
"""
# XXX: error recovery quality regression
# Expected 'ON', but got 'prop' instead
def test_eschema_syntax_index_04(self):
"""
@ -876,8 +878,8 @@ type LogEntry extending OwnedObject, Text {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected 'scalar'",
line=4, col=9)
r"Missing ';'",
line=2, col=55)
def test_eschema_syntax_ws_03(self):
"""
scalar type test::newScalarType0 extending str#:
@ -966,7 +968,7 @@ type LogEntry extending OwnedObject, Text {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'final'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'FINAL'",
line=3, col=13)
def test_eschema_syntax_scalar_07(self):
"""
@ -1043,7 +1045,7 @@ type LogEntry extending OwnedObject, Text {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected 'delegated'",
r"Unexpected keyword 'DELEGATED'",
line=3, col=13)
def test_eschema_syntax_constraint_02(self):
"""
@ -1112,7 +1114,7 @@ type LogEntry extending OwnedObject, Text {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected 'constraint'",
r"Unexpected keyword 'CONSTRAINT'",
line=4, col=26)
def test_eschema_syntax_constraint_07(self):
"""
@ -1135,7 +1137,7 @@ type LogEntry extending OwnedObject, Text {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'constraint'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'CONSTRAINT'",
line=3, col=13)
def test_eschema_syntax_constraint_09(self):
"""
@ -1198,7 +1200,7 @@ abstract property test::foo {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'property'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'PROPERTY'",
line=3, col=13)
def test_eschema_syntax_property_05(self):
"""
@ -1410,7 +1412,7 @@ abstract property test::foo {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'link'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected keyword 'LINK'",
line=3, col=13)
def test_eschema_syntax_link_11(self):
"""
@ -1626,7 +1628,7 @@ abstract property test::foo {
def test_eschema_syntax_function_12(self):
"""
module test {
function some_func($`(`: str = ) ) -> std::str {
function some_func($`(`: str = () ) -> std::str {
using edgeql function 'some_other_func';
}
};
@ -1770,10 +1772,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+b',
hint=r"It appears that a ',' is missing in a list of "
r"arguments before 'b'",
line=3, col=34)
r"Missing ','",
line=3, col=33)
def test_eschema_syntax_function_21(self):
"""
module test {
@ -1834,10 +1834,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+baz',
hint=r"It appears that a ',' is missing in a shape "
r"before 'baz'",
line=5, col=17)
r"Missing ','",
line=4, col=25)
def test_eschema_syntax_alias_04(self):
"""
module test {
@ -1850,10 +1848,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+2',
hint=r"It appears that a ',' is missing in a tuple "
r"before '2'",
line=3, col=32)
r"Missing ','",
line=3, col=31)
def test_eschema_syntax_alias_05(self):
"""
module test {
@ -1862,10 +1858,8 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r'Unexpected token:.+2',
hint=r"It appears that a ',' is missing in an array "
r"before '2'",
line=3, col=32)
r"Missing ','",
line=3, col=31)
def test_eschema_syntax_alias_06(self):
"""
module test {
@ -1948,7 +1942,7 @@ abstract property test::foo {
"""
@tb.must_fail(errors.EdgeQLSyntaxError,
r"Unexpected keyword 'extending'", line=3, col=46)
r"Unexpected keyword 'EXTENDING'", line=3, col=46)
def test_eschema_syntax_annotation_14(self):
"""
module test {
@ -1956,7 +1950,7 @@ abstract property test::foo {
};
"""
@tb.must_fail(errors.EdgeQLSyntaxError, r"Unexpected 'annotation'",
@tb.must_fail(errors.EdgeQLSyntaxError, r"Missing keyword 'ABSTRACT'",
line=2, col=1)
def test_eschema_syntax_annotation_15(self):
"""

@ -109,11 +109,11 @@ class TestServerProto(tb.QueryTestCase):
await self.con.query('select syntax error')
with self.assertRaisesRegex(edgedb.EdgeQLSyntaxError,
'Unexpected end of line'):
r"Missing '\)'"):
await self.con.query('select (')
with self.assertRaisesRegex(edgedb.EdgeQLSyntaxError,
'Unexpected end of line'):
r"Missing '\)'"):
await self.con.query_json('select (')
for _ in range(10):