Combine all EdgeQL grammars into a single one (#6175)

Aljaž Mur Eržen 2023-09-28 19:29:39 +02:00 committed by GitHub
parent 9b8b8f58ec
commit 7c1de21247
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 315 additions and 453 deletions
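In short: the five separate grammar modules (block, fragment, sdldocument, migration_body, extension_package_body) are folded into a single edb.edgeql.parser.grammar.start grammar, and callers select a "subgrammar" by naming a start token instead of a grammar module. A minimal before/after sketch (Python; assumes a working edb checkout with the Rust parser extension built):

```python
from edb.edgeql import parser as qlparser
from edb.edgeql.parser.grammar import tokens

# Before this commit, callers picked a grammar module:
#     qlparser.parse(qlgrammar.fragment, "select 1 + 1")
# Now a grammar token selects the entry point into the one combined spec:
qlparser.preload(allow_rebuild=True)
tree = qlparser.parse(tokens.T_STARTFRAGMENT, "select 1 + 1")
print(type(tree))  # an edb.edgeql.ast expression node
```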

View file

@@ -51,7 +51,7 @@ class TokenMeta(type):
if precedence_class is not None:
result._precedence_class = precedence_class
if name == 'Token':
if name == 'Token' or name == 'GrammarToken':
return result
if token is None:

View file

@@ -57,7 +57,7 @@ py_module_initializer!(
m.add(
py,
"cache_spec",
py_fn!(py, cache_spec(grammar_name: &PyString, py_spec: &PyObject)),
py_fn!(py, cache_spec(py_spec: &PyObject)),
)?;
m.add(py, "CSTNode", py.get_type::<CSTNode>())?;
m.add(py, "Production", py.get_type::<Production>())?;

View file

@@ -179,7 +179,8 @@ fn is_operator(token: &Token) -> bool {
| OpenBrace | CloseBrace | Dot | Semicolon | Colon | Add | Sub | Mul | Div | Modulo
| Pow | Less | Greater | Eq | Ampersand | Pipe | At => true,
DecimalConst | FloatConst | IntConst | BigIntConst | BinStr | Argument | Str
| BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon => false,
| BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon | StartBlock
| StartExtension | StartFragment | StartMigration | StartSDLDocument => false,
}
}

View file

@@ -1,34 +1,22 @@
use std::collections::HashMap;
use std::sync::Mutex;
use std::sync::OnceLock;
use cpython::{
ObjectProtocol, PyClone, PyInt, PyList, PyObject, PyResult, PyString, PyTuple, Python,
PythonObject, PythonObjectWithCheckedDowncast, ToPyObject, PyNone,
ObjectProtocol, PyClone, PyInt, PyList, PyNone, PyObject, PyResult, PyString, PyTuple, Python,
PythonObject, PythonObjectWithCheckedDowncast, ToPyObject,
};
use edgeql_parser::parser;
use once_cell::sync::Lazy;
use crate::errors::{parser_error_into_tuple, ParserResult};
use crate::pynormalize::value_to_py_object;
use crate::tokenizer::OpaqueToken;
pub fn parse(py: Python, grammar_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
let mut spec_cache = PARSER_SPECS.lock().unwrap();
pub fn parse(py: Python, start_token_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
let start_token_name = start_token_name.to_string(py).unwrap();
let grammar_name_str = grammar_name.to_string(py)?;
let (spec, productions) = match spec_cache.get(grammar_name_str.as_ref()) {
Some(spec) => spec,
None => {
let parsing_mod = py.import("edb.common.parsing")?;
let load_parser_spec = parsing_mod.get(py, "load_parser_spec")?;
let grammar_mod = py.import(grammar_name_str.as_ref())?;
let py_spec = load_parser_spec.call(py, (grammar_mod,), None)?;
_load_spec(py, &mut spec_cache, grammar_name_str.as_ref(), &py_spec)?
},
};
let (spec, productions) = get_spec(py)?;
let tokens = downcast_tokens(py, tokens)?;
let tokens = downcast_tokens(py, &start_token_name, tokens)?;
let context = parser::Context::new(spec);
let (cst, errors) = parser::parse(&tokens, &context);
@@ -90,39 +78,62 @@ py_class!(pub class Terminal |py| {
}
});
type ParserSpecs = HashMap<String, (parser::Spec, PyObject)>;
static PARSER_SPECS: OnceLock<(parser::Spec, PyObject)> = OnceLock::new();
static PARSER_SPECS: Lazy<Mutex<ParserSpecs>> = Lazy::new(|| Mutex::new(HashMap::new()));
fn downcast_tokens<'a>(py: Python, token_list: PyObject) -> PyResult<Vec<parser::Terminal>> {
fn downcast_tokens<'a>(
py: Python,
start_token_name: &str,
token_list: PyObject,
) -> PyResult<Vec<parser::Terminal>> {
let tokens = PyList::downcast_from(py, token_list)?;
let mut buf = Vec::with_capacity(tokens.len(py));
let mut buf = Vec::with_capacity(tokens.len(py) + 1);
buf.push(parser::Terminal::from_start_name(start_token_name));
for token in tokens.iter(py) {
let token = OpaqueToken::downcast_from(py, token)?;
let token = token.inner(py);
buf.push(parser::Terminal::from_token(token));
}
// adjust the span of the starting token for nicer error message spans
if buf.len() >= 2 {
buf[0].span.start = buf[1].span.start;
buf[0].span.end = buf[1].span.start;
}
Ok(buf)
}
pub fn cache_spec(
py: Python,
grammar_name: &PyString,
py_spec: &PyObject,
) -> PyResult<PyNone> {
let mut parser_specs = PARSER_SPECS.lock().unwrap();
_load_spec(py, &mut parser_specs, grammar_name.to_string(py)?.as_ref(), py_spec)?;
pub fn cache_spec(py: Python, py_spec: &PyObject) -> PyResult<PyNone> {
if PARSER_SPECS.get().is_some() {
return Ok(PyNone);
}
let x = load_spec(py, py_spec)?;
PARSER_SPECS.set(x).ok();
Ok(PyNone)
}
fn _load_spec<'a>(
py: Python,
specs: &'a mut ParserSpecs,
grammar_name: &str,
py_spec: &PyObject,
) -> PyResult<&'a (parser::Spec, PyObject)> {
fn get_spec(py: Python<'_>) -> Result<&(parser::Spec, PyObject), cpython::PyErr> {
if let Some(x) = PARSER_SPECS.get() {
return Ok(x);
}
let parsing_mod = py.import("edb.common.parsing")?;
let load_parser_spec = parsing_mod.get(py, "load_parser_spec")?;
let grammar_name = "edb.edgeql.parser.grammar.start";
let grammar_mod = py.import(grammar_name)?;
let py_spec = load_parser_spec.call(py, (grammar_mod,), None)?;
let x = load_spec(py, &py_spec)?;
PARSER_SPECS.set(x).ok();
Ok(PARSER_SPECS.get().unwrap())
}
fn load_spec(py: Python, py_spec: &PyObject) -> PyResult<(parser::Spec, PyObject)> {
let spec_to_json = py.import("edb.common.parsing")?.get(py, "spec_to_json")?;
let res = spec_to_json.call(py, (py_spec,), None)?;
@@ -132,11 +143,8 @@ fn _load_spec<'a>(
let spec_json = spec_json.to_string(py).unwrap();
let spec = parser::Spec::from_json(&spec_json).unwrap();
let productions = res.get_item(py, 1);
let result = (spec, productions);
specs.insert(grammar_name.to_string(), result);
Ok(specs.get(grammar_name).unwrap())
Ok((spec, productions))
}
fn to_py_cst<'a>(cst: &'a parser::CSTNode<'a>, py: Python) -> PyResult<CSTNode> {

View file

@@ -4,7 +4,7 @@ use append_only_vec::AppendOnlyVec;
use indexmap::IndexMap;
use crate::helpers::quote_name;
use crate::keywords::Keyword;
use crate::keywords::{self, Keyword};
use crate::position::Span;
use crate::tokenizer::{Error, Kind, Token, Value};
@@ -81,7 +81,7 @@ pub fn parse<'a>(input: &'a [Terminal], ctx: &'a Context) -> (Option<&'a CSTNode
let injection = new_token_for_injection(*token_kind, ctx);
let cost = error_cost(token_kind);
let cost = injection_cost(token_kind);
let error = Error::new(format!("Missing {injection}")).with_span(gap_span);
inject.push_error(error, cost);
@@ -519,12 +519,16 @@ const ERROR_COST_INJECT_MAX: u16 = 15;
const ERROR_COST_SKIP: u16 = 3;
const ERROR_COST_CUSTOM_ERROR: u16 = 3;
fn error_cost(kind: &Kind) -> u16 {
fn injection_cost(kind: &Kind) -> u16 {
use Kind::*;
match kind {
Ident => 9,
Substitution => 8,
// A few keywords that should not be injected since they result in
// confusing error messages.
Keyword(keywords::Keyword("delete" | "update" | "link")) => 100,
Keyword(_) => 10,
Dot => 5,
@@ -576,6 +580,17 @@ impl Terminal {
is_placeholder: false,
}
}
#[cfg(feature = "serde")]
pub fn from_start_name(start_name: &str) -> Self {
Terminal {
kind: get_token_kind(start_name),
text: "".to_string(),
value: None,
span: Default::default(),
is_placeholder: false,
}
}
}
#[cfg(feature = "serde")]
@@ -662,6 +677,12 @@ fn get_token_kind(token_name: &str) -> Kind {
"NICONST" => BigIntConst,
"SCONST" => Str,
"STARTBLOCK" => StartBlock,
"STARTEXTENSION" => StartExtension,
"STARTFRAGMENT" => StartFragment,
"STARTMIGRATION" => StartMigration,
"STARTSDLDOCUMENT" => StartSDLDocument,
"+=" => AddAssign,
"->" => Arrow,
":=" => Assign,

View file

@@ -127,6 +127,12 @@ pub enum Kind {
EOF,
EOI, // <$> (needed for LR parser)
Epsilon, // <e> (needed for LR parser)
StartBlock,
StartExtension,
StartFragment,
StartMigration,
StartSDLDocument,
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]

View file

@@ -16,20 +16,16 @@
# limitations under the License.
#
from __future__ import annotations
from typing import *
import importlib
import multiprocessing
import types
from edb import errors
from edb.common import parsing
import edb._edgeql_parser as rust_parser
from . import grammar as qlgrammar
from .grammar import tokens
from .. import ast as qlast
from .. import tokenizer as qltokenizer
@@ -61,7 +57,7 @@ def parse_fragment(
source: Union[qltokenizer.Source, str],
filename: Optional[str] = None,
) -> qlast.Expr:
res = parse(qlgrammar.fragment, source, filename=filename)
res = parse(tokens.T_STARTFRAGMENT, source, filename=filename)
assert isinstance(res, qlast.Expr)
return res
@@ -90,7 +86,7 @@ def parse_block(
source: qltokenizer.Source | str,
module_aliases: Optional[Mapping[Optional[str], str]] = None,
) -> list[qlast.Base]:
trees = parse(qlgrammar.block, source)
trees = parse(tokens.T_STARTBLOCK, source)
if module_aliases:
for tree in trees:
append_module_aliases(tree, module_aliases)
@@ -105,7 +101,7 @@ def parse_migration_body_block(
# (without braces)", so we just hack around this by adding braces.
# This is only really workable because we only use this in a place
# where the source contexts don't matter anyway.
return parse(qlgrammar.migration_body, f"{{{source}}}")
return parse(tokens.T_STARTMIGRATION, f"{{{source}}}")
def parse_extension_package_body_block(
@@ -116,22 +112,23 @@ def parse_extension_package_body_block(
# (without braces)", so we just hack around this by adding braces.
# This is only really workable because we only use this in a place
# where the source contexts don't matter anyway.
return parse(qlgrammar.extension_package_body, f"{{{source}}}")
return parse(tokens.T_STARTEXTENSION, f"{{{source}}}")
def parse_sdl(expr: str):
return parse(qlgrammar.sdldocument, expr)
return parse(tokens.T_STARTSDLDOCUMENT, expr)
def parse(
grammar: types.ModuleType,
start_token: Type[tokens.Token],
source: Union[str, qltokenizer.Source],
filename: Optional[str] = None,
):
if isinstance(source, str):
source = qltokenizer.Source.from_string(source)
result, productions = rust_parser.parse(grammar.__name__, source.tokens())
start_token_name = start_token.__name__[2:]
result, productions = rust_parser.parse(start_token_name, source.tokens())
if len(result.errors()) > 0:
# TODO: emit multiple errors
@@ -242,56 +239,14 @@ def _cst_to_ast(
return result.pop()
def _load_parser(grammar: str) -> None:
specmod = importlib.import_module(grammar)
parsing.load_parser_spec(specmod, allow_rebuild=True)
def preload(
allow_rebuild: bool = True,
paralellize: bool = False,
grammars: Optional[list[types.ModuleType]] = None,
) -> None:
if grammars is None:
grammars = [
qlgrammar.block,
qlgrammar.fragment,
qlgrammar.sdldocument,
qlgrammar.extension_package_body,
qlgrammar.migration_body,
]
if not paralellize:
try:
for grammar in grammars:
spec = parsing.load_parser_spec(
grammar, allow_rebuild=allow_rebuild)
rust_parser.cache_spec(grammar.__name__, spec)
except parsing.ParserSpecIncompatibleError as e:
raise errors.InternalServerError(e.args[0]) from None
else:
parsers_to_rebuild = []
for grammar in grammars:
try:
spec = parsing.load_parser_spec(grammar, allow_rebuild=False)
rust_parser.cache_spec(grammar.__name__, spec)
except parsing.ParserSpecIncompatibleError:
parsers_to_rebuild.append(grammar)
if len(parsers_to_rebuild) == 0:
pass
elif len(parsers_to_rebuild) == 1:
spec = parsing.load_parser_spec(
parsers_to_rebuild[0], allow_rebuild=True)
rust_parser.cache_spec(parsers_to_rebuild[0].__name__, spec)
def preload(allow_rebuild: bool = False) -> None:
grammar = qlgrammar.start
try:
spec = parsing.load_parser_spec(grammar, allow_rebuild=False)
except parsing.ParserSpecIncompatibleError as e:
if allow_rebuild:
spec = parsing.load_parser_spec(grammar, allow_rebuild=True)
else:
with multiprocessing.Pool(len(parsers_to_rebuild)) as pool:
pool.map(
_load_parser,
[mod.__name__ for mod in parsers_to_rebuild],
)
raise errors.InternalServerError(e.args[0]) from None
for grammar in parsers_to_rebuild:
spec = parsing.load_parser_spec(grammar, allow_rebuild=False)
rust_parser.cache_spec(grammar.__name__, spec)
rust_parser.cache_spec(spec)
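A sketch of the lower-level flow that the rewritten parse() and preload() share (again assuming the compiled edb._edgeql_parser extension): the Python class name loses its T_ prefix, the Rust side prepends a synthetic start terminal to the token stream, and get_spec() lazily loads the combined spec from edb.edgeql.parser.grammar.start on first use.

```python
from edb.edgeql import tokenizer as qltokenizer
from edb.edgeql.parser.grammar import tokens
import edb._edgeql_parser as rust_parser

source = qltokenizer.Source.from_string("select 1; select 2;")

# "T_STARTBLOCK" -> "STARTBLOCK"; the bare name is what the Rust
# parser maps to a Kind::StartBlock terminal and injects up front.
start_token_name = tokens.T_STARTBLOCK.__name__[2:]
result, productions = rust_parser.parse(start_token_name, source.tokens())
print(result.errors())  # [] on success
```

Note that parse_migration_body_block() and parse_extension_package_body_block() still wrap their source in braces before reaching this path, since the grammar has no production for a bare, brace-less command block.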

View file

@@ -8,8 +8,4 @@
from __future__ import annotations
from . import block as block # noqa
from . import extension_package_body as extension_package_body # noqa
from . import fragment as fragment # noqa
from . import migration_body as migration_body # noqa
from . import sdldocument as sdldocument # noqa
from . import start as start # noqa

View file

@@ -1,68 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from edb.common import parsing
from .expressions import Nonterm
from .precedence import * # NOQA
from .tokens import * # NOQA
from .statements import * # NOQA
from .ddl import * # NOQA
from .session import * # NOQA
from .config import * # NOQA
class SingleStatement(Nonterm):
@parsing.inline(0)
def reduce_Stmt(self, _):
# Expressions
pass
@parsing.inline(0)
def reduce_DDLStmt(self, _):
# Data definition commands
pass
@parsing.inline(0)
def reduce_SessionStmt(self, _):
# Session-local utility commands
pass
@parsing.inline(0)
def reduce_ConfigStmt(self, _):
# Configuration commands
pass
class StatementBlock(parsing.ListNonterm, element=SingleStatement,
separator=Semicolons): # NOQA, Semicolons are from .ddl
pass
class EdgeQLBlock(Nonterm):
"%start"
@parsing.inline(0)
def reduce_StatementBlock_OptSemicolons_EOF(self, _, _semicolon, _eof):
pass
def reduce_OptSemicolons_EOF(self, _semicolon, _eof):
self.val = []

View file

@@ -1,36 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from edb.common import parsing
from .expressions import Nonterm
from .precedence import * # NOQA
from .tokens import * # NOQA
from .statements import * # NOQA
from .ddl import * # NOQA
class CreateExtensionPackageBody(Nonterm):
"%start"
@parsing.inline(0)
def reduce_CreateExtensionPackageCommandsBlock_EOF(self, *kids):
pass

View file

@@ -1,39 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2023-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from edb.common import parsing
from .expressions import Nonterm
from .expressions import * # NOQA
from .precedence import * # NOQA
from .tokens import * # NOQA
class ExpressionFragment(Nonterm):
"%start"
@parsing.inline(0)
def reduce_ExprStmt_EOF(self, *kids):
pass
@parsing.inline(0)
def reduce_Expr_EOF(self, *kids):
pass

View file

@@ -1,36 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from edb.common import parsing
from .expressions import Nonterm
from .precedence import * # NOQA
from .tokens import * # NOQA
from .statements import * # NOQA
from .ddl import * # NOQA
class CreateMigrationBody(Nonterm):
"%start"
@parsing.inline(0)
def reduce_CreateMigrationCommandsBlock_EOF(self, *kids):
pass

View file

@@ -1,61 +0,0 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2019-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from edb.edgeql import ast as qlast
from .expressions import Nonterm
from .sdl import * # NOQA
from . import commondl
class SDLDocument(Nonterm):
"%start"
def reduce_OptSemicolons_EOF(self, *kids):
self.val = qlast.Schema(declarations=[])
def reduce_statement_without_semicolons(self, *kids):
r"""%reduce \
OptSemicolons SDLShortStatement EOF
"""
declarations = [kids[1].val]
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
def reduce_statements_without_optional_trailing_semicolons(self, *kids):
r"""%reduce \
OptSemicolons SDLStatements \
OptSemicolons SDLShortStatement EOF
"""
declarations = kids[1].val + [kids[3].val]
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
def reduce_OptSemicolons_SDLStatements_EOF(self, *kids):
declarations = kids[1].val
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
def reduce_OptSemicolons_SDLStatements_Semicolons_EOF(self, *kids):
declarations = kids[1].val
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)

View file

@@ -0,0 +1,138 @@
#
# This source file is part of the EdgeDB open source project.
#
# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from edb.common import parsing
from edb.edgeql import ast as qlast
from . import commondl
from .expressions import Nonterm
from .precedence import * # NOQA
from .tokens import * # NOQA
from .statements import * # NOQA
from .ddl import * # NOQA
from .session import * # NOQA
from .config import * # NOQA
# The main EdgeQL grammar, all of whose productions should start with a
# GrammarToken, which determines the "subgrammar" to use.
#
# To add a new "subgrammar":
# - add a new GrammarToken in tokens.py,
# - add a new production here,
# - add a new token kind in tokenizer.rs,
# - add a mapping from the Python token name into the Rust token kind
# in parser.rs `fn get_token_kind`
class EdgeQLGrammar(Nonterm):
"%start"
@parsing.inline(1)
def reduce_STARTBLOCK_EdgeQLBlock_EOF(self, *kids):
pass
@parsing.inline(1)
def reduce_STARTEXTENSION_CreateExtensionPackageCommandsBlock_EOF(self, *k):
pass
@parsing.inline(1)
def reduce_STARTMIGRATION_CreateMigrationCommandsBlock_EOF(self, *kids):
pass
@parsing.inline(1)
def reduce_STARTFRAGMENT_ExprStmt_EOF(self, *kids):
pass
@parsing.inline(1)
def reduce_STARTFRAGMENT_Expr_EOF(self, *kids):
pass
@parsing.inline(1)
def reduce_STARTSDLDOCUMENT_SDLDocument(self, *kids):
pass
class EdgeQLBlock(Nonterm):
@parsing.inline(0)
def reduce_StatementBlock_OptSemicolons(self, _, _semicolon):
pass
def reduce_OptSemicolons(self, _semicolon):
self.val = []
class SingleStatement(Nonterm):
@parsing.inline(0)
def reduce_Stmt(self, _):
# Expressions
pass
@parsing.inline(0)
def reduce_DDLStmt(self, _):
# Data definition commands
pass
@parsing.inline(0)
def reduce_SessionStmt(self, _):
# Session-local utility commands
pass
@parsing.inline(0)
def reduce_ConfigStmt(self, _):
# Configuration commands
pass
class StatementBlock(
parsing.ListNonterm, element=SingleStatement, separator=commondl.Semicolons
): # NOQA, Semicolons are from .ddl
pass
class SDLDocument(Nonterm):
def reduce_OptSemicolons_EOF(self, *kids):
self.val = qlast.Schema(declarations=[])
def reduce_statement_without_semicolons(self, *kids):
r"""%reduce \
OptSemicolons SDLShortStatement EOF
"""
declarations = [kids[1].val]
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
def reduce_statements_without_optional_trailing_semicolons(self, *kids):
r"""%reduce \
OptSemicolons SDLStatements \
OptSemicolons SDLShortStatement EOF
"""
declarations = kids[1].val + [kids[3].val]
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
def reduce_OptSemicolons_SDLStatements_EOF(self, *kids):
declarations = kids[1].val
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
def reduce_OptSemicolons_SDLStatements_Semicolons_EOF(self, *kids):
declarations = kids[1].val
commondl._validate_declarations(declarations)
self.val = qlast.Schema(declarations=declarations)
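To make the checklist in the header comment concrete, here is a hypothetical sketch of the Python half of adding a new subgrammar. Every name below is invented for illustration, the edits belong in the existing modules, and the parser spec must be rebuilt before they take effect; the Rust half (a new Kind variant plus a get_token_kind arm) is not shown.

```python
from edb.edgeql.parser.grammar.tokens import GrammarToken

# 1. In tokens.py: a hypothetical new grammar token.
class T_STARTSINGLESTMT(GrammarToken):
    pass

# 2. In start.py, inside EdgeQLGrammar: a matching production.
#
#     @parsing.inline(1)
#     def reduce_STARTSINGLESTMT_SingleStatement_EOF(self, *kids):
#         pass
#
# 3./4. In tokenizer.rs and parser.rs: add Kind::StartSingleStmt and a
#     "STARTSINGLESTMT" arm in get_token_kind (Rust side, not shown).
```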

View file

@@ -42,6 +42,36 @@ class Token(parsing.Token, metaclass=TokenMeta,
pass
class GrammarToken(Token):
"""
Instead of having different grammars, we prefix each query with a special
grammar token that directs the parser to the appropriate grammar.
This greatly reduces the combined size of grammar specifications, since the
overlap between grammars is substantial.
"""
class T_STARTBLOCK(GrammarToken):
pass
class T_STARTEXTENSION(GrammarToken):
pass
class T_STARTFRAGMENT(GrammarToken):
pass
class T_STARTMIGRATION(GrammarToken):
pass
class T_STARTSDLDOCUMENT(GrammarToken):
pass
class T_DOT(Token, lextoken='.'):
pass
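The naming contract these classes establish: parse() strips the T_ prefix (the start_token.__name__[2:] in the parse() rewrite earlier in this commit), so each class name minus T_ must appear verbatim as a string arm in get_token_kind in parser.rs. A small sanity-check sketch:

```python
from edb.edgeql.parser.grammar import tokens

# The start-token names the Rust side must recognize; compare with
# the get_token_kind() arms added earlier in this commit.
for tok in tokens.GrammarToken.__subclasses__():
    print(tok.__name__[2:])
# STARTBLOCK, STARTEXTENSION, STARTFRAGMENT,
# STARTMIGRATION, STARTSDLDOCUMENT
```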

View file

@@ -115,7 +115,7 @@ def main(get_handler):
parser.add_argument("--version-serial", type=int)
args = parser.parse_args()
ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode(), paralellize=True)
ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode())
gc.freeze()
listen_for_debugger()

View file

@@ -180,16 +180,8 @@ async def _init_cluster(
def _init_parsers():
# Initialize parsers that are used in the server process.
from edb.edgeql import parser as ql_parser
from edb.edgeql.parser import grammar as ql_grammar
ql_parser.preload(
allow_rebuild=devmode.is_in_dev_mode(),
paralellize=True,
grammars=[
ql_grammar.block,
ql_grammar.fragment,
]
)
ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode())
async def _run_server(

View file

@@ -22,7 +22,6 @@
from __future__ import annotations
from typing import *
import types
import typing
import functools
import os
@@ -176,12 +175,29 @@ class BaseDocTest(unittest.TestCase, metaclass=DocTestMeta):
)
class BaseSyntaxTest(BaseDocTest):
class PreloadParserGrammarMixin:
pass
def should_preload_parser(
cases: Iterable[unittest.TestCase],
) -> bool:
for cas in cases:
if isinstance(cas, PreloadParserGrammarMixin):
return True
return False
def preload_parser() -> None:
qlparser.preload(allow_rebuild=True)
class BaseSyntaxTest(BaseDocTest, PreloadParserGrammarMixin):
ast_to_source: Optional[Any] = None
markup_dump_lexer: Optional[str] = None
@classmethod
def get_grammar(cls):
def get_grammar_token(cls) -> Type[qlgrammar.tokens.GrammarToken]:
raise NotImplementedError
def run_test(self, *, source, spec, expected=None):
@@ -189,7 +205,7 @@ class BaseSyntaxTest(BaseDocTest):
if debug:
markup.dump_code(source, lexer=self.markup_dump_lexer)
inast = qlparser.parse(self.get_grammar(), source)
inast = qlparser.parse(self.get_grammar_token(), source)
if debug:
markup.dump(inast)
@@ -207,59 +223,6 @@ class BaseSyntaxTest(BaseDocTest):
self.assert_equal(expected_src, processed_src)
class TestCasesSetup:
def __init__(self, grammars: list[types.ModuleType]) -> None:
self.grammars = grammars
def get_test_cases_setup(
cases: Iterable[unittest.TestCase],
) -> Optional[TestCasesSetup]:
grammars: List[types.ModuleType] = []
for case in cases:
if not hasattr(case, 'get_grammar'):
continue
grammar = case.get_grammar()
if not grammar:
continue
elif isinstance(grammar, list):
grammars.extend(grammar)
else:
grammars.append(grammar)
if not grammars:
return None
else:
return TestCasesSetup(set(grammars))
def run_test_cases_setup(setup: TestCasesSetup, jobs: int) -> None:
qlparser.preload(
grammars=setup.grammars,
allow_rebuild=True,
paralellize=jobs > 1,
)
class AstValueTest(BaseDocTest):
def run_test(self, *, source, spec=None, expected=None):
debug = bool(os.environ.get(self.parser_debug_flag))
if debug:
markup.dump_code(source, lexer=self.markup_dump_lexer)
inast = qlparser.parse(self.get_grammar(), source)
if debug:
markup.dump(inast)
for var in inast.definitions[0].variables:
asttype, val = expected[var.name]
self.assertIsInstance(var.value, asttype)
self.assertEqual(var.value.value, val)
_std_schema = None
_refl_schema = None
_schema_class_layout = None
@@ -338,7 +301,7 @@ def new_compiler():
)
class BaseSchemaTest(BaseDocTest):
class BaseSchemaTest(BaseDocTest, PreloadParserGrammarMixin):
DEFAULT_MODULE = 'default'
SCHEMA: Optional[str] = None
@@ -352,15 +315,6 @@ class BaseSchemaTest(BaseDocTest):
else:
cls.schema = _load_std_schema()
@classmethod
def get_grammar(cls):
return [
qlgrammar.block,
qlgrammar.fragment,
qlgrammar.sdldocument,
qlgrammar.extension_package_body,
]
@classmethod
def run_ddl(cls, schema, ddl, default_module=defines.DEFAULT_MODULE_ALIAS):
statements = edgeql.parse_block(ddl)

View file

@@ -726,7 +726,7 @@ class EQLFunctionDirective(BaseEQLDirective):
try:
astnode = edgeql_parser.parse(
edgeql_grammar.block,
edgeql_grammar.tokens.T_STARTBLOCK,
f'create function {sig} using SQL function "xxx";')[0]
except Exception as ex:
raise self.error(
@@ -800,8 +800,9 @@ class EQLConstraintDirective(BaseEQLDirective):
try:
astnode = edgeql_parser.parse(
edgeql_grammar.block,
f'create abstract constraint {sig};')[0]
edgeql_grammar.tokens.T_STARTBLOCK,
f'create abstract constraint {sig};'
)[0]
except Exception as ex:
raise self.error(
f'could not parse constraint signature {sig!r}') from ex

View file

@@ -21,7 +21,7 @@ from typing import *
from edb.edgeql import ast as qlast
from edb.edgeql import tokenizer
from edb.edgeql import parser as qlparser
from edb.edgeql.parser import grammar as qlgrammar
from edb.edgeql.parser.grammar import tokens as qltokens
import edb._edgeql_parser as rust_parser
@@ -30,7 +30,7 @@ from edb.tools.edb import edbcommands
@edbcommands.command("parser-demo")
def main():
for q in QUERIES:
for q in QUERIES[-10:]:
sdl = q.startswith('sdl')
if sdl:
q = q[3:]
@@ -43,9 +43,10 @@ def main():
print(e)
continue
grammar = qlgrammar.sdldocument if sdl else qlgrammar.block
start_t = qltokens.T_STARTSDLDOCUMENT if sdl else qltokens.T_STARTBLOCK
start_t_name = start_t.__name__[2:]
tokens = source.tokens()
result, productions = rust_parser.parse(grammar.__name__, tokens)
result, productions = rust_parser.parse(start_t_name, tokens)
print('-' * 30)
print()
@@ -313,4 +314,7 @@ QUERIES = [
'''
SELECT INTROSPECT tuple<int64>;
''',
'''
(SELECT User.name) OFFSET 2;
''',
]

View file

@@ -824,7 +824,7 @@ class ParallelTextTestRunner:
)
setup = tb.get_test_cases_setup(cases)
server_used = tb.test_cases_use_server(cases)
lang_setup = tb_lang.get_test_cases_setup(cases)
preload_parser = tb_lang.should_preload_parser(cases)
bootstrap_time_taken = 0
tests_time_taken = 0
result = None
@@ -863,8 +863,8 @@
os.environ["EDGEDB_SERVER_JWS_KEY_FILE"] = str(jwk_file)
if lang_setup:
tb_lang.run_test_cases_setup(lang_setup, jobs=self.num_workers)
if preload_parser:
tb_lang.preload_parser()
try:
if setup:

View file

@@ -841,11 +841,7 @@ class build_parsers(setuptools.Command):
'alongside your pure Python modules')]
sources = [
"edb.edgeql.parser.grammar.block",
"edb.edgeql.parser.grammar.fragment",
"edb.edgeql.parser.grammar.sdldocument",
"edb.edgeql.parser.grammar.migration_body",
"edb.edgeql.parser.grammar.extension_package_body",
"edb.edgeql.parser.grammar.start",
]
def initialize_options(self):

View file

@@ -24,7 +24,7 @@ from edb import errors
from edb.testbase import lang as tb
from edb.edgeql import generate_source as edgeql_to_source
from edb.edgeql.parser import grammar as edgeql_grammar
from edb.edgeql.parser import grammar as qlgrammar
from edb.tools import test
@@ -35,8 +35,8 @@ class EdgeQLSyntaxTest(tb.BaseSyntaxTest):
ast_to_source = edgeql_to_source
@classmethod
def get_grammar(cls):
return edgeql_grammar.block
def get_grammar_token(cls):
return qlgrammar.tokens.T_STARTBLOCK
class TestEdgeQLParser(EdgeQLSyntaxTest):

View file

@@ -24,7 +24,7 @@ from edb import errors
from edb.testbase import lang as tb
from edb.edgeql import generate_source
from edb.edgeql.parser import grammar as ql_grammar
from edb.edgeql.parser import grammar as qlgrammar
from edb.tools import test
@@ -35,8 +35,8 @@ class SchemaSyntaxTest(tb.BaseSyntaxTest):
ast_to_source = functools.partial(generate_source, unsorted=True)
@classmethod
def get_grammar(cls):
return ql_grammar.sdldocument
def get_grammar_token(cls):
return qlgrammar.tokens.T_STARTSDLDOCUMENT
class TestEdgeSchemaParser(SchemaSyntaxTest):