mirror of https://github.com/maxkratz/edgedb.git
synced 2024-09-16 18:59:05 +00:00

Combine all EdgeQL grammars into a single one (#6175)

parent 9b8b8f58ec, commit 7c1de21247
24 changed files with 315 additions and 453 deletions
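[Editor's note] The heart of this commit is visible in the parser.py hunks further down: rust_parser.parse() now receives the name of a start token (e.g. "STARTBLOCK") instead of a grammar module path, and one combined spec serves every entry point. A minimal sketch of the name mapping, using the token classes this diff adds (the helper function itself is hypothetical):

```python
from edb.edgeql.parser.grammar import tokens

def start_token_name(start_token: type) -> str:
    # parse() strips the "T_" prefix: T_STARTBLOCK -> "STARTBLOCK"
    return start_token.__name__[2:]

assert start_token_name(tokens.T_STARTBLOCK) == "STARTBLOCK"
assert start_token_name(tokens.T_STARTSDLDOCUMENT) == "STARTSDLDOCUMENT"
```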
@@ -51,7 +51,7 @@ class TokenMeta(type):
         if precedence_class is not None:
             result._precedence_class = precedence_class

-        if name == 'Token':
+        if name == 'Token' or name == 'GrammarToken':
             return result

         if token is None:
@@ -57,7 +57,7 @@ py_module_initializer!(
     m.add(
         py,
         "cache_spec",
-        py_fn!(py, cache_spec(grammar_name: &PyString, py_spec: &PyObject)),
+        py_fn!(py, cache_spec(py_spec: &PyObject)),
     )?;
     m.add(py, "CSTNode", py.get_type::<CSTNode>())?;
     m.add(py, "Production", py.get_type::<Production>())?;
@@ -179,7 +179,8 @@ fn is_operator(token: &Token) -> bool {
         | OpenBrace | CloseBrace | Dot | Semicolon | Colon | Add | Sub | Mul | Div | Modulo
         | Pow | Less | Greater | Eq | Ampersand | Pipe | At => true,
        DecimalConst | FloatConst | IntConst | BigIntConst | BinStr | Argument | Str
-        | BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon => false,
+        | BacktickName | Keyword(_) | Ident | Substitution | EOF | EOI | Epsilon | StartBlock
+        | StartExtension | StartFragment | StartMigration | StartSDLDocument => false,
     }
 }
@@ -1,34 +1,22 @@
-use std::collections::HashMap;
-use std::sync::Mutex;
+use std::sync::OnceLock;

 use cpython::{
-    ObjectProtocol, PyClone, PyInt, PyList, PyObject, PyResult, PyString, PyTuple, Python,
-    PythonObject, PythonObjectWithCheckedDowncast, ToPyObject, PyNone,
+    ObjectProtocol, PyClone, PyInt, PyList, PyNone, PyObject, PyResult, PyString, PyTuple, Python,
+    PythonObject, PythonObjectWithCheckedDowncast, ToPyObject,
 };

 use edgeql_parser::parser;
-use once_cell::sync::Lazy;

 use crate::errors::{parser_error_into_tuple, ParserResult};
 use crate::pynormalize::value_to_py_object;
 use crate::tokenizer::OpaqueToken;

-pub fn parse(py: Python, grammar_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
-    let mut spec_cache = PARSER_SPECS.lock().unwrap();
-
-    let grammar_name_str = grammar_name.to_string(py)?;
-    let (spec, productions) = match spec_cache.get(grammar_name_str.as_ref()) {
-        Some(spec) => spec,
-        None => {
-            let parsing_mod = py.import("edb.common.parsing")?;
-            let load_parser_spec = parsing_mod.get(py, "load_parser_spec")?;
-            let grammar_mod = py.import(grammar_name_str.as_ref())?;
-            let py_spec = load_parser_spec.call(py, (grammar_mod,), None)?;
-            _load_spec(py, &mut spec_cache, grammar_name_str.as_ref(), &py_spec)?
-        },
-    };
+pub fn parse(py: Python, start_token_name: &PyString, tokens: PyObject) -> PyResult<PyTuple> {
+    let start_token_name = start_token_name.to_string(py).unwrap();
+
+    let (spec, productions) = get_spec(py)?;

-    let tokens = downcast_tokens(py, tokens)?;
+    let tokens = downcast_tokens(py, &start_token_name, tokens)?;

     let context = parser::Context::new(spec);
     let (cst, errors) = parser::parse(&tokens, &context);
@@ -90,39 +78,62 @@ py_class!(pub class Terminal |py| {
     }
 });

-type ParserSpecs = HashMap<String, (parser::Spec, PyObject)>;
+static PARSER_SPECS: OnceLock<(parser::Spec, PyObject)> = OnceLock::new();

-static PARSER_SPECS: Lazy<Mutex<ParserSpecs>> = Lazy::new(|| Mutex::new(HashMap::new()));
-
-fn downcast_tokens<'a>(py: Python, token_list: PyObject) -> PyResult<Vec<parser::Terminal>> {
+fn downcast_tokens<'a>(
+    py: Python,
+    start_token_name: &str,
+    token_list: PyObject,
+) -> PyResult<Vec<parser::Terminal>> {
     let tokens = PyList::downcast_from(py, token_list)?;

-    let mut buf = Vec::with_capacity(tokens.len(py));
+    let mut buf = Vec::with_capacity(tokens.len(py) + 1);
+    buf.push(parser::Terminal::from_start_name(start_token_name));
     for token in tokens.iter(py) {
         let token = OpaqueToken::downcast_from(py, token)?;
         let token = token.inner(py);

         buf.push(parser::Terminal::from_token(token));
     }

+    // adjust the span of the starting token for nicer error message spans
+    if buf.len() >= 2 {
+        buf[0].span.start = buf[1].span.start;
+        buf[0].span.end = buf[1].span.start;
+    }
+
     Ok(buf)
 }

-pub fn cache_spec(
-    py: Python,
-    grammar_name: &PyString,
-    py_spec: &PyObject,
-) -> PyResult<PyNone> {
-    let mut parser_specs = PARSER_SPECS.lock().unwrap();
-    _load_spec(py, &mut parser_specs, grammar_name.to_string(py)?.as_ref(), py_spec)?;
+pub fn cache_spec(py: Python, py_spec: &PyObject) -> PyResult<PyNone> {
+    if PARSER_SPECS.get().is_some() {
+        return Ok(PyNone);
+    }
+
+    let x = load_spec(py, py_spec)?;
+    PARSER_SPECS.set(x).ok();
     Ok(PyNone)
 }

-fn _load_spec<'a>(
-    py: Python,
-    specs: &'a mut ParserSpecs,
-    grammar_name: &str,
-    py_spec: &PyObject,
-) -> PyResult<&'a (parser::Spec, PyObject)> {
+fn get_spec(py: Python<'_>) -> Result<&(parser::Spec, PyObject), cpython::PyErr> {
+    if let Some(x) = PARSER_SPECS.get() {
+        return Ok(x);
+    }
+
+    let parsing_mod = py.import("edb.common.parsing")?;
+    let load_parser_spec = parsing_mod.get(py, "load_parser_spec")?;
+
+    let grammar_name = "edb.edgeql.parser.grammar.start";
+    let grammar_mod = py.import(grammar_name)?;
+    let py_spec = load_parser_spec.call(py, (grammar_mod,), None)?;
+
+    let x = load_spec(py, &py_spec)?;
+
+    PARSER_SPECS.set(x).ok();
+    Ok(PARSER_SPECS.get().unwrap())
+}
+
+fn load_spec(py: Python, py_spec: &PyObject) -> PyResult<(parser::Spec, PyObject)> {
     let spec_to_json = py.import("edb.common.parsing")?.get(py, "spec_to_json")?;

     let res = spec_to_json.call(py, (py_spec,), None)?;
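[Editor's note] The downcast_tokens() change above is what lets one grammar replace five: every token stream handed to the LALR parser is prefixed with a synthetic start terminal, and the combined spec dispatches on it. A rough Python rendering of the idea (function name and tuple shape are illustrative only):

```python
def with_start_token(start_token_name: str, tokens: list) -> list:
    # Every production of the combined grammar begins with a GrammarToken,
    # so the first terminal selects the "subgrammar" for the whole stream.
    return [("start", start_token_name), *tokens]

# e.g. with_start_token("STARTFRAGMENT", lexed) parses a bare expression.
```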
@@ -132,11 +143,8 @@ fn _load_spec<'a>(
     let spec_json = spec_json.to_string(py).unwrap();
     let spec = parser::Spec::from_json(&spec_json).unwrap();
     let productions = res.get_item(py, 1);
-    let result = (spec, productions);

-    specs.insert(grammar_name.to_string(), result);
-
-    Ok(specs.get(grammar_name).unwrap())
+    Ok((spec, productions))
 }

 fn to_py_cst<'a>(cst: &'a parser::CSTNode<'a>, py: Python) -> PyResult<CSTNode> {
@@ -4,7 +4,7 @@ use append_only_vec::AppendOnlyVec;
 use indexmap::IndexMap;

 use crate::helpers::quote_name;
-use crate::keywords::Keyword;
+use crate::keywords::{self, Keyword};
 use crate::position::Span;
 use crate::tokenizer::{Error, Kind, Token, Value};
@@ -81,7 +81,7 @@ pub fn parse<'a>(input: &'a [Terminal], ctx: &'a Context) -> (Option<&'a CSTNode

             let injection = new_token_for_injection(*token_kind, ctx);

-            let cost = error_cost(token_kind);
+            let cost = injection_cost(token_kind);
             let error = Error::new(format!("Missing {injection}")).with_span(gap_span);
             inject.push_error(error, cost);
@@ -519,12 +519,16 @@ const ERROR_COST_INJECT_MAX: u16 = 15;
 const ERROR_COST_SKIP: u16 = 3;
 const ERROR_COST_CUSTOM_ERROR: u16 = 3;

-fn error_cost(kind: &Kind) -> u16 {
+fn injection_cost(kind: &Kind) -> u16 {
     use Kind::*;

     match kind {
         Ident => 9,
         Substitution => 8,
+
+        // A few keywords that should not be injected since they result in
+        // confusing error messages.
+        Keyword(keywords::Keyword("delete" | "update" | "link")) => 100,
         Keyword(_) => 10,

         Dot => 5,
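[Editor's note] The rename above (error_cost to injection_cost) matches what the table encodes: the price of injecting a given token during error recovery, with rarely-helpful injections priced out. A Python paraphrase of just the arms shown (the real table in parser.rs has many more):

```python
def injection_cost(kind: str) -> int:
    # Higher cost = error recovery is less likely to inject this token.
    if kind in ("delete", "update", "link"):  # confusing suggestions
        return 100
    return {"Ident": 9, "Substitution": 8, "Dot": 5}.get(kind, 10)  # 10 = other keywords
```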
@@ -576,6 +580,17 @@ impl Terminal {
             is_placeholder: false,
         }
     }
+
+    #[cfg(feature = "serde")]
+    pub fn from_start_name(start_name: &str) -> Self {
+        Terminal {
+            kind: get_token_kind(start_name),
+            text: "".to_string(),
+            value: None,
+            span: Default::default(),
+            is_placeholder: false,
+        }
+    }
 }

 #[cfg(feature = "serde")]
@@ -662,6 +677,12 @@ fn get_token_kind(token_name: &str) -> Kind {
         "NICONST" => BigIntConst,
         "SCONST" => Str,

+        "STARTBLOCK" => StartBlock,
+        "STARTEXTENSION" => StartExtension,
+        "STARTFRAGMENT" => StartFragment,
+        "STARTMIGRATION" => StartMigration,
+        "STARTSDLDOCUMENT" => StartSDLDocument,
+
         "+=" => AddAssign,
         "->" => Arrow,
         ":=" => Assign,
@@ -127,6 +127,12 @@ pub enum Kind {
     EOF,
     EOI,     // <$> (needed for LR parser)
     Epsilon, // <e> (needed for LR parser)
+
+    StartBlock,
+    StartExtension,
+    StartFragment,
+    StartMigration,
+    StartSDLDocument,
 }

 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -16,20 +16,16 @@
 # limitations under the License.
 #


 from __future__ import annotations
 from typing import *

-import importlib
-import multiprocessing
-import types
-
 from edb import errors
 from edb.common import parsing

 import edb._edgeql_parser as rust_parser

 from . import grammar as qlgrammar
+from .grammar import tokens

 from .. import ast as qlast
 from .. import tokenizer as qltokenizer
@@ -61,7 +57,7 @@ def parse_fragment(
     source: Union[qltokenizer.Source, str],
     filename: Optional[str] = None,
 ) -> qlast.Expr:
-    res = parse(qlgrammar.fragment, source, filename=filename)
+    res = parse(tokens.T_STARTFRAGMENT, source, filename=filename)
     assert isinstance(res, qlast.Expr)
     return res

@@ -90,7 +86,7 @@ def parse_block(
     source: qltokenizer.Source | str,
     module_aliases: Optional[Mapping[Optional[str], str]] = None,
 ) -> list[qlast.Base]:
-    trees = parse(qlgrammar.block, source)
+    trees = parse(tokens.T_STARTBLOCK, source)
     if module_aliases:
         for tree in trees:
             append_module_aliases(tree, module_aliases)
@@ -105,7 +101,7 @@ def parse_migration_body_block(
     # (without braces)", so we just hack around this by adding braces.
     # This is only really workable because we only use this in a place
     # where the source contexts don't matter anyway.
-    return parse(qlgrammar.migration_body, f"{{{source}}}")
+    return parse(tokens.T_STARTMIGRATION, f"{{{source}}}")


 def parse_extension_package_body_block(
@@ -116,22 +112,23 @@ def parse_extension_package_body_block(
     # (without braces)", so we just hack around this by adding braces.
     # This is only really workable because we only use this in a place
     # where the source contexts don't matter anyway.
-    return parse(qlgrammar.extension_package_body, f"{{{source}}}")
+    return parse(tokens.T_STARTEXTENSION, f"{{{source}}}")


 def parse_sdl(expr: str):
-    return parse(qlgrammar.sdldocument, expr)
+    return parse(tokens.T_STARTSDLDOCUMENT, expr)


 def parse(
-    grammar: types.ModuleType,
+    start_token: Type[tokens.Token],
     source: Union[str, qltokenizer.Source],
     filename: Optional[str] = None,
 ):
     if isinstance(source, str):
         source = qltokenizer.Source.from_string(source)

-    result, productions = rust_parser.parse(grammar.__name__, source.tokens())
+    start_token_name = start_token.__name__[2:]
+    result, productions = rust_parser.parse(start_token_name, source.tokens())

     if len(result.errors()) > 0:
         # TODO: emit multiple errors
@@ -242,56 +239,14 @@ def _cst_to_ast(
     return result.pop()


-def _load_parser(grammar: str) -> None:
-    specmod = importlib.import_module(grammar)
-    parsing.load_parser_spec(specmod, allow_rebuild=True)
-
-
-def preload(
-    allow_rebuild: bool = True,
-    paralellize: bool = False,
-    grammars: Optional[list[types.ModuleType]] = None,
-) -> None:
-    if grammars is None:
-        grammars = [
-            qlgrammar.block,
-            qlgrammar.fragment,
-            qlgrammar.sdldocument,
-            qlgrammar.extension_package_body,
-            qlgrammar.migration_body,
-        ]
-
-    if not paralellize:
-        try:
-            for grammar in grammars:
-                spec = parsing.load_parser_spec(
-                    grammar, allow_rebuild=allow_rebuild)
-                rust_parser.cache_spec(grammar.__name__, spec)
-        except parsing.ParserSpecIncompatibleError as e:
-            raise errors.InternalServerError(e.args[0]) from None
-    else:
-        parsers_to_rebuild = []
-
-        for grammar in grammars:
-            try:
-                spec = parsing.load_parser_spec(grammar, allow_rebuild=False)
-                rust_parser.cache_spec(grammar.__name__, spec)
-            except parsing.ParserSpecIncompatibleError:
-                parsers_to_rebuild.append(grammar)
-
-        if len(parsers_to_rebuild) == 0:
-            pass
-        elif len(parsers_to_rebuild) == 1:
-            spec = parsing.load_parser_spec(
-                parsers_to_rebuild[0], allow_rebuild=True)
-            rust_parser.cache_spec(parsers_to_rebuild[0].__name__, spec)
-        else:
-            with multiprocessing.Pool(len(parsers_to_rebuild)) as pool:
-                pool.map(
-                    _load_parser,
-                    [mod.__name__ for mod in parsers_to_rebuild],
-                )
-
-            for grammar in parsers_to_rebuild:
-                spec = parsing.load_parser_spec(grammar, allow_rebuild=False)
-                rust_parser.cache_spec(grammar.__name__, spec)
+def preload(allow_rebuild: bool = False) -> None:
+    grammar = qlgrammar.start
+    try:
+        spec = parsing.load_parser_spec(grammar, allow_rebuild=False)
+    except parsing.ParserSpecIncompatibleError as e:
+        if allow_rebuild:
+            spec = parsing.load_parser_spec(grammar, allow_rebuild=True)
+        else:
+            raise errors.InternalServerError(e.args[0]) from None
+
+    rust_parser.cache_spec(spec)
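[Editor's note] With the multiprocessing machinery gone, the call sites later in this diff reduce preload() to a one-liner; for reference (the devmode import is assumed to be edb.common.devmode, matching the server code):

```python
from edb.common import devmode
from edb.edgeql import parser as ql_parser

# Rebuild the pickled grammar spec only in a dev checkout; in production an
# incompatible spec raises InternalServerError, per preload() above.
ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode())
```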
@@ -8,8 +8,4 @@
 from __future__ import annotations


-from . import block as block  # noqa
-from . import extension_package_body as extension_package_body  # noqa
-from . import fragment as fragment  # noqa
-from . import migration_body as migration_body  # noqa
-from . import sdldocument as sdldocument  # noqa
+from . import start as start  # noqa
@@ -1,68 +0,0 @@
-#
-# This source file is part of the EdgeDB open source project.
-#
-# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-from __future__ import annotations
-
-from edb.common import parsing
-
-from .expressions import Nonterm
-from .precedence import *  # NOQA
-from .tokens import *  # NOQA
-from .statements import *  # NOQA
-from .ddl import *  # NOQA
-from .session import *  # NOQA
-from .config import *  # NOQA
-
-
-class SingleStatement(Nonterm):
-    @parsing.inline(0)
-    def reduce_Stmt(self, _):
-        # Expressions
-        pass
-
-    @parsing.inline(0)
-    def reduce_DDLStmt(self, _):
-        # Data definition commands
-        pass
-
-    @parsing.inline(0)
-    def reduce_SessionStmt(self, _):
-        # Session-local utility commands
-        pass
-
-    @parsing.inline(0)
-    def reduce_ConfigStmt(self, _):
-        # Configuration commands
-        pass
-
-
-class StatementBlock(parsing.ListNonterm, element=SingleStatement,
-                     separator=Semicolons):  # NOQA, Semicolons are from .ddl
-    pass
-
-
-class EdgeQLBlock(Nonterm):
-    "%start"
-
-    @parsing.inline(0)
-    def reduce_StatementBlock_OptSemicolons_EOF(self, _, _semicolon, _eof):
-        pass
-
-    def reduce_OptSemicolons_EOF(self, _semicolon, _eof):
-        self.val = []
@@ -1,36 +0,0 @@
-#
-# This source file is part of the EdgeDB open source project.
-#
-# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-from __future__ import annotations
-
-from edb.common import parsing
-
-from .expressions import Nonterm
-from .precedence import *  # NOQA
-from .tokens import *  # NOQA
-from .statements import *  # NOQA
-from .ddl import *  # NOQA
-
-
-class CreateExtensionPackageBody(Nonterm):
-    "%start"
-
-    @parsing.inline(0)
-    def reduce_CreateExtensionPackageCommandsBlock_EOF(self, *kids):
-        pass
@@ -1,39 +0,0 @@
-#
-# This source file is part of the EdgeDB open source project.
-#
-# Copyright 2023-present MagicStack Inc. and the EdgeDB authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-from __future__ import annotations
-
-from edb.common import parsing
-
-from .expressions import Nonterm
-from .expressions import *  # NOQA
-from .precedence import *  # NOQA
-from .tokens import *  # NOQA
-
-
-class ExpressionFragment(Nonterm):
-    "%start"
-
-    @parsing.inline(0)
-    def reduce_ExprStmt_EOF(self, *kids):
-        pass
-
-    @parsing.inline(0)
-    def reduce_Expr_EOF(self, *kids):
-        pass
@@ -1,36 +0,0 @@
-#
-# This source file is part of the EdgeDB open source project.
-#
-# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-from __future__ import annotations
-
-from edb.common import parsing
-
-from .expressions import Nonterm
-from .precedence import *  # NOQA
-from .tokens import *  # NOQA
-from .statements import *  # NOQA
-from .ddl import *  # NOQA
-
-
-class CreateMigrationBody(Nonterm):
-    "%start"
-
-    @parsing.inline(0)
-    def reduce_CreateMigrationCommandsBlock_EOF(self, *kids):
-        pass
@@ -1,61 +0,0 @@
-#
-# This source file is part of the EdgeDB open source project.
-#
-# Copyright 2019-present MagicStack Inc. and the EdgeDB authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-from __future__ import annotations
-
-from edb.edgeql import ast as qlast
-
-from .expressions import Nonterm
-from .sdl import *  # NOQA
-
-from . import commondl
-
-
-class SDLDocument(Nonterm):
-    "%start"
-
-    def reduce_OptSemicolons_EOF(self, *kids):
-        self.val = qlast.Schema(declarations=[])
-
-    def reduce_statement_without_semicolons(self, *kids):
-        r"""%reduce \
-            OptSemicolons SDLShortStatement EOF
-        """
-        declarations = [kids[1].val]
-        commondl._validate_declarations(declarations)
-        self.val = qlast.Schema(declarations=declarations)
-
-    def reduce_statements_without_optional_trailing_semicolons(self, *kids):
-        r"""%reduce \
-            OptSemicolons SDLStatements \
-            OptSemicolons SDLShortStatement EOF
-        """
-        declarations = kids[1].val + [kids[3].val]
-        commondl._validate_declarations(declarations)
-        self.val = qlast.Schema(declarations=declarations)
-
-    def reduce_OptSemicolons_SDLStatements_EOF(self, *kids):
-        declarations = kids[1].val
-        commondl._validate_declarations(declarations)
-        self.val = qlast.Schema(declarations=declarations)
-
-    def reduce_OptSemicolons_SDLStatements_Semicolons_EOF(self, *kids):
-        declarations = kids[1].val
-        commondl._validate_declarations(declarations)
-        self.val = qlast.Schema(declarations=declarations)
138 edb/edgeql/parser/grammar/start.py (new file)
@@ -0,0 +1,138 @@
+#
+# This source file is part of the EdgeDB open source project.
+#
+# Copyright 2008-present MagicStack Inc. and the EdgeDB authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from __future__ import annotations
+
+from edb.common import parsing
+from edb.edgeql import ast as qlast
+
+from . import commondl
+from .expressions import Nonterm
+from .precedence import *  # NOQA
+from .tokens import *  # NOQA
+from .statements import *  # NOQA
+from .ddl import *  # NOQA
+from .session import *  # NOQA
+from .config import *  # NOQA
+
+
+# The main EdgeQL grammar, all of whose productions should start with a
+# GrammarToken, that determines the "subgrammar" to use.
+#
+# To add a new "subgrammar":
+# - add a new GrammarToken in tokens.py,
+# - add a new production here,
+# - add a new token kind in tokenizer.rs,
+# - add a mapping from the Python token name into the Rust token kind
+#   in parser.rs `fn get_token_kind`
+class EdgeQLGrammar(Nonterm):
+    "%start"
+
+    @parsing.inline(1)
+    def reduce_STARTBLOCK_EdgeQLBlock_EOF(self, *kids):
+        pass
+
+    @parsing.inline(1)
+    def reduce_STARTEXTENSION_CreateExtensionPackageCommandsBlock_EOF(self, *k):
+        pass
+
+    @parsing.inline(1)
+    def reduce_STARTMIGRATION_CreateMigrationCommandsBlock_EOF(self, *kids):
+        pass
+
+    @parsing.inline(1)
+    def reduce_STARTFRAGMENT_ExprStmt_EOF(self, *kids):
+        pass
+
+    @parsing.inline(1)
+    def reduce_STARTFRAGMENT_Expr_EOF(self, *kids):
+        pass
+
+    @parsing.inline(1)
+    def reduce_STARTSDLDOCUMENT_SDLDocument(self, *kids):
+        pass
+
+
+class EdgeQLBlock(Nonterm):
+    @parsing.inline(0)
+    def reduce_StatementBlock_OptSemicolons(self, _, _semicolon):
+        pass
+
+    def reduce_OptSemicolons(self, _semicolon):
+        self.val = []
+
+
+class SingleStatement(Nonterm):
+    @parsing.inline(0)
+    def reduce_Stmt(self, _):
+        # Expressions
+        pass
+
+    @parsing.inline(0)
+    def reduce_DDLStmt(self, _):
+        # Data definition commands
+        pass
+
+    @parsing.inline(0)
+    def reduce_SessionStmt(self, _):
+        # Session-local utility commands
+        pass
+
+    @parsing.inline(0)
+    def reduce_ConfigStmt(self, _):
+        # Configuration commands
+        pass
+
+
+class StatementBlock(
+    parsing.ListNonterm, element=SingleStatement, separator=commondl.Semicolons
+):  # NOQA, Semicolons are from .ddl
+    pass
+
+
+class SDLDocument(Nonterm):
+    def reduce_OptSemicolons_EOF(self, *kids):
+        self.val = qlast.Schema(declarations=[])
+
+    def reduce_statement_without_semicolons(self, *kids):
+        r"""%reduce \
+            OptSemicolons SDLShortStatement EOF
+        """
+        declarations = [kids[1].val]
+        commondl._validate_declarations(declarations)
+        self.val = qlast.Schema(declarations=declarations)
+
+    def reduce_statements_without_optional_trailing_semicolons(self, *kids):
+        r"""%reduce \
+            OptSemicolons SDLStatements \
+            OptSemicolons SDLShortStatement EOF
+        """
+        declarations = kids[1].val + [kids[3].val]
+        commondl._validate_declarations(declarations)
+        self.val = qlast.Schema(declarations=declarations)
+
+    def reduce_OptSemicolons_SDLStatements_EOF(self, *kids):
+        declarations = kids[1].val
+        commondl._validate_declarations(declarations)
+        self.val = qlast.Schema(declarations=declarations)
+
+    def reduce_OptSemicolons_SDLStatements_Semicolons_EOF(self, *kids):
+        declarations = kids[1].val
+        commondl._validate_declarations(declarations)
+        self.val = qlast.Schema(declarations=declarations)
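[Editor's note] The checklist in the comment above maps one-to-one onto the earlier Rust hunks (the new Kind variants and the get_token_kind arms). Purely as an illustration, a hypothetical STARTSOMETHING subgrammar would add:

```python
# tokens.py -- a hypothetical new entry point:
class T_STARTSOMETHING(GrammarToken):
    pass

# start.py -- the matching production on EdgeQLGrammar:
#     @parsing.inline(1)
#     def reduce_STARTSOMETHING_Expr_EOF(self, *kids):
#         pass

# Rust side: a StartSomething variant in Kind (tokenizer.rs) and a
# "STARTSOMETHING" => StartSomething arm in get_token_kind (parser.rs).
```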
@@ -42,6 +42,36 @@ class Token(parsing.Token, metaclass=TokenMeta,
     pass


+class GrammarToken(Token):
+    """
+    Instead of having different grammars, we prefix each query with a special
+    grammar token which directs the parser to appropriate grammar.
+
+    This greatly reduces the combined size of grammar specifications, since the
+    overlap between grammars is substantial.
+    """
+
+
+class T_STARTBLOCK(GrammarToken):
+    pass
+
+
+class T_STARTEXTENSION(GrammarToken):
+    pass
+
+
+class T_STARTFRAGMENT(GrammarToken):
+    pass
+
+
+class T_STARTMIGRATION(GrammarToken):
+    pass
+
+
+class T_STARTSDLDOCUMENT(GrammarToken):
+    pass
+
+
 class T_DOT(Token, lextoken='.'):
     pass
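[Editor's note] These GrammarToken subclasses are what callers now hand to parse(); the parser-demo hunk below picks between two of them. Restated as a helper (the function name is invented for illustration):

```python
from edb.edgeql.parser.grammar import tokens as qltokens

def pick_start_token(is_sdl: bool) -> type:
    # SDL schema text vs. a regular EdgeQL statement block.
    return qltokens.T_STARTSDLDOCUMENT if is_sdl else qltokens.T_STARTBLOCK
```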
@@ -115,7 +115,7 @@ def main(get_handler):
     parser.add_argument("--version-serial", type=int)
     args = parser.parse_args()

-    ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode(), paralellize=True)
+    ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode())
     gc.freeze()

     listen_for_debugger()
@@ -180,16 +180,8 @@ async def _init_cluster(
 def _init_parsers():
     # Initialize parsers that are used in the server process.
     from edb.edgeql import parser as ql_parser
-    from edb.edgeql.parser import grammar as ql_grammar

-    ql_parser.preload(
-        allow_rebuild=devmode.is_in_dev_mode(),
-        paralellize=True,
-        grammars=[
-            ql_grammar.block,
-            ql_grammar.fragment,
-        ]
-    )
+    ql_parser.preload(allow_rebuild=devmode.is_in_dev_mode())


 async def _run_server(
@@ -22,7 +22,6 @@
 from __future__ import annotations
 from typing import *

-import types
 import typing
 import functools
 import os
@@ -176,12 +175,29 @@ class BaseDocTest(unittest.TestCase, metaclass=DocTestMeta):
     )


-class BaseSyntaxTest(BaseDocTest):
+class PreloadParserGrammarMixin:
+    pass
+
+
+def should_preload_parser(
+    cases: Iterable[unittest.TestCase],
+) -> bool:
+    for cas in cases:
+        if isinstance(cas, PreloadParserGrammarMixin):
+            return True
+    return False
+
+
+def preload_parser() -> None:
+    qlparser.preload(allow_rebuild=True)
+
+
+class BaseSyntaxTest(BaseDocTest, PreloadParserGrammarMixin):
     ast_to_source: Optional[Any] = None
     markup_dump_lexer: Optional[str] = None

     @classmethod
-    def get_grammar(cls):
+    def get_grammar_token(cls) -> Type[qlgrammar.tokens.GrammarToken]:
         raise NotImplementedError

     def run_test(self, *, source, spec, expected=None):
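[Editor's note] The new mixin and helpers above are consumed by the test runner at the end of this diff; the whole flow reduces to (a restatement of that hunk, not new API):

```python
# In the test runner, once per run:
if tb_lang.should_preload_parser(cases):
    tb_lang.preload_parser()  # i.e. qlparser.preload(allow_rebuild=True)
```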
@@ -189,7 +205,7 @@ class BaseSyntaxTest(BaseDocTest):
         if debug:
             markup.dump_code(source, lexer=self.markup_dump_lexer)

-        inast = qlparser.parse(self.get_grammar(), source)
+        inast = qlparser.parse(self.get_grammar_token(), source)

         if debug:
             markup.dump(inast)
@@ -207,59 +223,6 @@
         self.assert_equal(expected_src, processed_src)


-class TestCasesSetup:
-    def __init__(self, grammars: list[types.ModuleType]) -> None:
-        self.grammars = grammars
-
-
-def get_test_cases_setup(
-    cases: Iterable[unittest.TestCase],
-) -> Optional[TestCasesSetup]:
-    grammars: List[types.ModuleType] = []
-
-    for case in cases:
-        if not hasattr(case, 'get_grammar'):
-            continue
-
-        grammar = case.get_grammar()
-        if not grammar:
-            continue
-        elif isinstance(grammar, list):
-            grammars.extend(grammar)
-        else:
-            grammars.append(grammar)
-
-    if not grammars:
-        return None
-    else:
-        return TestCasesSetup(set(grammars))
-
-
-def run_test_cases_setup(setup: TestCasesSetup, jobs: int) -> None:
-    qlparser.preload(
-        grammars=setup.grammars,
-        allow_rebuild=True,
-        paralellize=jobs > 1,
-    )
-
-
 class AstValueTest(BaseDocTest):
     def run_test(self, *, source, spec=None, expected=None):
         debug = bool(os.environ.get(self.parser_debug_flag))
         if debug:
             markup.dump_code(source, lexer=self.markup_dump_lexer)

         inast = qlparser.parse(self.get_grammar(), source)

         if debug:
             markup.dump(inast)

         for var in inast.definitions[0].variables:
             asttype, val = expected[var.name]
             self.assertIsInstance(var.value, asttype)
             self.assertEqual(var.value.value, val)


 _std_schema = None
 _refl_schema = None
 _schema_class_layout = None
@@ -338,7 +301,7 @@ def new_compiler():
     )


-class BaseSchemaTest(BaseDocTest):
+class BaseSchemaTest(BaseDocTest, PreloadParserGrammarMixin):
     DEFAULT_MODULE = 'default'
     SCHEMA: Optional[str] = None

@@ -352,15 +315,6 @@ class BaseSchemaTest(BaseDocTest):
         else:
             cls.schema = _load_std_schema()

-    @classmethod
-    def get_grammar(cls):
-        return [
-            qlgrammar.block,
-            qlgrammar.fragment,
-            qlgrammar.sdldocument,
-            qlgrammar.extension_package_body,
-        ]
-
     @classmethod
     def run_ddl(cls, schema, ddl, default_module=defines.DEFAULT_MODULE_ALIAS):
         statements = edgeql.parse_block(ddl)
@@ -726,7 +726,7 @@ class EQLFunctionDirective(BaseEQLDirective):

         try:
             astnode = edgeql_parser.parse(
-                edgeql_grammar.block,
+                edgeql_grammar.tokens.T_STARTBLOCK,
                 f'create function {sig} using SQL function "xxx";')[0]
         except Exception as ex:
             raise self.error(
@@ -800,8 +800,9 @@ class EQLConstraintDirective(BaseEQLDirective):

         try:
             astnode = edgeql_parser.parse(
-                edgeql_grammar.block,
-                f'create abstract constraint {sig};')[0]
+                edgeql_grammar.tokens.T_STARTBLOCK,
+                f'create abstract constraint {sig};'
+            )[0]
         except Exception as ex:
             raise self.error(
                 f'could not parse constraint signature {sig!r}') from ex
@@ -21,7 +21,7 @@ from typing import *
 from edb.edgeql import ast as qlast
 from edb.edgeql import tokenizer
 from edb.edgeql import parser as qlparser
-from edb.edgeql.parser import grammar as qlgrammar
+from edb.edgeql.parser.grammar import tokens as qltokens

 import edb._edgeql_parser as rust_parser

@@ -30,7 +30,7 @@ from edb.tools.edb import edbcommands

 @edbcommands.command("parser-demo")
 def main():
-    for q in QUERIES:
+    for q in QUERIES[-10:]:
         sdl = q.startswith('sdl')
         if sdl:
             q = q[3:]
@@ -43,9 +43,10 @@ def main():
             print(e)
             continue

-        grammar = qlgrammar.sdldocument if sdl else qlgrammar.block
+        start_t = qltokens.T_STARTSDLDOCUMENT if sdl else qltokens.T_STARTBLOCK
+        start_t_name = start_t.__name__[2:]
         tokens = source.tokens()
-        result, productions = rust_parser.parse(grammar.__name__, tokens)
+        result, productions = rust_parser.parse(start_t_name, tokens)

         print('-' * 30)
         print()
@@ -313,4 +314,7 @@ QUERIES = [
     '''
         SELECT INTROSPECT tuple<int64>;
     ''',
+    '''
+        (SELECT User.name) OFFSET 2;
+    ''',
 ]
@@ -824,7 +824,7 @@ class ParallelTextTestRunner:
         )
         setup = tb.get_test_cases_setup(cases)
         server_used = tb.test_cases_use_server(cases)
-        lang_setup = tb_lang.get_test_cases_setup(cases)
+        preload_parser = tb_lang.should_preload_parser(cases)
         bootstrap_time_taken = 0
         tests_time_taken = 0
         result = None
@@ -863,8 +863,8 @@

         os.environ["EDGEDB_SERVER_JWS_KEY_FILE"] = str(jwk_file)

-        if lang_setup:
-            tb_lang.run_test_cases_setup(lang_setup, jobs=self.num_workers)
+        if preload_parser:
+            tb_lang.preload_parser()

         try:
             if setup:
6 setup.py
@@ -841,11 +841,7 @@ class build_parsers(setuptools.Command):
                  'alongside your pure Python modules')]

     sources = [
-        "edb.edgeql.parser.grammar.block",
-        "edb.edgeql.parser.grammar.fragment",
-        "edb.edgeql.parser.grammar.sdldocument",
-        "edb.edgeql.parser.grammar.migration_body",
-        "edb.edgeql.parser.grammar.extension_package_body",
+        "edb.edgeql.parser.grammar.start",
     ]

     def initialize_options(self):
@@ -24,7 +24,7 @@ from edb import errors

 from edb.testbase import lang as tb
 from edb.edgeql import generate_source as edgeql_to_source
-from edb.edgeql.parser import grammar as edgeql_grammar
+from edb.edgeql.parser import grammar as qlgrammar
 from edb.tools import test


@@ -35,8 +35,8 @@ class EdgeQLSyntaxTest(tb.BaseSyntaxTest):
     ast_to_source = edgeql_to_source

     @classmethod
-    def get_grammar(cls):
-        return edgeql_grammar.block
+    def get_grammar_token(cls):
+        return qlgrammar.tokens.T_STARTBLOCK


 class TestEdgeQLParser(EdgeQLSyntaxTest):
@@ -24,7 +24,7 @@ from edb import errors

 from edb.testbase import lang as tb
 from edb.edgeql import generate_source
-from edb.edgeql.parser import grammar as ql_grammar
+from edb.edgeql.parser import grammar as qlgrammar
 from edb.tools import test


@@ -35,8 +35,8 @@ class SchemaSyntaxTest(tb.BaseSyntaxTest):
     ast_to_source = functools.partial(generate_source, unsorted=True)

     @classmethod
-    def get_grammar(cls):
-        return ql_grammar.sdldocument
+    def get_grammar_token(cls):
+        return qlgrammar.tokens.T_STARTSDLDOCUMENT


 class TestEdgeSchemaParser(SchemaSyntaxTest):