# edgedb/tests/test_docs.py
#
# 506 lines
# 17 KiB
# Python

##
# Copyright (c) 2017-present MagicStack Inc.
# All rights reserved.
#
# See LICENSE for details.
##
from typing import *
import collections
import json
import os
import re
import subprocess
import sys
import tempfile
import textwrap
import unittest
try:
import docutils.nodes
import docutils.parsers
import docutils.utils
import docutils.frontend
import docutils.parsers.rst.directives.body # type: ignore
from edb.tools.docs.shared import make_CodeBlock
docutils.parsers.rst.directives.register_directive(
'code-block',
make_CodeBlock(docutils.parsers.rst.directives.body.CodeBlock)
)
except ImportError:
docutils = None # type: ignore
try:
import sphinx
except ImportError:
sphinx = None # type: ignore
from graphql.language import parser as graphql_parser
from edb.edgeql import parser as ql_parser
def find_edgedb_root():
    """Return the absolute path of the parent of this file's directory."""
    this_file = os.path.abspath(__file__)
    containing_dir = os.path.dirname(this_file)
    return os.path.dirname(containing_dir)
class TestDocSnippets(unittest.TestCase):
    """Lint and validate EdgeDB documentation files.

    Checks:

    * all source code in "code-block" directives is parsed to
      check that the syntax is valid;

    * lines must not be longer than 79 characters;

    * any ReST warnings (like improper headers or broken indentation)
      are reported as errors.
    """

    # Longest line (in characters) accepted in a documentation source.
    MAX_LINE_LEN = 79

    # One code block extracted from a document, with its location.
    CodeSnippet = collections.namedtuple(
        'CodeSnippet',
        ('filename', 'lineno', 'lang', 'code'),
    )

    class RestructuredTextStyleError(Exception):
        """Raised when a documentation source has lint errors."""
if docutils is not None:

    class CustomDocutilsReporter(docutils.utils.Reporter):
        """Docutils reporter that also records messages as lint errors.

        Every message is still passed on to the base ``Reporter``; the
        ones that are not filtered out are additionally stored, as
        formatted strings, in the ``lint_errors`` set.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.lint_errors = set()

        def system_message(self, level, message, *children, **kwargs):
            # Messages about unknown roles and directives are not
            # recorded as lint errors.
            ignored_prefixes = (
                'Unknown interpreted text role',
                'No role entry for',
                'Unknown directive type',
                'No directive entry for',
            )
            ignored = (
                message.startswith(ignored_prefixes) or
                level < 2  # Ignore DEBUG and INFO messages.
            )

            msg = super().system_message(
                level, message, *children, **kwargs)

            if ignored:
                return msg

            location = msg['source']
            line = msg.get('line', '?')
            self.lint_errors.add(
                f"{message} at {location} on line "
                f"{line}")
            return msg
def find_rest_files(self, path: str) -> List[str]:
    """Recursively collect the paths of all ``.rst`` files under *path*.

    Args:
        path: Directory to search.

    Returns:
        The paths of the files whose name ends with ``.rst``, sorted.
        ``os.scandir`` yields entries in arbitrary order, so sorting is
        what makes the order reproducible between runs and platforms.
    """

    def scan(directory):
        with os.scandir(directory) as it:
            for entry in it:
                if entry.is_file() and entry.name.endswith('.rst'):
                    yield entry.path
                if entry.is_dir():
                    yield from scan(entry.path)

    return sorted(scan(path))
def extract_code_blocks(self, source: str, filename: str):
    """Lint a reST document and return the code blocks found in it.

    *source* is parsed with the docutils reST parser.  Messages recorded
    by the custom reporter, lines longer than ``MAX_LINE_LEN`` and
    unbalanced ``.. lint-off`` / ``.. lint-on`` markers are all
    collected as lint errors.

    Args:
        source: Text of the reST document.
        filename: Name used for the document in messages and in the
            returned snippets.

    Returns:
        A list of ``CodeSnippet`` tuples, one for every collected
        literal block whose first class is ``code`` and which has a
        second class; that second class is used as the language.
        ``lineno`` is stored as a string.

    Raises:
        RestructuredTextStyleError: If any lint error was collected.
    """
    blocks = []

    parser_class = docutils.parsers.get_parser_class('rst')
    parser = parser_class()
    settings = docutils.frontend.OptionParser(
        components=(parser_class, )).get_default_values()
    settings.syntax_highlight = 'none'

    min_error_code = 100  # Ignore all errors, we process them manually.
    reporter = self.CustomDocutilsReporter(
        filename, min_error_code, min_error_code)
    document = docutils.nodes.document(settings, reporter, source=filename)
    document.note_source(filename, -1)
    parser.parse(source, document)

    # Line-based checks: line length and lint-off/lint-on pairing.
    lint_on = True
    for lineno, line in enumerate(source.split('\n'), 1):
        if line.startswith('.. lint-off'):
            if lint_on:
                lint_on = False
            else:
                reporter.lint_errors.add(
                    f'Mismatched lint-on/lint-off in '
                    f'{filename}, line {lineno}')
        elif line.startswith('.. lint-on'):
            if not lint_on:
                lint_on = True
            else:
                reporter.lint_errors.add(
                    f'Mismatched lint-on/lint-off in '
                    f'{filename}, line {lineno}')

        if len(line) > self.MAX_LINE_LEN and lint_on:
            reporter.lint_errors.add(
                f'Line longer than {self.MAX_LINE_LEN} characters in '
                f'{filename}, line {lineno}')

    if not lint_on:
        reporter.lint_errors.add(
            f'Unexpected EOF. No closing \'.. lint-on\' found in '
            f'{filename}')

    if reporter.lint_errors:
        # The errors live in a set; sort them so that the message is
        # the same from one run to the next.
        raise self.RestructuredTextStyleError(
            '\n\nRestructuredText lint errors:\n' +
            '\n'.join(sorted(reporter.lint_errors)))

    # Messages recorded by the reporter from here on (while parsing the
    # nested blocks below) are not raised: the check has already run.
    directives = []
    for node in document.traverse():
        if node.tagname != 'literal_block':
            continue

        if 'code' in node.attributes['classes']:
            directives.append(node)
            continue

        block = node.astext()
        # certain literal blocks also contain code-blocks
        if not re.match(r'^\.\. eql:(operator|function|constraint)::',
                        block):
            continue

        # figure out the line offset of the start of the block
        node_parent = node
        while node_parent and node_parent.line is None:
            node_parent = node_parent.parent
        if node_parent:
            node_parent_line = node_parent.line - block.count('\n')
        else:
            node_parent_line = 0

        # cut off the first chunk; a directive without a body has no
        # blank line and therefore nothing to look for code blocks in
        _, separator, block = block.partition('\n\n')
        if not separator:
            continue

        subdoc = docutils.nodes.document(
            settings, reporter, source=filename)
        subdoc.note_source(filename, node_parent_line)
        # dedent the rest
        block = textwrap.dedent(block)
        parser.parse(block, subdoc)

        subdirs = subdoc.traverse(
            condition=lambda sub: (
                sub.tagname == 'literal_block' and
                'code' in sub.attributes['classes'])
        )
        for subdir in subdirs:
            if subdir.line is not None:
                subdir.line += node_parent_line
            directives.append(subdir)

    for directive in directives:
        classes = directive.attributes['classes']
        if len(classes) < 2 or classes[0] != 'code':
            continue

        lang = classes[1]
        code = directive.astext()

        lineno = directive.line
        if lineno is None:
            # Some docutils blocks (like tables) do not support
            # line numbers, so we try to traverse the parent tree
            # to find the nearest block with a line number.
            parent_directive = directive
            while parent_directive and parent_directive.line is None:
                parent_directive = parent_directive.parent
            if parent_directive and parent_directive.line is not None:
                lineno = parent_directive.line
            # Otherwise lineno stays None and is reported as 'None'.

        blocks.append(self.CodeSnippet(filename, str(lineno), lang, code))

    return blocks
def extract_snippets_from_repl(self, replblock):
    """Extract the queries typed at the prompts of a REPL transcript.

    A query starts on a prompt line (``name> query``) and continues over
    the following ``... `` lines; every other line is output.  A prompt
    line that directly follows a query, with no output in between,
    starts a new query.

    Args:
        replblock: Text of the transcript.

    Returns:
        A list with the text of every query, its lines joined with
        newlines.  Queries whose first line is a backslash command
        (such as ``\\c``) are left out.

    Raises:
        AssertionError: If the transcript starts with output, or if a
            continuation prefix is not as wide as its prompt.
    """
    prompt_re = re.compile(r'(?P<p>[\w\[:\]>]+>\s)(?P<l>.*)')
    continuation_re = re.compile(r'(?P<p>\.+\s)(?P<l>.*)')

    snips = []  # (width of the prompt, [lines of the query]) pairs
    in_query = False

    for line in replblock.split('\n'):
        if in_query:
            m = continuation_re.match(line)
            if m:
                # yes, it's "... " line
                if len(m.group('p')) != snips[-1][0]:
                    raise AssertionError(
                        f'invalid REPL block: number of "." does not '
                        f'match number of ">"; '
                        f'offending line {line!r}')
                snips[-1][1].append(m.group('l'))
                continue

            # The query is over.  Fall through to the prompt check so
            # that a prompt on this very line is not taken for output.
            in_query = False

        m = prompt_re.match(line)
        if m:
            # >>> prompt
            in_query = True
            snips.append((len(m.group('p')), [m.group('l')]))
        elif not snips:
            # output
            raise AssertionError(
                f'invalid REPL block (starts with output); '
                f'offending line {line!r}')

    return ['\n'.join(s[1]) for s in snips
            # ignore the "\c" and other REPL commands
            if not re.match(r'\\\w+', s[1][0])]
def run_block_test(self, block):
    """Check that the code of one extracted snippet is syntactically valid.

    The language decides what is done with the code:

    * ``<lang>-repl``: the queries are taken out of the transcript with
      ``extract_snippets_from_repl`` and checked as ``<lang>``;
    * ``<lang>-diff``: the leading marker column is removed and both the
      "before" and the "after" version are checked as ``<lang>``;
    * ``edgeql`` and ``sdl`` are parsed with ``ql_parser``, ``graphql``
      with ``graphql_parser`` and ``json`` with ``json.loads``;
    * the other known languages are accepted without being parsed.

    Args:
        block: Object with ``lang``, ``code``, ``filename`` and
            ``lineno`` attributes, such as a ``CodeSnippet``.

    Raises:
        AssertionError: If the code cannot be extracted or parsed, or if
            the language is unknown.  The original exception is chained.
    """
    # Value reported by the error message if extracting the snippets
    # fails, that is before ``code`` gets its final value.
    code = [block.code]

    try:
        lang = block.lang

        if lang.endswith('-repl'):
            lang = lang.rpartition('-')[0]
            code = self.extract_snippets_from_repl(block.code)
        elif lang.endswith('-diff'):
            # In the diff block we need to truncate "-"/"+" at the
            # beginning of each line. We will make two copies of
            # the code as the before and after version. Both will
            # be validated.
            before = []
            after = []
            for line in block.code.split('\n'):
                if line == "":
                    continue

                # The marker is the first column, the one that is cut
                # off; an indented "-" or "+" belongs to the code.
                marker, rest = line[0], line[1:]
                if marker == '-':
                    before.append(rest)
                elif marker == '+':
                    after.append(rest)
                else:
                    before.append(rest)
                    after.append(rest)

            code = ['\n'.join(before), '\n'.join(after)]
            # truncate the "-diff" from the language
            lang = lang[:-5]

        for snippet in code:
            if lang == 'edgeql':
                ql_parser.parse_block(snippet)
            elif lang == 'sdl':
                # Strip all the "using extension ..." and comment
                # lines as they interfere with our module
                # detection.
                sdl = re.sub(
                    r'(using\s+extension\s+\w+;)|(#.*?\n)',
                    '',
                    snippet
                ).strip()

                # the snippet itself may either contain a module
                # block or have a fully-qualified top-level name
                #
                # NOTE(review): re.match only matches at the start of
                # the text, so only its beginning is inspected here;
                # confirm whether re.search was intended.
                if not sdl or re.match(
                        r'''(?xm)
                            (\bmodule\s+\w+\s*{) |
                            (^.*
                                (type|annotation|link|property|constraint)
                                \s+(\w+::\w+)\s+
                                ({|extending)
                            )
                        ''',
                        sdl):
                    ql_parser.parse_sdl(snippet)
                else:
                    ql_parser.parse_sdl(f'module default {{ {snippet} }}')
            elif lang == 'edgeql-result':
                # REPL results
                pass
            elif lang == 'pseudo-eql':
                # Skip "pseudo-eql" language as we don't have a
                # parser for it.
                pass
            elif lang == 'graphql':
                graphql_parser.parse(snippet)
            elif lang == 'graphql-schema':
                # The graphql-schema can be highlighted using graphql
                # lexer, but it does not have a dedicated parser.
                pass
            elif lang == 'json':
                json.loads(snippet)
            elif lang in {
                'bash',
                'powershell',
                'shell',
                'c',
                'javascript',
                'python',
                'typescript',
                'go',
                'yaml',
                'jsx',
                'rust',
                'tsx',
                'elixir',
                'toml',
                'sql'
            }:
                pass
            elif lang[-5:] == '-diff':
                # Only reachable when a "-diff" suffix is still left
                # after the suffix handling above; kept as it was.
                pass
            else:
                raise LookupError(f'unknown code-lang {lang}')
    except Exception as ex:
        raise AssertionError(
            f'unable to parse {block.lang} code block in '
            f'{block.filename}, around line {block.lineno}: '
            f'{code}') from ex
@unittest.skipIf(docutils is None, 'docutils is missing')
def test_cqa_doc_snippets(self):
    # Lint every .rst file under <root>/docs and check every code block
    # extracted from it.
    edgepath = find_edgedb_root()
    docspath = os.path.join(edgepath, 'docs')

    for filename in self.find_rest_files(docspath):
        # Read as UTF-8 rather than with the locale's default encoding,
        # so that the result does not depend on the platform.
        with open(filename, 'rt', encoding='utf-8') as f:
            source = f.read()

        blocks = self.extract_code_blocks(source, filename)

        for block in blocks:
            self.run_block_test(block)
@unittest.skipIf(docutils is None, 'docutils is missing')
def test_doc_test_broken_code_block_01(self):
source = '''
In large applications, the schema will usually be split
into several :ref:`modules<ref_schema_evolution_modules>`.
.. code-block:: edgeql
SELECT 122 + foo();
A *schema module* defines the effective namespace for
elements it defines.
.. code-block:: edgeql
SELECT 42;
# ^ expected to return 42
SELECT foo(
Schema modules can import other modules to use schema
elements they define.
'''
blocks = self.extract_code_blocks(source, '<test>')
self.assertEqual(len(blocks), 2)
self.assertEqual(blocks[0].code, 'SELECT 122 + foo();')
self.run_block_test(blocks[0])
with self.assertRaisesRegex(AssertionError, 'unable to parse edgeql'):
self.run_block_test(blocks[1])
@unittest.skipIf(docutils is None, 'docutils is missing')
def test_doc_test_broken_code_block_02(self):
source = r'''
String operator with a buggy example.
.. eql:operator:: LIKE: str LIKE str -> bool
str NOT LIKE str -> bool
Case-sensitive simple string matching.
Example:
.. code-block:: edgeql-repl
db> SELECT 'a%%c' NOT LIKE 'a\%c';
{true}
'''
blocks = self.extract_code_blocks(source, '<test>')
self.assertEqual(len(blocks), 1)
self.assertEqual(blocks[0].code,
"db> SELECT 'a%%c' NOT LIKE 'a\\%c';\n{true}")
with self.assertRaisesRegex(AssertionError, 'unable to parse edgeql'):
self.run_block_test(blocks[0])
@unittest.skipIf(docutils is None, 'docutils is missing')
def test_doc_test_broken_long_lines(self):
source = f'''
aaaaaa aa aaa:
- aaa
- {'a' * self.MAX_LINE_LEN}
- aaa
'''
with self.assertRaisesRegex(self.RestructuredTextStyleError,
r'lint errors:[.\s]*Line longer'):
self.extract_code_blocks(source, '<test>')
@unittest.skipIf(docutils is None, 'docutils is missing')
def test_doc_test_bad_header(self):
source = textwrap.dedent('''
Section
-----
aaa aaa aaa
''')
with self.assertRaisesRegex(
self.RestructuredTextStyleError,
r'lint errors:[.\s]*Title underline too short'):
self.extract_code_blocks(source, '<test>')
@unittest.skipIf(sphinx is None, 'sphinx is missing')
def test_doc_full_build(self):
    # Build <root>/docs with Sphinx into a temporary directory and fail
    # with the captured output if the build exits with a non-zero code.
    docs_root = os.path.join(find_edgedb_root(), 'docs')

    with tempfile.TemporaryDirectory() as td:
        sphinx_cmd = [
            sys.executable,
            '-m', 'sphinx',
            '-n',
            '-W',  # fail on warnings
            '-b', 'xml',
            '-q',
            '-D', 'master_doc=index',
            docs_root,
            td,
        ]
        # capture_output=True pipes both stdout and stderr.
        proc = subprocess.run(sphinx_cmd, text=True, capture_output=True)

        if proc.returncode:
            raise AssertionError(
                'Unable to build docs with Sphinx.\n\n'
                f'STDOUT:\n{proc.stdout}\n\n'
                f'STDERR:\n{proc.stderr}\n'
            )