gristlabs_grist-core/sandbox/grist/codebuilder.py
Dmitry S f0d0a07295 (core) Implement PREVIOUS/NEXT/RANK and lookupRecords().find.* methods.
Summary:
- `lookupRecords()` now allows efficient search in sorted results, with
  the syntax  `lookupRecords(..., order_by="-Date").find.le($Date)`. This will find the record with the nearest date that's <= `$Date`.
- The `find.*` methods are `le`, `lt`, `ge`, `gt`, and `eq`. All have O(log N) performance.
- `PREVIOUS(rec, group_by=..., order_by=...)` finds the previous record to rec, according to `group_by` / `order_by`, in amortized O(log N) time. For example, `PREVIOUS(rec, group_by="Account", order_by="Date")`.
- `PREVIOUS(rec, order_by=None)` finds the previous record in the full table, sorted by the `manualSort` column, to match the order visible in the unsorted table.
- `NEXT(...)` is just like `PREVIOUS(...)` but finds the next record.
- `RANK(rec, group_by=..., order_by=..., order="asc")` returns the rank of the record within the group, starting with 1. Order can be `"asc"` (default) or `"desc"`.
- The `order_by` argument in `lookupRecords`, and the new functions now supports tuples, as well as the "-" prefix to reverse order, e.g. `("Category", "-Date")`.
- New functions are only available in Python3, for a minor reason (to support keyword-only arguments for `group_by` and `order_by`) and also as a nudge to Python2 users to update.

- Includes fixes for several situations related to lookups that used to cause quadratic complexity.

Test Plan:
- New performance check that sorted lookups don't add quadratic complexity.
- Tests added for lookup find.* methods, and for PREVIOUS/NEXT/RANK.
- Tests added that renaming columns updates `order_by` and `group_by` arguments, and attributes on results (e.g. `PREVIOUS(...).ColId`) appropriately.
- Python3 tests can now produce verbose output when VERBOSE=1 and -v are given.

Reviewers: jarek, georgegevoian

Reviewed By: jarek, georgegevoian

Subscribers: paulfitz, jarek

Differential Revision: https://phab.getgrist.com/D4265
2024-07-17 12:00:55 -04:00

550 lines
21 KiB
Python

import ast
import contextlib
import itertools
import linecache
import logging
import re
import textwrap
import astroid
import asttokens
import six
import friendly_errors
import textbuilder
log = logging.getLogger(__name__)
DOLLAR_REGEX = re.compile(r'\$(?=[a-zA-Z_][a-zA-Z_0-9]*)')
# For functions needing lazy evaluation, the slice for which arguments to wrap in a lambda.
LAZY_ARG_FUNCTIONS = {
'IF': slice(1, 3),
'ISERR': slice(0, 1),
'ISERROR': slice(0, 1),
'IFERROR': slice(0, 1),
'PEEK': slice(0, 1),
}
class GristSyntaxError(SyntaxError):
"""
Indicates a formula is invalid in a Grist-specific way.
"""
def make_formula_body(formula, default_value, assoc_value=None):
"""
Given a formula, returns a textbuilder.Builder object suitable to be the body of a function,
with the formula transformed to replace `$foo` with `rec.foo`, and to insert `return` if
appropriate. Assoc_value is associated with textbuilder.Text() to be returned by map_back_patch.
"""
if isinstance(formula, six.binary_type):
formula = formula.decode('utf8')
# Remove any common leading whitespace. In python, extra indent should not be an error, but
# it is in Grist because we parse the formula body before it gets inserted into a function (i.e.
# as if at module level).
formula = textwrap.dedent(formula)
if not formula.strip():
return textbuilder.Text('return ' + repr(default_value), assoc_value)
formula_builder_text = textbuilder.Text(formula, assoc_value)
# Start with a temporary builder, since we need to translate "$" before we can parse the code at
# all (namely, we turn '$foo' into 'DOLLARfoo' first). Once we can parse the code, we'll create
# a proper set of patches. Note that we initially translate into 'DOLLARfoo' rather than
# 'rec.foo', so that the translated entity is a single token: this makes for more precisely
# reported errors if there are any.
tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
atok = asttokens.ASTText(tmp_formula.get_text(), filename=code_filename)
# Parse the formula into an abstract syntax tree (AST), catching syntax errors.
# Constructing ASTText doesn't parse the code, but the .tree property does.
try:
tree = atok.tree
except SyntaxError as e:
return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, e))
# Once we have a tree, go through it and create a subset of the dollar patches that are actually
# relevant. E.g. this is where we'll skip the "$foo" patches that appear in strings or comments.
patches = []
for node in ast.walk(tree):
if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
startpos = atok.get_text_range(node)[0]
input_pos = tmp_formula.map_back_offset(startpos)
m = DOLLAR_REGEX.match(formula, input_pos)
# If there is no match, then we must have had a "DOLLARblah" identifier that didn't come
# from translating a "$" prefix.
if m:
patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.'))
# Wrap arguments to the top-level "IF()" function into lambdas, for lazy evaluation. This is
# to ensure it's not affected by an exception in the unused value, to match Excel behavior.
if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
lazy_args_slice = LAZY_ARG_FUNCTIONS.get(node.func.id)
if lazy_args_slice:
for arg in node.args[lazy_args_slice]:
start, end = map(tmp_formula.map_back_offset, atok.get_text_range(arg))
patches.append(textbuilder.make_patch(formula, start, start, 'lambda: ('))
patches.append(textbuilder.make_patch(formula, end, end, ')'))
# If the last statement is an expression that has its result unused (an ast.Expr node),
# then insert a "return" keyword.
last_statement = tree.body[-1] if tree.body else None
if isinstance(last_statement, ast.Expr):
startpos = atok.get_text_range(last_statement)[0]
input_pos = tmp_formula.map_back_offset(startpos)
patches.append(textbuilder.make_patch(formula, input_pos, input_pos, "return "))
elif last_statement is None:
# If we have an empty body (e.g. just a comment), add a 'pass' at the end.
patches.append(textbuilder.make_patch(formula, len(formula), len(formula), '\npass'))
elif not any(
# Raise an error if the user forgot to return anything. For performance:
# - Use type() instead of isinstance()
# - Check last_statement first to try avoiding walking the tree
type(node) == ast.Return # pylint: disable=unidiomatic-typecheck
for node in itertools.chain([last_statement], ast.walk(tree))
):
message = "No `return` statement, and the last line isn't an expression."
if isinstance(last_statement, ast.Assign):
message += " If you want to check for equality, use `==` instead of `=`."
error = GristSyntaxError(message, ('<string>', 1, 1, ""))
return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, error))
# Apply the new set of patches to the original formula to get the real output.
final_formula = textbuilder.Replacer(formula_builder_text, patches)
# Try parsing again before returning it just in case we have new syntax errors. These are
# possible in cases when a single token ('DOLLARfoo') is valid but an expression ('rec.foo') is
# not, e.g. `foo($bar=1)` or `def $foo()`.
# Also check for common mistakes: assigning to `rec` or its attributes (e.g. `$foo = 1`).
with use_inferences(InferRecAssignment, InferRecAttrAssignment):
try:
astroid.parse(final_formula.get_text())
except (astroid.AstroidSyntaxError, SyntaxError) as e:
error = getattr(e, "error", e) # extract SyntaxError from AstroidSyntaxError
return textbuilder.Text(_create_syntax_error_code(final_formula, formula, error))
# We return the text-builder object whose .get_text() is the final formula.
return final_formula
def get_dollar_replacer(formula):
"""
Returns a textbuilder.Replacer that would replace all dollar signs ("$") in the given
formula with "rec.". The Replacer tracks extra info we can later use to restore the
dollar signs back. To get the processed text, call .get_text() on the Replacer.
"""
formula_builder_text = textbuilder.Text(formula)
tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
atok = asttokens.ASTText(tmp_formula.get_text())
patches = []
for node in ast.walk(atok.tree):
if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
startpos = atok.get_text_range(node)[0]
input_pos = tmp_formula.map_back_offset(startpos)
m = DOLLAR_REGEX.match(formula, input_pos)
if m:
patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.'))
final_formula = textbuilder.Replacer(formula_builder_text, patches)
return final_formula
def _create_syntax_error_code(builder, input_text, err):
"""
Returns the text for a function that raises the given SyntaxError and includes the offending
code in a commented-out form. In addition, it translates the error's position from builder's
output to input_text.
"""
output_ln = asttokens.LineNumbers(builder.get_text())
input_ln = asttokens.LineNumbers(input_text)
# A SyntaxError contains .lineno and .offset (1-based), which we need to translate to offset
# within the transformed text, so that it can be mapped back to an offset in the original text,
# and finally translated back into a line number and 1-based position to report to the user. An
# example is that "$x*" is translated to "return x*", and the syntax error in the transformed
# python code (line 2 offset 9) needs to be translated to be in line 2 offset 3.
output_offset = output_ln.line_to_offset(err.lineno, err.offset - 1 if err.offset else 0)
input_offset = builder.map_back_offset(output_offset)
line, col = input_ln.offset_to_line(input_offset)
input_text_line = input_text.splitlines()[line - 1]
message = err.args[0]
err_type = type(err)
if isinstance(err, GristSyntaxError):
# Just use SyntaxError in the final code
err_type = SyntaxError
elif six.PY3:
# Add explanation from friendly-traceback.
# Only supported in Python 3.
# Not helpful for Grist-specific errors.
# Needs to use the source code, so save it to its source cache.
save_to_linecache(builder.get_text())
message += friendly_errors.friendly_message(err)
return "%s\nraise %s(%r, ('usercode', %r, %r, %r))" % (
textbuilder.line_start_re.sub('# ', input_text.rstrip()),
err_type.__name__, message, line, col + 1, input_text_line)
#----------------------------------------------------------------------
def infer(node):
try:
return next(node.infer(), None)
except astroid.exceptions.InferenceError as e:
return "InferenceError on %r: %r" % (node, e)
_lookup_method_names = ('lookupOne', 'lookupRecords')
_prev_next_functions = ('PREVIOUS', 'NEXT', 'RANK')
_lookup_find_methods = ('lt', 'le', 'gt', 'ge', 'eq', 'previous', 'next')
def _is_table(node):
"""
Return true if obj is a class defining a user table.
"""
return (isinstance(node, astroid.nodes.ClassDef) and node.decorators and
node.decorators.nodes[0].as_string() == 'grist.UserTable')
def _is_local(node):
"""
Returns true if node is a Name node for an innermost variable.
"""
return isinstance(node, astroid.nodes.Name) and node.name in node.scope().locals
@contextlib.contextmanager
def use_inferences(*inference_tips):
transform_args = [(cls.node_class, astroid.inference_tip(cls.infer), cls.filter)
for cls in inference_tips]
for args in transform_args:
astroid.MANAGER.register_transform(*args)
yield
for args in transform_args:
astroid.MANAGER.unregister_transform(*args)
class InferenceTip(object):
"""
Base class for inference tips. A derived class can implement the filter() and infer() class
methods, and then register() will put that inference helper into use.
"""
node_class = None
@classmethod
def filter(cls, node):
raise NotImplementedError()
@classmethod
def infer(cls, node, context):
raise NotImplementedError()
class InferReferenceColumn(InferenceTip):
"""
Inference helper to treat the return value of `grist.Reference("Foo")` as an instance of the
table `Foo`.
"""
node_class = astroid.nodes.Call
@classmethod
def filter(cls, node):
return (isinstance(node.func, astroid.nodes.Attribute) and
node.func.as_string() in ('grist.Reference', 'grist.ReferenceList'))
@classmethod
def infer(cls, node, context=None):
table_id = node.args[0].value
table_class = next(node.root().igetattr(table_id))
yield astroid.bases.Instance(table_class)
def _get_formula_type(function_node):
decorators = function_node.decorators.nodes if function_node.decorators else ()
for dec in decorators:
if (isinstance(dec, astroid.nodes.Call) and
dec.func.as_string() == 'grist.formulaType'):
return dec.args[0]
return None
class InferReferenceFormula(InferenceTip):
"""
Inference helper to treat functions decorated with `grist.formulaType(grist.Reference("Foo"))`
as returning instances of table `Foo`.
"""
node_class = astroid.nodes.FunctionDef
@classmethod
def filter(cls, node):
# All methods on tables are really used as properties.
return _is_table(node.parent.frame())
@classmethod
def infer(cls, node, context=None):
ftype = _get_formula_type(node)
if ftype and InferReferenceColumn.filter(ftype):
return InferReferenceColumn.infer(ftype, context)
return node.infer_call_result(node.parent.frame(), context)
class InferLookupReference(InferenceTip):
"""
Inference helper to treat the return value of `Table.lookupRecords(...)` as returning instances
of table `Table`.
"""
node_class = astroid.nodes.Call
@classmethod
def filter(cls, node):
return (isinstance(node.func, astroid.nodes.Attribute) and
node.func.attrname in _lookup_method_names and
_is_table(infer(node.func.expr)))
@classmethod
def infer(cls, node, context=None):
yield astroid.bases.Instance(infer(node.func.expr))
class InferAllReference(InferenceTip):
"""
Inference helper to treat the return value of `Table.all` as returning instances
of table `Table`.
"""
node_class = astroid.nodes.Attribute
@classmethod
def filter(cls, node):
return node.attrname == "all" and _is_table(infer(node.expr))
@classmethod
def infer(cls, node, context=None):
yield astroid.bases.Instance(infer(node.expr))
class InferLookupFindResult(InferenceTip):
"""
Inference helper to treat the return value of `Table.lookupRecords(...).find.lt(...)` as
returning instances of table `Table`.
"""
node_class = astroid.nodes.Call
@classmethod
def filter(cls, node):
func = node.func
if isinstance(func, astroid.nodes.Attribute) and func.attrname in _lookup_find_methods:
p_expr = func.expr
if isinstance(p_expr, astroid.nodes.Attribute) and p_expr.attrname in ('find', '_find'):
obj = infer(p_expr.expr)
if isinstance(obj, astroid.bases.Instance) and _is_table(obj._proxied):
return True
return False
@classmethod
def infer(cls, node, context=None):
# A bit of fuzziness here: node.func.expr.expr is the result of lookupRecords(). It so happens
# that at the moment it is already of type Instance(table), as if a single record rather than
# a list, to support recognizing `.ColId` attributes. So we return the same type.
yield infer(node.func.expr.expr)
class InferPrevNextResult(InferenceTip):
"""
Inference helper to treat the return value of PREVIOUS(...) and NEXT(...) as returning instances
of table `Table`.
"""
node_class = astroid.nodes.Call
@classmethod
def filter(cls, node):
return (isinstance(node.func, astroid.nodes.Name) and
node.func.name in _prev_next_functions and
node.args)
@classmethod
def infer(cls, node, context=None):
yield infer(node.args[0])
class InferComprehensionBase(InferenceTip):
node_class = astroid.nodes.AssignName
reference_inference_class = None
@classmethod
def filter(cls, node):
compr = node.parent
if not isinstance(compr, astroid.nodes.Comprehension):
return False
if isinstance(compr.iter, cls.reference_inference_class.node_class):
return cls.reference_inference_class.filter(compr.iter)
return False
@classmethod
def infer(cls, node, context=None):
return cls.reference_inference_class.infer(node.parent.iter)
class InferLookupComprehension(InferComprehensionBase):
reference_inference_class = InferLookupReference
class InferAllComprehension(InferComprehensionBase):
reference_inference_class = InferAllReference
class InferRecAssignment(InferenceTip):
"""
Inference helper to raise exception on assignment to `rec`.
"""
node_class = astroid.nodes.AssignName
@classmethod
def filter(cls, node):
if node.name == 'rec':
raise GristSyntaxError('Grist disallows assignment to the special variable "rec"',
('<string>', node.lineno, node.col_offset, ""))
@classmethod
def infer(cls, node, context):
raise NotImplementedError()
class InferRecAttrAssignment(InferenceTip):
"""
Inference helper to raise exception on assignment to `rec`.
"""
node_class = astroid.nodes.AssignAttr
@classmethod
def filter(cls, node):
if isinstance(node.expr, astroid.nodes.Name) and node.expr.name == 'rec':
raise GristSyntaxError("You can't assign a value to a column with `=`. "
"If you mean to check for equality, use `==` instead.",
('<string>', node.lineno, node.col_offset, ""))
@classmethod
def infer(cls, node, context):
raise NotImplementedError()
#----------------------------------------------------------------------
def parse_grist_names(builder):
"""
Returns a list of tuples (col_info, start_pos, table_id, col_id):
col_info: (table_id, col_id) for the formula the name is found in. It is the value passed
in by gencode.py to codebuilder.make_formula_body().
start_pos: Index of the start character of the name in col_info.formula
table_id: Parsed name when the tuple is for a table name; the name of the column's table
when the tuple is for a column name.
col_id: None when tuple is for a table name; col_id when the tuple is for a column name.
"""
code_text = builder.get_text()
with use_inferences(InferReferenceColumn, InferReferenceFormula, InferLookupReference,
InferLookupComprehension, InferAllReference, InferAllComprehension,
InferLookupFindResult, InferPrevNextResult):
atok = asttokens.ASTText(code_text, tree=astroid.builder.parse(code_text))
def make_tuple(start, end, table_id, col_id):
name = col_id or table_id
assert end - start == len(name)
patch = textbuilder.Patch(start, end, name, name)
assert code_text[start:end] == name
patch_source = builder.map_back_patch(patch)
if not patch_source:
return None
in_text, in_value, in_patch = patch_source
if in_value:
return (in_value, in_patch.start, table_id, col_id)
return None
# Helper for collecting column IDs mentioned in order_by/group_by parameters, so that
# those can be updated when a column is renamed.
def list_order_group_by_tuples(table_id, node):
for start, end, col_id in parse_order_group_by(atok, node):
if code_text[start:end] == col_id:
yield make_tuple(start, end, table_id, col_id)
parsed_names = []
for node in asttokens.util.walk(atok.tree, include_joined_str=True):
if isinstance(node, astroid.nodes.Name):
obj = infer(node)
if _is_table(obj) and not _is_local(node):
start, end = atok.get_text_range(node)
parsed_names.append(make_tuple(start, end, node.name, None))
elif isinstance(node, astroid.nodes.Attribute):
obj = infer(node.expr)
if isinstance(obj, astroid.bases.Instance):
cls = obj._proxied
if _is_table(cls):
end = atok.get_text_range(node)[1]
start = end - len(node.attrname)
if code_text[start:end] == node.attrname:
parsed_names.append(make_tuple(start, end, cls.name, node.attrname))
elif isinstance(node, astroid.nodes.Keyword):
func = node.parent.func
if isinstance(func, astroid.nodes.Attribute) and func.attrname in _lookup_method_names:
obj = infer(func.expr)
if _is_table(obj) and node.arg is not None: # Skip **kwargs, which have arg value of None
table_id = obj.name
start = atok.get_text_range(node)[0]
end = start + len(node.arg)
if node.arg == 'order_by':
# Rename values in 'order_by' arguments to lookup methods.
parsed_names.extend(list_order_group_by_tuples(table_id, node.value))
elif code_text[start:end] == node.arg:
parsed_names.append(make_tuple(start, end, table_id, node.arg))
elif (isinstance(func, astroid.nodes.Name)
# Rename values in 'order_by' and 'group_by' arguments to PREVIOUS() and NEXT().
and func.name in _prev_next_functions
and node.arg in ('order_by', 'group_by')
and node.parent.args):
obj = infer(node.parent.args[0])
if isinstance(obj, astroid.bases.Instance):
cls = obj._proxied
if _is_table(cls):
table_id = cls.name
parsed_names.extend(list_order_group_by_tuples(table_id, node.value))
return [name for name in parsed_names if name]
code_filename = "usercode"
def parse_order_group_by(atok, node):
"""
order_by and group_by parameters take the form of a column ID string, optionally prefixed by a
"-", or a tuple of them. We parse out the list of (start, end, col_id) tuples for each column ID
mentioned, to support automatic formula updates when a mentioned column is renamed.
"""
if isinstance(node, astroid.nodes.Const):
if isinstance(node.value, six.string_types):
start, end = atok.get_text_range(node)
# Account for opening/closing quote, and optional leading "-".
return [(start + 2, end - 1, node.value[1:]) if node.value.startswith("-") else
(start + 1, end - 1, node.value)]
elif isinstance(node, astroid.nodes.Tuple):
return [t for e in node.elts for t in parse_order_group_by(atok, e)]
return []
def save_to_linecache(source_code):
"""
Makes source code available to friendly-traceback and traceback formatting in general.
"""
if six.PY3:
import friendly_traceback.source_cache # pylint: disable=import-error
friendly_traceback.source_cache.cache.add(code_filename, source_code)
else:
linecache.cache[code_filename] = (
len(source_code),
None,
[line + '\n' for line in source_code.splitlines()],
code_filename,
)