From b9adcefcce99d3d4f5ab7f129664165a28a55a33 Mon Sep 17 00:00:00 2001 From: Alex Hall Date: Wed, 23 Aug 2023 12:45:14 +0200 Subject: [PATCH] (core) Use new asttokens.ASTText to support dollar signs inside f-strings Summary: Replaced uses of asttokens.ASTTokens with asttokens.ASTText when working with plain `ast` trees, and use `atok.get_text_range` instead of `node.first_token`. Upgraded asttokens in Python 2 (it was already upgraded in Python 3). Test Plan: Added a test with f-strings. Reviewers: dsagal Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D4001 --- sandbox/grist/codebuilder.py | 21 +++++++++++++-------- sandbox/grist/formula_prompt.py | 14 ++++++++------ sandbox/grist/test_codebuilder.py | 6 ++++++ sandbox/grist/test_formula_prompt.py | 2 +- sandbox/requirements.txt | 2 +- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/sandbox/grist/codebuilder.py b/sandbox/grist/codebuilder.py index 26212c65..66f7cd3e 100644 --- a/sandbox/grist/codebuilder.py +++ b/sandbox/grist/codebuilder.py @@ -60,18 +60,21 @@ def make_formula_body(formula, default_value, assoc_value=None): tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR') tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches) + atok = asttokens.ASTText(tmp_formula.get_text(), filename=code_filename) # Parse the formula into an abstract syntax tree (AST), catching syntax errors. + # Constructing ASTText doesn't parse the code, but the .tree property does. try: - atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True, filename=code_filename) + tree = atok.tree except SyntaxError as e: return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, e)) # Once we have a tree, go through it and create a subset of the dollar patches that are actually # relevant. E.g. this is where we'll skip the "$foo" patches that appear in strings or comments. patches = [] - for node in ast.walk(atok.tree): + for node in ast.walk(tree): if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'): - input_pos = tmp_formula.map_back_offset(node.first_token.startpos) + startpos = atok.get_text_range(node)[0] + input_pos = tmp_formula.map_back_offset(startpos) m = DOLLAR_REGEX.match(formula, input_pos) # If there is no match, then we must have had a "DOLLARblah" identifier that didn't come # from translating a "$" prefix. @@ -90,9 +93,10 @@ def make_formula_body(formula, default_value, assoc_value=None): # If the last statement is an expression that has its result unused (an ast.Expr node), # then insert a "return" keyword. - last_statement = atok.tree.body[-1] if atok.tree.body else None + last_statement = tree.body[-1] if tree.body else None if isinstance(last_statement, ast.Expr): - input_pos = tmp_formula.map_back_offset(last_statement.first_token.startpos) + startpos = atok.get_text_range(last_statement)[0] + input_pos = tmp_formula.map_back_offset(startpos) patches.append(textbuilder.make_patch(formula, input_pos, input_pos, "return ")) elif last_statement is None: # If we have an empty body (e.g. just a comment), add a 'pass' at the end. @@ -102,7 +106,7 @@ def make_formula_body(formula, default_value, assoc_value=None): # - Use type() instead of isinstance() # - Check last_statement first to try avoiding walking the tree type(node) == ast.Return # pylint: disable=unidiomatic-typecheck - for node in itertools.chain([last_statement], ast.walk(atok.tree)) + for node in itertools.chain([last_statement], ast.walk(tree)) ): message = "No `return` statement, and the last line isn't an expression." if isinstance(last_statement, ast.Assign): @@ -136,11 +140,12 @@ def replace_dollar_attrs(formula): formula_builder_text = textbuilder.Text(formula) tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR') tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches) - atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True) + atok = asttokens.ASTText(tmp_formula.get_text()) patches = [] for node in ast.walk(atok.tree): if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'): - input_pos = tmp_formula.map_back_offset(node.first_token.startpos) + startpos = atok.get_text_range(node)[0] + input_pos = tmp_formula.map_back_offset(startpos) m = DOLLAR_REGEX.match(formula, input_pos) if m: patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.')) diff --git a/sandbox/grist/formula_prompt.py b/sandbox/grist/formula_prompt.py index 5cb9a8f6..6a72d8b5 100644 --- a/sandbox/grist/formula_prompt.py +++ b/sandbox/grist/formula_prompt.py @@ -190,15 +190,15 @@ def convert_completion(completion): completion = match.group(1) result = textwrap.dedent(completion) + atok = asttokens.ASTText(result) try: - atok = asttokens.ASTTokens(result, parse=True) + # Constructing ASTText doesn't parse the code, but the .tree property does. + stmts = atok.tree.body except SyntaxError: # If we don't have valid Python code, don't suggest a formula at all return "" - stmts = atok.tree.body - # If the code starts with imports, save them for later. # In particular, the model may return something like: # from datetime import date @@ -233,21 +233,23 @@ def convert_completion(completion): result = imports + "\n" + result # Now convert `rec.` to `$` and remove redundant `return ` at the end. + atok = asttokens.ASTText(result) try: - atok = asttokens.ASTTokens(result, parse=True) + # Constructing ASTText doesn't parse the code, but the .tree property does. + tree = atok.tree except SyntaxError: # In case the above extraction somehow messed things up return "" replacements = [] - for node in ast.walk(atok.tree): + for node in ast.walk(tree): if isinstance(node, ast.Attribute): start, end = atok.get_text_range(node.value) end += 1 if result[start:end] == "rec.": replacements.append((start, end, "$")) - last_stmt = atok.tree.body[-1] + last_stmt = tree.body[-1] if isinstance(last_stmt, ast.Return): start, _ = atok.get_text_range(last_stmt) expected = "return " diff --git a/sandbox/grist/test_codebuilder.py b/sandbox/grist/test_codebuilder.py index 534095f6..df15d5ee 100644 --- a/sandbox/grist/test_codebuilder.py +++ b/sandbox/grist/test_codebuilder.py @@ -72,6 +72,12 @@ class TestCodeBuilder(test_engine.EngineTestCase): self.assertEqual(make_body("'''test1'''\n\"\"\"test2\"\"\""), "'''test1'''\nreturn \"\"\"test2\"\"\"") + if six.PY3: + self.assertEqual( + make_body("f'{$foo + 1 + $bar} 2 {3 + $baz}' + $foo2 + f'{4 + $bar2}!'"), + "return f'{rec.foo + 1 + rec.bar} 2 {3 + rec.baz}' + rec.foo2 + f'{4 + rec.bar2}!'" + ) + # Test that we produce valid code when "$foo" occurs in invalid places. if six.PY2: raise_code = "raise SyntaxError('invalid syntax', ('usercode', 1, 5, u'foo($bar=1)'))" diff --git a/sandbox/grist/test_formula_prompt.py b/sandbox/grist/test_formula_prompt.py index 3fa31117..d7e2890a 100644 --- a/sandbox/grist/test_formula_prompt.py +++ b/sandbox/grist/test_formula_prompt.py @@ -251,7 +251,7 @@ from x import ( z, ) -x = f"hello {rec.name} " + $name + "!" +x = f"hello {$name} " + $name + "!" if $bar.spam: return 0 $a * $b""") diff --git a/sandbox/requirements.txt b/sandbox/requirements.txt index 11a10d8c..118531ac 100644 --- a/sandbox/requirements.txt +++ b/sandbox/requirements.txt @@ -1,7 +1,7 @@ ### python 2 requirements, see requirements3.txt for python 3 astroid==1.6.6 -asttokens==2.0.5 +asttokens==2.2.1 backports.functools-lru-cache==1.6.4 chardet==4.0.0 enum34==1.1.10