(core) Use new asttokens.ASTText to support dollar signs inside f-strings

Summary:
Replaced uses of asttokens.ASTTokens with asttokens.ASTText when working with plain `ast` trees, and used `atok.get_text_range(node)[0]` instead of `node.first_token.startpos` to find the start offset of a node.

Upgraded asttokens in Python 2 (it was already upgraded in Python 3).

Test Plan: Added a test with f-strings.

Reviewers: dsagal

Reviewed By: dsagal

Differential Revision: https://phab.getgrist.com/D4001
This commit is contained in:
Alex Hall 2023-08-23 12:45:14 +02:00
parent fe12562ad7
commit b9adcefcce
5 changed files with 29 additions and 16 deletions

View File

@ -60,18 +60,21 @@ def make_formula_body(formula, default_value, assoc_value=None):
tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR') tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches) tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
atok = asttokens.ASTText(tmp_formula.get_text(), filename=code_filename)
# Parse the formula into an abstract syntax tree (AST), catching syntax errors. # Parse the formula into an abstract syntax tree (AST), catching syntax errors.
# Constructing ASTText doesn't parse the code, but the .tree property does.
try: try:
atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True, filename=code_filename) tree = atok.tree
except SyntaxError as e: except SyntaxError as e:
return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, e)) return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, e))
# Once we have a tree, go through it and create a subset of the dollar patches that are actually # Once we have a tree, go through it and create a subset of the dollar patches that are actually
# relevant. E.g. this is where we'll skip the "$foo" patches that appear in strings or comments. # relevant. E.g. this is where we'll skip the "$foo" patches that appear in strings or comments.
patches = [] patches = []
for node in ast.walk(atok.tree): for node in ast.walk(tree):
if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'): if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
input_pos = tmp_formula.map_back_offset(node.first_token.startpos) startpos = atok.get_text_range(node)[0]
input_pos = tmp_formula.map_back_offset(startpos)
m = DOLLAR_REGEX.match(formula, input_pos) m = DOLLAR_REGEX.match(formula, input_pos)
# If there is no match, then we must have had a "DOLLARblah" identifier that didn't come # If there is no match, then we must have had a "DOLLARblah" identifier that didn't come
# from translating a "$" prefix. # from translating a "$" prefix.
@ -90,9 +93,10 @@ def make_formula_body(formula, default_value, assoc_value=None):
# If the last statement is an expression that has its result unused (an ast.Expr node), # If the last statement is an expression that has its result unused (an ast.Expr node),
# then insert a "return" keyword. # then insert a "return" keyword.
last_statement = atok.tree.body[-1] if atok.tree.body else None last_statement = tree.body[-1] if tree.body else None
if isinstance(last_statement, ast.Expr): if isinstance(last_statement, ast.Expr):
input_pos = tmp_formula.map_back_offset(last_statement.first_token.startpos) startpos = atok.get_text_range(last_statement)[0]
input_pos = tmp_formula.map_back_offset(startpos)
patches.append(textbuilder.make_patch(formula, input_pos, input_pos, "return ")) patches.append(textbuilder.make_patch(formula, input_pos, input_pos, "return "))
elif last_statement is None: elif last_statement is None:
# If we have an empty body (e.g. just a comment), add a 'pass' at the end. # If we have an empty body (e.g. just a comment), add a 'pass' at the end.
@ -102,7 +106,7 @@ def make_formula_body(formula, default_value, assoc_value=None):
# - Use type() instead of isinstance() # - Use type() instead of isinstance()
# - Check last_statement first to try avoiding walking the tree # - Check last_statement first to try avoiding walking the tree
type(node) == ast.Return # pylint: disable=unidiomatic-typecheck type(node) == ast.Return # pylint: disable=unidiomatic-typecheck
for node in itertools.chain([last_statement], ast.walk(atok.tree)) for node in itertools.chain([last_statement], ast.walk(tree))
): ):
message = "No `return` statement, and the last line isn't an expression." message = "No `return` statement, and the last line isn't an expression."
if isinstance(last_statement, ast.Assign): if isinstance(last_statement, ast.Assign):
@ -136,11 +140,12 @@ def replace_dollar_attrs(formula):
formula_builder_text = textbuilder.Text(formula) formula_builder_text = textbuilder.Text(formula)
tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR') tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches) tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True) atok = asttokens.ASTText(tmp_formula.get_text())
patches = [] patches = []
for node in ast.walk(atok.tree): for node in ast.walk(atok.tree):
if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'): if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
input_pos = tmp_formula.map_back_offset(node.first_token.startpos) startpos = atok.get_text_range(node)[0]
input_pos = tmp_formula.map_back_offset(startpos)
m = DOLLAR_REGEX.match(formula, input_pos) m = DOLLAR_REGEX.match(formula, input_pos)
if m: if m:
patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.')) patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.'))

View File

@ -190,15 +190,15 @@ def convert_completion(completion):
completion = match.group(1) completion = match.group(1)
result = textwrap.dedent(completion) result = textwrap.dedent(completion)
atok = asttokens.ASTText(result)
try: try:
atok = asttokens.ASTTokens(result, parse=True) # Constructing ASTText doesn't parse the code, but the .tree property does.
stmts = atok.tree.body
except SyntaxError: except SyntaxError:
# If we don't have valid Python code, don't suggest a formula at all # If we don't have valid Python code, don't suggest a formula at all
return "" return ""
stmts = atok.tree.body
# If the code starts with imports, save them for later. # If the code starts with imports, save them for later.
# In particular, the model may return something like: # In particular, the model may return something like:
# from datetime import date # from datetime import date
@ -233,21 +233,23 @@ def convert_completion(completion):
result = imports + "\n" + result result = imports + "\n" + result
# Now convert `rec.` to `$` and remove redundant `return ` at the end. # Now convert `rec.` to `$` and remove redundant `return ` at the end.
atok = asttokens.ASTText(result)
try: try:
atok = asttokens.ASTTokens(result, parse=True) # Constructing ASTText doesn't parse the code, but the .tree property does.
tree = atok.tree
except SyntaxError: except SyntaxError:
# In case the above extraction somehow messed things up # In case the above extraction somehow messed things up
return "" return ""
replacements = [] replacements = []
for node in ast.walk(atok.tree): for node in ast.walk(tree):
if isinstance(node, ast.Attribute): if isinstance(node, ast.Attribute):
start, end = atok.get_text_range(node.value) start, end = atok.get_text_range(node.value)
end += 1 end += 1
if result[start:end] == "rec.": if result[start:end] == "rec.":
replacements.append((start, end, "$")) replacements.append((start, end, "$"))
last_stmt = atok.tree.body[-1] last_stmt = tree.body[-1]
if isinstance(last_stmt, ast.Return): if isinstance(last_stmt, ast.Return):
start, _ = atok.get_text_range(last_stmt) start, _ = atok.get_text_range(last_stmt)
expected = "return " expected = "return "

View File

@ -72,6 +72,12 @@ class TestCodeBuilder(test_engine.EngineTestCase):
self.assertEqual(make_body("'''test1'''\n\"\"\"test2\"\"\""), self.assertEqual(make_body("'''test1'''\n\"\"\"test2\"\"\""),
"'''test1'''\nreturn \"\"\"test2\"\"\"") "'''test1'''\nreturn \"\"\"test2\"\"\"")
if six.PY3:
self.assertEqual(
make_body("f'{$foo + 1 + $bar} 2 {3 + $baz}' + $foo2 + f'{4 + $bar2}!'"),
"return f'{rec.foo + 1 + rec.bar} 2 {3 + rec.baz}' + rec.foo2 + f'{4 + rec.bar2}!'"
)
# Test that we produce valid code when "$foo" occurs in invalid places. # Test that we produce valid code when "$foo" occurs in invalid places.
if six.PY2: if six.PY2:
raise_code = "raise SyntaxError('invalid syntax', ('usercode', 1, 5, u'foo($bar=1)'))" raise_code = "raise SyntaxError('invalid syntax', ('usercode', 1, 5, u'foo($bar=1)'))"

View File

@ -251,7 +251,7 @@ from x import (
z, z,
) )
x = f"hello {rec.name} " + $name + "!" x = f"hello {$name} " + $name + "!"
if $bar.spam: if $bar.spam:
return 0 return 0
$a * $b""") $a * $b""")

View File

@ -1,7 +1,7 @@
### python 2 requirements, see requirements3.txt for python 3 ### python 2 requirements, see requirements3.txt for python 3
astroid==1.6.6 astroid==1.6.6
asttokens==2.0.5 asttokens==2.2.1
backports.functools-lru-cache==1.6.4 backports.functools-lru-cache==1.6.4
chardet==4.0.0 chardet==4.0.0
enum34==1.1.10 enum34==1.1.10