(core) Use new asttokens.ASTText to support dollar signs inside f-strings

Summary: Replaced uses of asttokens.ASTTokens with asttokens.ASTText when working with plain `ast` trees, and use `atok.get_text_range` instead of `node.first_token`. Upgraded asttokens in Python 2 (it was already upgraded in Python 3). Test Plan: Added a test with f-strings. Reviewers: dsagal Reviewed By: dsagal Differential Revision: https://phab.getgrist.com/D4001
2024-10-27 20:44:07 +00:00 · 2023-08-23 12:45:14 +02:00 · 2023-08-23 12:45:14 +02:00 · b9adcefcce
commit b9adcefcce
parent fe12562ad7
5 changed files with 29 additions and 16 deletions
--- a/sandbox/grist/codebuilder.py
+++ b/sandbox/grist/codebuilder.py
@ -60,18 +60,21 @@ def make_formula_body(formula, default_value, assoc_value=None):
  tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
  tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)

+  atok = asttokens.ASTText(tmp_formula.get_text(), filename=code_filename)
  # Parse the formula into an abstract syntax tree (AST), catching syntax errors.
+  # Constructing ASTText doesn't parse the code, but the .tree property does.
  try:
-    atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True, filename=code_filename)
+    tree = atok.tree
  except SyntaxError as e:
    return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, e))

  # Once we have a tree, go through it and create a subset of the dollar patches that are actually
  # relevant. E.g. this is where we'll skip the "$foo" patches that appear in strings or comments.
  patches = []
-  for node in ast.walk(atok.tree):
+  for node in ast.walk(tree):
    if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
-      input_pos = tmp_formula.map_back_offset(node.first_token.startpos)
+      startpos = atok.get_text_range(node)[0]
+      input_pos = tmp_formula.map_back_offset(startpos)
      m = DOLLAR_REGEX.match(formula, input_pos)
      # If there is no match, then we must have had a "DOLLARblah" identifier that didn't come
      # from translating a "$" prefix.
@ -90,9 +93,10 @@ def make_formula_body(formula, default_value, assoc_value=None):

  # If the last statement is an expression that has its result unused (an ast.Expr node),
  # then insert a "return" keyword.
-  last_statement = atok.tree.body[-1] if atok.tree.body else None
+  last_statement = tree.body[-1] if tree.body else None
  if isinstance(last_statement, ast.Expr):
-    input_pos = tmp_formula.map_back_offset(last_statement.first_token.startpos)
+    startpos = atok.get_text_range(last_statement)[0]
+    input_pos = tmp_formula.map_back_offset(startpos)
    patches.append(textbuilder.make_patch(formula, input_pos, input_pos, "return "))
  elif last_statement is None:
    # If we have an empty body (e.g. just a comment), add a 'pass' at the end.
@ -102,7 +106,7 @@ def make_formula_body(formula, default_value, assoc_value=None):
      # - Use type() instead of isinstance()
      # - Check last_statement first to try avoiding walking the tree
      type(node) == ast.Return  # pylint: disable=unidiomatic-typecheck
-      for node in itertools.chain([last_statement], ast.walk(atok.tree))
+      for node in itertools.chain([last_statement], ast.walk(tree))
  ):
    message = "No `return` statement, and the last line isn't an expression."
    if isinstance(last_statement, ast.Assign):
@ -136,11 +140,12 @@ def replace_dollar_attrs(formula):
  formula_builder_text = textbuilder.Text(formula)
  tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
  tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
-  atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True)
+  atok = asttokens.ASTText(tmp_formula.get_text())
  patches = []
  for node in ast.walk(atok.tree):
    if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
-      input_pos = tmp_formula.map_back_offset(node.first_token.startpos)
+      startpos = atok.get_text_range(node)[0]
+      input_pos = tmp_formula.map_back_offset(startpos)
      m = DOLLAR_REGEX.match(formula, input_pos)
      if m:
        patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.'))
--- a/sandbox/grist/formula_prompt.py
+++ b/sandbox/grist/formula_prompt.py
@ -190,15 +190,15 @@ def convert_completion(completion):
    completion = match.group(1)

  result = textwrap.dedent(completion)
+  atok = asttokens.ASTText(result)

  try:
-    atok = asttokens.ASTTokens(result, parse=True)
+    # Constructing ASTText doesn't parse the code, but the .tree property does.
+    stmts = atok.tree.body
  except SyntaxError:
    # If we don't have valid Python code, don't suggest a formula at all
    return ""

-  stmts = atok.tree.body
-
  # If the code starts with imports, save them for later.
  # In particular, the model may return something like:
  #  from datetime import date
@ -233,21 +233,23 @@ def convert_completion(completion):
      result = imports + "\n" + result

  # Now convert `rec.` to `$` and remove redundant `return ` at the end.
+  atok = asttokens.ASTText(result)
  try:
-    atok = asttokens.ASTTokens(result, parse=True)
+    # Constructing ASTText doesn't parse the code, but the .tree property does.
+    tree = atok.tree
  except SyntaxError:
    # In case the above extraction somehow messed things up
    return ""

  replacements = []
-  for node in ast.walk(atok.tree):
+  for node in ast.walk(tree):
    if isinstance(node, ast.Attribute):
      start, end = atok.get_text_range(node.value)
      end += 1
      if result[start:end] == "rec.":
        replacements.append((start, end, "$"))

-  last_stmt = atok.tree.body[-1]
+  last_stmt = tree.body[-1]
  if isinstance(last_stmt, ast.Return):
    start, _ = atok.get_text_range(last_stmt)
    expected = "return "
--- a/sandbox/grist/test_codebuilder.py
+++ b/sandbox/grist/test_codebuilder.py
@ -72,6 +72,12 @@ class TestCodeBuilder(test_engine.EngineTestCase):
    self.assertEqual(make_body("'''test1'''\n\"\"\"test2\"\"\""),
                     "'''test1'''\nreturn \"\"\"test2\"\"\"")

+    if six.PY3:
+      self.assertEqual(
+        make_body("f'{$foo + 1 + $bar} 2 {3 + $baz}' + $foo2 + f'{4 + $bar2}!'"),
+        "return f'{rec.foo + 1 + rec.bar} 2 {3 + rec.baz}' + rec.foo2 + f'{4 + rec.bar2}!'"
+      )
+
    # Test that we produce valid code when "$foo" occurs in invalid places.
    if six.PY2:
      raise_code = "raise SyntaxError('invalid syntax', ('usercode', 1, 5, u'foo($bar=1)'))"
--- a/sandbox/grist/test_formula_prompt.py
+++ b/sandbox/grist/test_formula_prompt.py
@ -251,7 +251,7 @@ from x import (
  z,
 )

-x = f"hello {rec.name} " + $name + "!"
+x = f"hello {$name} " + $name + "!"
 if $bar.spam:
  return 0
 $a * $b""")
--- a/sandbox/requirements.txt
+++ b/sandbox/requirements.txt
@ -1,7 +1,7 @@
 ### python 2 requirements, see requirements3.txt for python 3

 astroid==1.6.6
-asttokens==2.0.5
+asttokens==2.2.1
 backports.functools-lru-cache==1.6.4
 chardet==4.0.0
 enum34==1.1.10