From b9adcefcce99d3d4f5ab7f129664165a28a55a33 Mon Sep 17 00:00:00 2001
From: Alex Hall <alex.mojaki@gmail.com>
Date: Wed, 23 Aug 2023 12:45:14 +0200
Subject: [PATCH] (core) Use new asttokens.ASTText to support dollar signs
 inside f-strings

Summary:
Replaced uses of asttokens.ASTTokens with asttokens.ASTText when working with plain `ast` trees, and use `atok.get_text_range` instead of `node.first_token`.

Upgraded asttokens in Python 2 (it was already upgraded in Python 3).

Test Plan: Added a test with f-strings.

Reviewers: dsagal

Reviewed By: dsagal

Differential Revision: https://phab.getgrist.com/D4001
---
 sandbox/grist/codebuilder.py         | 21 +++++++++++++--------
 sandbox/grist/formula_prompt.py      | 14 ++++++++------
 sandbox/grist/test_codebuilder.py    |  6 ++++++
 sandbox/grist/test_formula_prompt.py |  2 +-
 sandbox/requirements.txt             |  2 +-
 5 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/sandbox/grist/codebuilder.py b/sandbox/grist/codebuilder.py
index 26212c65..66f7cd3e 100644
--- a/sandbox/grist/codebuilder.py
+++ b/sandbox/grist/codebuilder.py
@@ -60,18 +60,21 @@ def make_formula_body(formula, default_value, assoc_value=None):
   tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
   tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
 
+  atok = asttokens.ASTText(tmp_formula.get_text(), filename=code_filename)
   # Parse the formula into an abstract syntax tree (AST), catching syntax errors.
+  # Constructing ASTText doesn't parse the code, but the .tree property does.
   try:
-    atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True, filename=code_filename)
+    tree = atok.tree
   except SyntaxError as e:
     return textbuilder.Text(_create_syntax_error_code(tmp_formula, formula, e))
 
   # Once we have a tree, go through it and create a subset of the dollar patches that are actually
   # relevant. E.g. this is where we'll skip the "$foo" patches that appear in strings or comments.
   patches = []
-  for node in ast.walk(atok.tree):
+  for node in ast.walk(tree):
     if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
-      input_pos = tmp_formula.map_back_offset(node.first_token.startpos)
+      startpos = atok.get_text_range(node)[0]
+      input_pos = tmp_formula.map_back_offset(startpos)
       m = DOLLAR_REGEX.match(formula, input_pos)
       # If there is no match, then we must have had a "DOLLARblah" identifier that didn't come
       # from translating a "$" prefix.
@@ -90,9 +93,10 @@ def make_formula_body(formula, default_value, assoc_value=None):
 
   # If the last statement is an expression that has its result unused (an ast.Expr node),
   # then insert a "return" keyword.
-  last_statement = atok.tree.body[-1] if atok.tree.body else None
+  last_statement = tree.body[-1] if tree.body else None
   if isinstance(last_statement, ast.Expr):
-    input_pos = tmp_formula.map_back_offset(last_statement.first_token.startpos)
+    startpos = atok.get_text_range(last_statement)[0]
+    input_pos = tmp_formula.map_back_offset(startpos)
     patches.append(textbuilder.make_patch(formula, input_pos, input_pos, "return "))
   elif last_statement is None:
     # If we have an empty body (e.g. just a comment), add a 'pass' at the end.
@@ -102,7 +106,7 @@ def make_formula_body(formula, default_value, assoc_value=None):
       # - Use type() instead of isinstance()
       # - Check last_statement first to try avoiding walking the tree
       type(node) == ast.Return  # pylint: disable=unidiomatic-typecheck
-      for node in itertools.chain([last_statement], ast.walk(atok.tree))
+      for node in itertools.chain([last_statement], ast.walk(tree))
   ):
     message = "No `return` statement, and the last line isn't an expression."
     if isinstance(last_statement, ast.Assign):
@@ -136,11 +140,12 @@ def replace_dollar_attrs(formula):
   formula_builder_text = textbuilder.Text(formula)
   tmp_patches = textbuilder.make_regexp_patches(formula, DOLLAR_REGEX, 'DOLLAR')
   tmp_formula = textbuilder.Replacer(formula_builder_text, tmp_patches)
-  atok = asttokens.ASTTokens(tmp_formula.get_text(), parse=True)
+  atok = asttokens.ASTText(tmp_formula.get_text())
   patches = []
   for node in ast.walk(atok.tree):
     if isinstance(node, ast.Name) and node.id.startswith('DOLLAR'):
-      input_pos = tmp_formula.map_back_offset(node.first_token.startpos)
+      startpos = atok.get_text_range(node)[0]
+      input_pos = tmp_formula.map_back_offset(startpos)
       m = DOLLAR_REGEX.match(formula, input_pos)
       if m:
         patches.append(textbuilder.make_patch(formula, m.start(0), m.end(0), 'rec.'))
diff --git a/sandbox/grist/formula_prompt.py b/sandbox/grist/formula_prompt.py
index 5cb9a8f6..6a72d8b5 100644
--- a/sandbox/grist/formula_prompt.py
+++ b/sandbox/grist/formula_prompt.py
@@ -190,15 +190,15 @@ def convert_completion(completion):
     completion = match.group(1)
 
   result = textwrap.dedent(completion)
+  atok = asttokens.ASTText(result)
 
   try:
-    atok = asttokens.ASTTokens(result, parse=True)
+    # Constructing ASTText doesn't parse the code, but the .tree property does.
+    stmts = atok.tree.body
   except SyntaxError:
     # If we don't have valid Python code, don't suggest a formula at all
     return ""
 
-  stmts = atok.tree.body
-
   # If the code starts with imports, save them for later.
   # In particular, the model may return something like:
   #  from datetime import date
@@ -233,21 +233,23 @@ def convert_completion(completion):
       result = imports + "\n" + result
 
   # Now convert `rec.` to `$` and remove redundant `return ` at the end.
+  atok = asttokens.ASTText(result)
   try:
-    atok = asttokens.ASTTokens(result, parse=True)
+    # Constructing ASTText doesn't parse the code, but the .tree property does.
+    tree = atok.tree
   except SyntaxError:
     # In case the above extraction somehow messed things up
     return ""
 
   replacements = []
-  for node in ast.walk(atok.tree):
+  for node in ast.walk(tree):
     if isinstance(node, ast.Attribute):
       start, end = atok.get_text_range(node.value)
       end += 1
       if result[start:end] == "rec.":
         replacements.append((start, end, "$"))
 
-  last_stmt = atok.tree.body[-1]
+  last_stmt = tree.body[-1]
   if isinstance(last_stmt, ast.Return):
     start, _ = atok.get_text_range(last_stmt)
     expected = "return "
diff --git a/sandbox/grist/test_codebuilder.py b/sandbox/grist/test_codebuilder.py
index 534095f6..df15d5ee 100644
--- a/sandbox/grist/test_codebuilder.py
+++ b/sandbox/grist/test_codebuilder.py
@@ -72,6 +72,12 @@ class TestCodeBuilder(test_engine.EngineTestCase):
     self.assertEqual(make_body("'''test1'''\n\"\"\"test2\"\"\""),
                      "'''test1'''\nreturn \"\"\"test2\"\"\"")
 
+    if six.PY3:
+      self.assertEqual(
+        make_body("f'{$foo + 1 + $bar} 2 {3 + $baz}' + $foo2 + f'{4 + $bar2}!'"),
+        "return f'{rec.foo + 1 + rec.bar} 2 {3 + rec.baz}' + rec.foo2 + f'{4 + rec.bar2}!'"
+      )
+
     # Test that we produce valid code when "$foo" occurs in invalid places.
     if six.PY2:
       raise_code = "raise SyntaxError('invalid syntax', ('usercode', 1, 5, u'foo($bar=1)'))"
diff --git a/sandbox/grist/test_formula_prompt.py b/sandbox/grist/test_formula_prompt.py
index 3fa31117..d7e2890a 100644
--- a/sandbox/grist/test_formula_prompt.py
+++ b/sandbox/grist/test_formula_prompt.py
@@ -251,7 +251,7 @@ from x import (
   z,
 )
 
-x = f"hello {rec.name} " + $name + "!"
+x = f"hello {$name} " + $name + "!"
 if $bar.spam:
   return 0
 $a * $b""")
diff --git a/sandbox/requirements.txt b/sandbox/requirements.txt
index 11a10d8c..118531ac 100644
--- a/sandbox/requirements.txt
+++ b/sandbox/requirements.txt
@@ -1,7 +1,7 @@
 ### python 2 requirements, see requirements3.txt for python 3
 
 astroid==1.6.6
-asttokens==2.0.5
+asttokens==2.2.1
 backports.functools-lru-cache==1.6.4
 chardet==4.0.0
 enum34==1.1.10