2020-07-27 18:57:36 +00:00
|
|
|
"""
|
|
|
|
This module allows building text with transformations. It is used specifically for transforming
|
|
|
|
code, such as replacing "$foo" with "rec.foo" in formulas, and composing formulas into a full
|
|
|
|
usercode module.
|
|
|
|
|
|
|
|
The value of this module is that it lets us map replacements (or patches) to output code,
|
|
|
|
such as those generated to rename column references, into patches to the original inputs. It
|
|
|
|
allows us to deal with the complete valid usercode module text when searching for renames.
|
|
|
|
"""
|
|
|
|
import bisect
|
|
|
|
import re
|
|
|
|
from collections import namedtuple
|
|
|
|
|
2021-06-22 15:12:25 +00:00
|
|
|
import six
|
|
|
|
|
2020-07-27 18:57:36 +00:00
|
|
|
# Describes one replacement in some text: the slice [start, end) currently holds `old_text`
# and is to be replaced with `new_text`.
Patch = namedtuple(
    'Patch',
    ('start', 'end', 'old_text', 'new_text'),
)

# Matches the empty string at the beginning of every line (multi-line mode).
line_start_re = re.compile(r'^', flags=re.MULTILINE)
|
|
|
|
|
|
|
|
|
|
|
|
def make_patch(full_text, start, end, new_text):
    """
    Builds the Patch that replaces the slice `full_text[start:end]` with `new_text`.

    The text currently occupying that slice is recorded as the patch's `old_text`.
    """
    replaced = full_text[start:end]
    return Patch(start=start, end=end, old_text=replaced, new_text=new_text)
|
|
|
|
|
|
|
|
|
|
|
|
def make_regexp_patches(full_text, regexp, repl):
    """
    Builds a list with one patch per match of `regexp` in `full_text`, each replacing the whole
    matched text. Matches are found with `regexp.finditer(full_text)`.

    If `repl` is callable, the replacement for a match is `repl(match_object)`. Otherwise `repl`
    itself is used as the replacement, verbatim, without interpreting any special characters.
    """
    if callable(repl):
        get_replacement = repl
    else:
        def get_replacement(_match):
            return repl

    patches = []
    for match in regexp.finditer(full_text):
        begin, finish = match.span(0)
        patches.append(make_patch(full_text, begin, finish, get_replacement(match)))
    return patches
|
|
|
|
|
|
|
|
|
|
|
|
def validate_patch(text, patch):
    """
    Ensures that the given patch fits the given text, raising ValueError if not.

    A patch fits when `text[patch.start:patch.end]` equals `patch.old_text`; in that case the
    function returns None. Otherwise the ValueError message shows the text actually found, with
    up to 10 characters of context on either side, the patch position, and the expected text.
    """
    found = text[patch.start:patch.end]
    if found == patch.old_text:
        return

    # Take the prefix first and then its tail. Slicing `text[patch.start - 10:patch.start]`
    # directly would use a negative start index whenever patch.start < 10, which Python counts
    # from the END of the text, dropping the context exactly for patches near the beginning.
    before = text[:patch.start][-10:]
    after = text[patch.end:patch.end + 10]
    raise ValueError("Invalid patch to '%s[%s]%s' at %s; expected '%s'" % (
        before, found, after, patch.start, patch.old_text))
|
|
|
|
|
|
|
|
|
|
|
|
class Builder(object):
    """
    Abstract base for objects that produce text and can translate a patch to that text back into
    some useful value. Builders are chained to transform text step by step; once we know what to
    change in the final result, map_back_patch() is used to get to the source of the original
    `Text` object.
    """

    def get_text(self):
        """
        Returns the text this Builder produces. Subclasses must override this.
        """
        raise NotImplementedError

    def map_back_patch(self, patch):
        """
        Maps a patch to this Builder's output back towards its source; see Text.map_back_patch.
        Subclasses must override this.
        """
        raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
class Text(Builder):
    """
    The bottom-most Builder: it wraps a plain string together with an optional arbitrary value
    (e.g. which column a formula came from). Mapping a patch of transformed text all the way back
    ends here, producing a tuple (text, value, patch) where text and value are the constructor
    arguments and patch applies to text.
    """

    def __init__(self, text, value=None):
        self._text, self._value = text, value

    def get_text(self):
        """
        Returns the string given to the constructor, unchanged.
        """
        return self._text

    def map_back_patch(self, patch):
        """
        Returns the tuple (text, value, patch): text and value as given to the constructor, and
        the patch itself, which applies to text. The patch's old_text is asserted to match the
        text it covers.
        """
        covered = self._text[patch.start:patch.end]
        assert covered == patch.old_text
        return (self._text, self._value, patch)
|
|
|
|
|
|
|
|
|
|
|
|
class Replacer(Builder):
    """
    A Builder whose output is the text of an input Builder with a set of patches applied. It
    records where replacements happened, so that a patch to its output can be mapped back to a
    patch to its input.
    """

    def __init__(self, in_builder, patches):
        self._in_builder = in_builder

        # Parallel lists of checkpoints: _input_offsets[i] in the input text corresponds to
        # _output_offsets[i] in the output text. Both lists are in increasing order.
        self._input_offsets = [0]
        self._output_offsets = [0]

        source = self._in_builder.get_text()
        pieces = []
        consumed = 0      # Input position up to which text has been copied or replaced.
        produced = 0      # Output position reached so far.
        # Patches must be visited in sorted order, so that positions only move forward.
        for item in sorted(patches):
            validate_patch(source, item)
            pieces.extend((source[consumed:item.start], item.new_text))
            produced += (item.start - consumed) + len(item.new_text)
            consumed = item.end
            # Only a replacement that changes the length shifts later positions; record a new
            # checkpoint at the patch's end position in the input and in the output.
            if len(item.new_text) != item.end - item.start:
                self._input_offsets.append(consumed)
                self._output_offsets.append(produced)

        pieces.append(source[consumed:])
        self._output_text = ''.join(pieces)

    def get_text(self):
        """
        Returns the input text with all patches applied.
        """
        return self._output_text

    def map_back_patch(self, patch):
        """
        Validates `patch` against the output text, converts its start and end to input positions,
        and passes the resulting patch on to the input Builder's map_back_patch().
        """
        validate_patch(self._output_text, patch)
        in_start, in_end = [self.get_input_pos(pos) for pos in (patch.start, patch.end)]
        source = self._in_builder.get_text()
        return self._in_builder.map_back_patch(
            make_patch(source, in_start, in_end, patch.new_text))

    def get_input_pos(self, out_pos):
        """Returns the position in the input text corresponding to the given position in output."""
        # Locate the last checkpoint at or before out_pos, and keep the same distance from it.
        checkpoint = bisect.bisect_right(self._output_offsets, out_pos) - 1
        distance = out_pos - self._output_offsets[checkpoint]
        return distance + self._input_offsets[checkpoint]

    def map_back_offset(self, out_pos):
        """
        Returns the position corresponding to out_pos in the original input, following the chain
        of input Builders for as long as they are Replacers.
        """
        pos = self.get_input_pos(out_pos)
        upstream = self._in_builder
        if not isinstance(upstream, Replacer):
            return pos
        return upstream.map_back_offset(pos)
|
|
|
|
|
|
|
|
|
|
|
|
class Combiner(Builder):
    """
    Combiner allows building output text from a sequence of parts. Each part is either another
    Builder or a plain string (text, or bytes which get decoded as UTF-8). When a patch is mapped
    back, it gets passed to the Builder it came from, and must not span more than one part.
    """

    def __init__(self, parts):
        """
        Concatenates the text of all `parts` and remembers where each part starts in the result.
        """
        self._parts = parts

        text_parts = [
            (p if isinstance(p, six.text_type) else
             p.decode('utf8') if isinstance(p, six.binary_type) else
             p.get_text())
            for p in self._parts]
        self._text = ''.join(text_parts)

        # _offsets[i] is the position in the combined text at which the text of part i begins.
        offset = 0
        self._offsets = []
        for t in text_parts:
            self._offsets.append(offset)
            offset += len(t)

    def get_text(self):
        """
        Returns the concatenated text of all parts.
        """
        return self._text

    def map_back_patch(self, patch):
        """
        Maps a patch to the combined text back to the part it falls into.

        Returns the result of that part's map_back_patch(), called with the patch shifted to be
        relative to the part's own text, or None if the part is a plain string (text or bytes)
        rather than a Builder.

        Raises ValueError if the patch does not fit the combined text, or if its first and last
        characters do not fall within the same part. An empty patch positioned exactly at the
        start of a part is rejected too, since `patch.end - 1` then lies before that part.
        """
        validate_patch(self._text, patch)
        # bisect_right() yields the 1-based index of the part containing the given position,
        # or 0 for a position before the start of the text.
        start_index = bisect.bisect_right(self._offsets, patch.start)
        end_index = bisect.bisect_right(self._offsets, patch.end - 1)
        if start_index <= 0 or end_index <= 0 or start_index != end_index:
            raise ValueError("Invalid patch to Combiner: %s" % (patch,))

        offset = self._offsets[start_index - 1]
        part = self._parts[start_index - 1]
        # Plain strings have nothing to map back to. Check for both text and bytes, to agree with
        # the constructor: six.string_types alone does not include bytes on Python 3, so a bytes
        # part would otherwise be treated as a Builder and fail with AttributeError.
        if isinstance(part, (six.text_type, six.binary_type)):
            return None

        in_patch = Patch(patch.start - offset, patch.end - offset, patch.old_text, patch.new_text)
        return part.map_back_patch(in_patch)
|