gristlabs_grist-core/sandbox/grist/objtypes.py

"""
This module implements handling of non-primitive objects as values in Grist data cells. It is
currently only used to handle errors thrown from formulas.

Non-primitive values are represented in actions as [type_name, args...].
  objtypes.register_converter() - registers a new supported object type.
  objtypes.encode_object(obj)   - returns a marshallable list representation.
  objtypes.decode_object(val)   - returns an object represented by the [name, args...] argument.

If an object cannot be encoded, an "UnmarshallableValue" of the form ['U', repr(obj)] is returned
instead. If a value cannot be decoded, a RaisedException wrapping the error is returned.
"""
# pylint: disable=too-many-return-statements
import traceback
from datetime import date, datetime
from math import isnan

import moment
import records
import six


class UnmarshallableError(ValueError):
  """
  Raised when Grist has no way to represent an object in an action: either no converter is
  registered for the object's type, or encoding it involves values that cannot be marshalled.
  """


class ConversionError(ValueError):
  """
  Signals that a value could not be converted from one Grist type to another. It is not usually
  exposed to the user, because such a failure normally results in silent alttext.
  """


class InvalidTypedValue(ValueError):
  """
  Raised when an AltText stood in for a typed value and using it produced an error. The AltText
  value is carried on the exception, both to make the message more informative and so that
  displayCols sort properly.

  Attributes:
    typename: name of the type the value was expected to have (e.g. "Ref", "Date").
    value: the text that was found in place of a properly typed value.
  """
  def __init__(self, typename, value):
    # Only the typename is passed up as the exception's args; the value is kept as an attribute.
    super(InvalidTypedValue, self).__init__(typename)
    self.value = value
    self.typename = typename

  def __str__(self):
    parts = (self.typename, self.value)
    return "Invalid %s: %s" % parts


class AltText(object):
  """
  Represents a text value in a non-text column. The separate class allows formulas to access
  wrong-type values. We use a wrapper rather than expose text directly to formulas, because with
  text there is a risk that e.g. a formula that's supposed to add numbers would add two strings
  with unexpected result.

  Args:
    text: the text found in the cell.
    typename: optional name of the type the column expected (e.g. "Ref" or "Date"); it is only
      used to build the InvalidTypedValue raised on attribute access.
  """
  def __init__(self, text, typename=None):
    self._text = text
    self._typename = typename

  def __str__(self):
    return self._text

  def __int__(self):
    # This ensures that AltText values that look like ints may be cast back to int.
    # Convert to float first, since python does not allow casting strings with decimals to int.
    return int(float(self._text))

  def __float__(self):
    # This ensures that AltText values that look like floats may be cast back to float.
    return float(self._text)

  def __repr__(self):
    return '%s(%r)' % (self.__class__.__name__, self._text)

  # Allow comparing to AltText("something"). Only the text takes part in equality and hashing;
  # the typename is ignored.
  def __eq__(self, other):
    return isinstance(other, self.__class__) and self._text == other._text

  def __ne__(self, other):
    return not self.__eq__(other)

  def __hash__(self):
    return hash((self.__class__, self._text))

  def __getattr__(self, name):
    # __getattr__ is only consulted for names that normal lookup did not find.
    #
    # Special (dunder) names are probed by generic machinery such as hasattr(), copy and pickle,
    # which expect AttributeError for "not supported". Raising InvalidTypedValue (a ValueError)
    # for those would break such callers. The two backing fields are also excluded: if they are
    # missing (instance created without __init__, e.g. while being copied or unpickled), reading
    # them below would re-enter __getattr__ and recurse without end.
    if name in ('_text', '_typename') or (name.startswith('__') and name.endswith('__')):
      raise AttributeError(name)
    # On attempt to do $foo.Bar on an AltText value such as "hello", raise an exception that will
    # show up as e.g. "Invalid Ref: hello" or "Invalid Date: hello".
    raise InvalidTypedValue(self._typename, self._text)


class UnmarshallableValue(object):
  """
  Stand-in for a value that has no Grist encoding. It only remembers the repr text that was
  recorded for the value, so the one thing that can be done with it is to encode it back
  (as ['U', value_repr]).
  """
  def __init__(self, value_repr):
    # The repr string that was stored in place of the original value.
    self.value_repr = value_repr


# Unique sentinel value representing a pending value, i.e. one that has not been computed yet.
# It's encoded as ['P'], and shown to the user as "Loading..." text. With the switch to stored
# formulas, it's currently only used when a document was just migrated.
# It is a bare object(), so nothing else can be equal to it.
_pending_sentinel = object()

# A placeholder for a value hidden by access control rules.
# Depending on the types of the columns involved, copying
# a censored value and pasting elsewhere will either use
# CensoredValue.__repr__ (python) or CensoredValue.toString (typescript)
# so they should match
class CensoredValue(object):
  def __repr__(self):
    return 'CENSORED'

_censored_sentinel = CensoredValue()


_max_js_int = 1<<31

def is_int_short(value):
  return -_max_js_int <= value < _max_js_int

def safe_repr(obj):
  """
  Returns repr(obj), or the placeholder "<type-name>" if calling repr() raises an exception.
  """
  try:
    text = repr(obj)
  except Exception:
    # A broken __repr__ must not prevent us from producing some description of the value.
    text = '<%s>' % type(obj).__name__
  return text

def strict_equal(a, b):
  """
  Compares a and b for equality, additionally requiring that they have exactly the same type.
  Any exception raised by the comparison is treated as "not equal".
  """
  # pylint: disable=unidiomatic-typecheck
  # The try/except is needed because some comparisons may fail (e.g. datetimes with different
  # tzinfo).
  try:
    if not type(a) == type(b):
      return False
    return a == b
  except Exception:
    return False

def equal_encoding(a, b):
  """
  Returns whether a and b are equivalent in terms of their Grist encoding.

  Two floats are compared directly, except that NaN is considered equal to NaN. A bool is only
  ever equal to another bool of the same value. Anything else is compared via encode_object().
  """
  if isinstance(a, float) and isinstance(b, float):
    # Compare NaNs as equal.
    both_nan = isnan(a) and isnan(b)
    return both_nan or a == b

  # Compare bools as equal only to bools (these are distinguishable from numbers in JSON, and we
  # take care to distinguish them in DB too).
  a_is_bool = isinstance(a, bool)
  b_is_bool = isinstance(b, bool)
  if a_is_bool or b_is_bool:
    return a_is_bool and b_is_bool and a == b

  # Note for simple types, encode_object is trivial, and will result in a non-type-specific
  # comparison (e.g. 1 and 1.0 will compare equal, as would "a" and u"a"). This is to capture
  # equivalence of values in their JSON representations.
  encoded_a = encode_object(a)
  encoded_b = encode_object(b)
  return encoded_a == encoded_b

def encode_object(value):
  """
  Produces a Grist-encoded version of the value, e.g. turning a Date into ['d', timestamp].

  Strings, floats, bools, None and integers within the is_int_short() range are returned as-is.
  Other supported values become a list whose first element is a one-letter type code:
    ['R', table_id, row_id]   - Record or RecordStub
    ['D', timestamp, zone]    - datetime
    ['d', timestamp]          - date
    ['E', ...]                - RaisedException
    ['L', items...]           - list, tuple, RecordList, ColumnView, RecordSet
    ['O', {key: value}]       - dict with string keys
    ['P'] / ['C']             - the pending / censored sentinels
    ['U', repr]               - UnmarshallableValue

  Never raises: returns ['U', repr(value)] if the value's type is not supported or if anything
  goes wrong while encoding it.
  """
  try:
    # six.string_types and six.integer_types are (str, unicode) and (int, long) on Python 2, so
    # this matches the same values as naming those types directly, without referring to names
    # that do not exist on Python 3.
    if isinstance(value, six.string_types) or isinstance(value, (float, bool)) or value is None:
      return value
    elif isinstance(value, six.integer_types):
      # Note that bools (a subclass of int) were already handled above.
      if not is_int_short(value):
        raise UnmarshallableError("Integer too large")
      return value
    elif isinstance(value, AltText):
      return str(value)
    elif isinstance(value, records.Record):
      return ['R', value._table.table_id, value._row_id]
    elif isinstance(value, RecordStub):
      return ['R', value.table_id, value.row_id]
    elif isinstance(value, datetime):
      # This check must precede the one for date, since datetime is a subclass of date.
      # NOTE(review): assumes a non-empty tzinfo exposes .zone.name; any other kind of tzinfo
      # raises here and falls through to the ['U', ...] result below.
      return ['D', moment.dt_to_ts(value), value.tzinfo.zone.name if value.tzinfo else 'UTC']
    elif isinstance(value, date):
      return ['d', moment.date_to_ts(value)]
    elif isinstance(value, RaisedException):
      return ['E'] + value.encode_args()
    elif isinstance(value, (list, tuple, RecordList, records.ColumnView)):
      return ['L'] + [encode_object(item) for item in value]
    elif isinstance(value, records.RecordSet):
      # Represent RecordSet (e.g. result of lookupRecords) in the same way as a RecordList.
      return ['L'] + [encode_object(int(item)) for item in value]
    elif isinstance(value, dict):
      if not all(isinstance(key, six.string_types) for key in value):
        raise UnmarshallableError("Dict with non-string keys")
      return ['O', {key: encode_object(val) for key, val in six.iteritems(value)}]
    # The sentinels are unique objects, so compare by identity. This also avoids invoking an
    # arbitrary __eq__ on values of unknown types.
    elif value is _pending_sentinel:
      return ['P']
    elif value is _censored_sentinel:
      return ['C']
    elif isinstance(value, UnmarshallableValue):
      return ['U', value.value_repr]
  except Exception:
    # Deliberately best-effort: whatever went wrong, fall through to the ['U', ...] result.
    pass
  # We either don't know how to convert the value, or failed during the conversion. Instead we
  # return an "UnmarshallableValue" object, with repr() of the value to show to the user.
  return ['U', safe_repr(value)]

def decode_object(value):
  """
  Given a Grist-encoded value, returns an object represented by it.

  Values that are not lists or tuples are returned as they are (except that on Python 2, unicode
  strings are converted to utf8-encoded binary strings). For a list or tuple, the first element
  is the type code and the remaining elements are its arguments.

  If typename is unknown, or construction fails for any reason, returns (not raises!)
  RaisedException with the original exception in its .error property.
  """
  try:
    if not isinstance(value, (list, tuple)):
      # The check is limited to Python 2, where six.text_type is unicode; on Python 3 there is
      # no separate unicode type, and strings are returned unchanged.
      if six.PY2 and isinstance(value, six.text_type):
        # TODO For now, the sandbox uses binary strings throughout; see TODO in main.py for more
        # on this. Strings that come from JS become Python binary strings, and we will not see
        # unicode here. But we may see it if unmarshalling data that comes from DB, since
        # DocStorage encodes/decodes values by marshaling JS strings as unicode. For consistency,
        # convert those unicode strings to binary strings too.
        return value.encode('utf8')
      return value
    code = value[0]
    args = value[1:]
    if code == 'R':
      return RecordStub(args[0], args[1])
    elif code == 'D':
      return moment.ts_to_dt(args[0], moment.Zone(args[1]))
    elif code == 'd':
      return moment.ts_to_date(args[0])
    elif code == 'E':
      return RaisedException.decode_args(*args)
    elif code == 'L':
      return [decode_object(item) for item in args]
    elif code == 'O':
      return {decode_object(key): decode_object(val) for key, val in six.iteritems(args[0])}
    elif code == 'P':
      return _pending_sentinel
    elif code == 'C':
      return _censored_sentinel
    elif code == 'U':
      return UnmarshallableValue(args[0])
    # Wrap code in a 1-tuple, so that a code which is itself a tuple gets formatted rather than
    # being unpacked as multiple format arguments (which would raise a TypeError instead).
    raise KeyError("Unknown object type code %r" % (code,))
  except Exception as e:
    # This includes malformed input, such as an empty list or missing arguments.
    return RaisedException(e)

#----------------------------------------------------------------------

class RaisedException(object):
  """
  RaisedException is a special type of object which indicates that a value in a cell isn't a plain
  value but an exception to be raised. All caught exceptions are wrapped in RaisedException. The
  original exception is saved in the .error attribute. The traceback is saved in .details
  attribute only when needed (flag include_details is set).

  RaisedException is encoded under a special short name ("E") to save bytes since it's such a
  widely-used wrapper. The encoded arguments start with the name of the error's class, e.g.
  RaisedException(ValueError("foo")) is encoded as ["E", "ValueError"]. When include_details is
  set, the error's message and the traceback follow the name; for InvalidTypedValue errors
  without details, its typename and value follow the name.

  Args:
    error: the exception being wrapped (None for an instance produced by decode_args).
    include_details: if true, the output of traceback.format_exc() is saved in .details; this is
      only meaningful while the exception is being handled.
    encoded_error: list of already-encoded arguments to use as is, rather than deriving them
      from error.
  """
  def __init__(self, error, include_details=False, encoded_error=None):
    self.error = error
    self.details = traceback.format_exc() if include_details else None
    # Compare to None explicitly: an empty list is a valid decoded payload (from a bare ['E']),
    # and must be kept as is, so that re-encoding it gives back the same thing.
    if encoded_error is None:
      encoded_error = self._encode_error()
    self._encoded_error = encoded_error

  def encode_args(self):
    """Returns the list of encoded arguments, i.e. what follows 'E' in the encoded form."""
    return self._encoded_error

  def _encode_error(self):
    """Builds the list of encoded arguments from .error and .details."""
    # TODO: We should probably return all args, to communicate the error details to the browser
    # and to DB (for when we store formula results). There are two concerns: one is that it's
    # potentially quite verbose; the other is that it's makes the tests more annoying (again b/c
    # verbose).
    if self.details:
      return [type(self.error).__name__, str(self.error), self.details]
    if isinstance(self.error, InvalidTypedValue):
      return [type(self.error).__name__, self.error.typename, self.error.value]
    return [type(self.error).__name__]

  @classmethod
  def decode_args(cls, *args):
    """Returns a RaisedException that encodes back to the given args; its .error is None."""
    # Decoding of a RaisedException is only enough to re-encode it.
    return cls(None, encoded_error=list(args))


class RecordList(list):
  """
  A list of row ids that can also carry custom attributes. These are used to remember the
  _group_by and _sort_by attributes when storing RecordSet as usertypes.ReferenceList type.
  """
  def __init__(self, row_ids, group_by=None, sort_by=None):
    super(RecordList, self).__init__(row_ids)
    self._sort_by = sort_by
    self._group_by = group_by

  def __repr__(self):
    # Note that the items are included as the repr of the plain list's repr string.
    items_repr = super(RecordList, self).__repr__()
    fields = (items_repr, self._group_by, self._sort_by)
    return "RecordList(%r, group_by=%r, sort_by=%r)" % fields


class RecordStub(object):
  """
  Minimal stand-in for a record, holding just a table id and a row id.

  We don't currently have a good way to convert an incoming marshalled record to a proper Record
  object for an appropriate table. We don't expect incoming marshalled records at all, but if
  such a thing happens, we'll construct this RecordStub.
  """
  def __init__(self, table_id, row_id):
    self.row_id = row_id
    self.table_id = table_id