You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
258 lines
8.0 KiB
258 lines
8.0 KiB
"""
|
|
The import_json module converts json file into a list of grist tables.
|
|
|
|
It supports data being structured as a list of record, turning each
|
|
object into a row and each object's key into a column. For
|
|
example:
|
|
```
|
|
[{'a': 1, 'b': 'tree'}, {'a': 4, 'b': 'flowers'}, ... ]
|
|
```
|
|
is turned into a table with two columns 'a' of type 'Numeric' and 'b' of
|
|
type 'Text'.
|
|
|
|
Nested object are stored as references to a distinct table where the
|
|
nested object is stored. For example:
|
|
```
|
|
[{'a': {'b': 4}}, ...]
|
|
```
|
|
is turned into a column 'a' of type 'Ref:my_import_name.a', and into
|
|
another table 'my_import_name.a' with a column 'b' of type
|
|
'Numeric'. (Nested-nested objects are supported as well and the module
|
|
assumes no limit to the number of level of nesting you can do.)
|
|
|
|
Each value which is not an object will be stored into a column with id
|
|
'' (empty string). For example:
|
|
```
|
|
['apple', 'peach', ... ]
|
|
```
|
|
is turned into a table with an un-named column that stores the values.
|
|
|
|
Arrays are stored as a list of references to a table where the content
|
|
of the array is stored. For example:
|
|
```
|
|
[{'items': [{'a':'apple'}, {'a':'peach'}]}, {'items': [{'a':'cucumber'}, {'a':'carots'}, ...]}, ...]
|
|
```
|
|
is turned into a column named 'items' of type
|
|
'RefList:my_import_name.items' which points to another table named
|
|
'my_import_name.items' which has a column 'a' of type Text.
|
|
|
|
Data could be structured with an object at the root as well in which
|
|
case, the object is considered to represent a single row, and gets
|
|
turned into a table with one row.
|
|
|
|
A column's type is defined by the type of its first value that is not
|
|
None (ie: if another value with different type is stored in the same
|
|
column, the column's type remains unchanged), 'Text' otherwise.
|
|
|
|
Usage:
|
|
import import_json
|
|
# if you have a file to parse
|
|
import_json.parse_file(file_path)
|
|
|
|
# if data is already encoded with python's standard containers (dict and list)
|
|
import_json.dumps(data, import_name)
|
|
|
|
|
|
TODO:
|
|
- references should map to appropriate column type ie: `Ref:{$colname}` and
|
|
`RefList:{$colname}` (which depends on T413).
|
|
- Allows user to set the uniqueValues options per table.
|
|
- User should be able to choose some objects to be imported as
|
|
indexes: for instance:
|
|
```
|
|
{
|
|
'pink lady': {'type': 'apple', 'taste': 'juicy'},
|
|
'gala': {'type': 'apple', 'taste': 'tart'},
|
|
'comice': {'type': 'pear', 'taste': 'lemon'},
|
|
...
|
|
}
|
|
```
|
|
could be mapped to columns 'type', 'taste' and a 3rd that holds the
|
|
property 'name'.
|
|
|
|
"""
|
|
import os
|
|
import json
|
|
from collections import OrderedDict, namedtuple
|
|
from itertools import count, chain
|
|
|
|
import six
|
|
|
|
from imports import import_utils
|
|
|
|
Ref = namedtuple('Ref', ['table_name', 'rowid'])
|
|
Row = namedtuple('Row', ['values', 'parent', 'ref'])
|
|
Col = namedtuple('Col', ['type', 'values'])
|
|
|
|
GRIST_TYPES={
|
|
float: "Numeric",
|
|
bool: "Bool",
|
|
}
|
|
|
|
for typ in six.integer_types:
|
|
GRIST_TYPES[typ] = "Numeric"
|
|
|
|
for typ in six.string_types:
|
|
GRIST_TYPES[typ] = "Text"
|
|
|
|
SCHEMA = [{
|
|
'name': 'includes',
|
|
'label': 'Includes (list of tables separated by semicolon)',
|
|
'type': 'string',
|
|
'visible': True
|
|
}, {
|
|
'name': 'excludes',
|
|
'label': 'Excludes (list of tables separated by semicolon)',
|
|
'type': 'string',
|
|
'visible': True
|
|
}]
|
|
|
|
DEFAULT_PARSE_OPTIONS = {
|
|
'includes': '',
|
|
'excludes': '',
|
|
'SCHEMA': SCHEMA
|
|
}
|
|
|
|
def parse_file(file_source, parse_options):
|
|
"Deserialize `file_source` into a python object and dumps it into jgrist form"
|
|
path = import_utils.get_path(file_source['path'])
|
|
name, ext = os.path.splitext(file_source['origName'])
|
|
if 'SCHEMA' not in parse_options:
|
|
parse_options.update(DEFAULT_PARSE_OPTIONS)
|
|
with open(path, 'r') as json_file:
|
|
data = json.loads(json_file.read())
|
|
|
|
return dumps(data, name, parse_options)
|
|
|
|
def dumps(data, name = "", parse_options = DEFAULT_PARSE_OPTIONS):
|
|
" Serializes `data` to a jgrist formatted object. "
|
|
tables = Tables(parse_options)
|
|
if not isinstance(data, list):
|
|
# put simple record into a list
|
|
data = [data]
|
|
for val in data:
|
|
tables.add_row(name, val)
|
|
return {
|
|
'tables': tables.dumps(),
|
|
'parseOptions': parse_options
|
|
}
|
|
|
|
|
|
class Tables(object):
|
|
"""
|
|
Tables maintains the list of tables indexed by their name. Each table
|
|
is a list of row. A row is a dictionary mapping columns id to a value.
|
|
"""
|
|
|
|
def __init__(self, parse_options):
|
|
self._tables = OrderedDict()
|
|
self._includes_opt = list(filter(None, parse_options['includes'].split(';')))
|
|
self._excludes_opt = list(filter(None, parse_options['excludes'].split(';')))
|
|
|
|
|
|
def dumps(self):
|
|
" Dumps tables in jgrist format "
|
|
return [_dump_table(name, rows) for name, rows in six.iteritems(self._tables)]
|
|
|
|
def add_row(self, table, value, parent = None):
|
|
"""
|
|
Adds a row to `table` and fill it with the content of value, then
|
|
returns a Ref object pointing to this row. Returns None if the row
|
|
was excluded. Calls itself recursively to add nested object and
|
|
lists.
|
|
"""
|
|
row = None
|
|
if self._is_included(table):
|
|
rows = self._tables.setdefault(table, [])
|
|
row = Row(OrderedDict(), parent, Ref(table, len(rows)+1))
|
|
rows.append(row)
|
|
|
|
# we need a dictionary to map values to the row's columns
|
|
value = _dictify(value)
|
|
for (k, val) in sorted(six.iteritems(value)):
|
|
if isinstance(val, dict):
|
|
val = self.add_row(table + '_' + k, val)
|
|
if row and val:
|
|
row.values[k] = val.ref
|
|
elif isinstance(val, list):
|
|
for list_val in val:
|
|
self.add_row(table + '_' + k, list_val, row)
|
|
else:
|
|
if row and self._is_included(table + '_' + k):
|
|
row.values[k] = val
|
|
return row
|
|
|
|
|
|
def _is_included(self, property_path):
|
|
is_included = (any(property_path.startswith(inc) for inc in self._includes_opt)
|
|
if self._includes_opt else True)
|
|
is_excluded = (any(property_path.startswith(exc) for exc in self._excludes_opt)
|
|
if self._excludes_opt else False)
|
|
return is_included and not is_excluded
|
|
|
|
|
|
def first_available_key(dictionary, name):
|
|
"""
|
|
Returns the first of (name, name2, name3 ...) that is not a key of
|
|
dictionary.
|
|
"""
|
|
names = chain([name], ("{}{}".format(name, i) for i in count(2)))
|
|
return next(n for n in names if n not in dictionary)
|
|
|
|
|
|
def _dictify(value):
|
|
"""
|
|
Converts non-dictionary value to a dictionary with a single
|
|
empty-string key mapping to the given value. Or returns the value
|
|
itself if it's already a dictionary. This is useful to map values to
|
|
row's columns.
|
|
"""
|
|
return value if isinstance(value, dict) else {'': value}
|
|
|
|
|
|
def _dump_table(name, rows):
|
|
"Converts a list of rows into a jgrist table and set 'table_name' to name."
|
|
columns = _transpose([r.values for r in rows])
|
|
# find ref to first parent
|
|
ref = next((r.parent.ref for r in rows if r.parent), None)
|
|
if ref:
|
|
# adds a column to store ref to parent
|
|
col_id = first_available_key(columns, ref.table_name)
|
|
columns[col_id] = Col(_grist_type(ref),
|
|
[row.parent.ref if row.parent else None for row in rows])
|
|
return {
|
|
'column_metadata': [{'id': key, 'type': col.type} for (key, col) in six.iteritems(columns)],
|
|
'table_data': [[_dump_value(val) for val in col.values] for col in columns.values()],
|
|
'table_name': name
|
|
}
|
|
|
|
def _transpose(rows):
|
|
"""
|
|
Transposes a collection of dictionary mapping key to values into a
|
|
dictionary mapping key to values. Values are encoded into a tuple
|
|
made of the grist_type of the first value that is not None and the
|
|
collection of values.
|
|
"""
|
|
transpose = OrderedDict()
|
|
values = OrderedDict()
|
|
for row in reversed(rows):
|
|
values.update(row)
|
|
for key, val in six.iteritems(values):
|
|
transpose[key] = Col(_grist_type(val), [row.get(key, None) for row in rows])
|
|
return transpose
|
|
|
|
|
|
def _dump_value(value):
|
|
" Serialize a value."
|
|
if isinstance(value, Ref):
|
|
return value.rowid
|
|
return value
|
|
|
|
|
|
def _grist_type(value):
|
|
" Returns the grist type for value. "
|
|
val_type = type(value)
|
|
if val_type == Ref:
|
|
return 'Ref:{}'.format(value.table_name)
|
|
return GRIST_TYPES.get(val_type, 'Text')
|