mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) Switch excel import parsing from messytables+xlrd to openpyxl, and ignore empty rows
Summary: Use openpyxl instead of messytables (which used xlrd internally) in import_xls.py. Skip empty rows, since Excel files can easily contain huge numbers of them. Drop support for xls files (which openpyxl doesn't support) in favour of the newer xlsx format. Fix some details relating to Python virtualenvs and dependencies, as Jenkins was failing to find new Python dependencies.

Test Plan: Mostly relying on existing tests. Updated various tests which referred to xls files instead of xlsx. Added a Python test for skipping empty rows.

Reviewers: georgegevoian
Reviewed By: georgegevoian
Differential Revision: https://phab.getgrist.com/D3406
This commit is contained in:
@@ -10,7 +10,6 @@ of values. All "data" lists will have the same length.
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import messytables
|
||||
import moment # TODO grist internal libraries might not be available to plugins in the future.
|
||||
import six
|
||||
from six.moves import zip, xrange
|
||||
@@ -59,12 +58,16 @@ class BaseConverter(object):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
numeric_types = six.integer_types + (float, complex, type(None))
|
||||
|
||||
class NumericConverter(BaseConverter):
|
||||
"""Handles the Grist Numeric type"""
|
||||
|
||||
@classmethod
|
||||
def convert(cls, value):
|
||||
if type(value) in six.integer_types + (float, complex):
|
||||
if type(value) is bool:
|
||||
return int(value)
|
||||
elif type(value) in numeric_types:
|
||||
return value
|
||||
raise ValueError()
|
||||
|
||||
@@ -80,7 +83,7 @@ class SimpleDateTimeConverter(BaseConverter):
|
||||
def convert(cls, value):
|
||||
if type(value) is datetime.datetime:
|
||||
return value
|
||||
elif value == "":
|
||||
elif not value:
|
||||
return None
|
||||
raise ValueError()
|
||||
|
||||
@@ -103,6 +106,8 @@ class AnyConverter(BaseConverter):
|
||||
"""
|
||||
@classmethod
|
||||
def convert(cls, value):
|
||||
if value is None:
|
||||
return u''
|
||||
return six.text_type(value)
|
||||
|
||||
@classmethod
|
||||
@@ -156,7 +161,7 @@ def _guess_basic_types(rows, num_columns):
|
||||
column_detectors = [ColumnDetector() for i in xrange(num_columns)]
|
||||
for row in rows:
|
||||
for cell, detector in zip(row, column_detectors):
|
||||
detector.add_value(cell.value)
|
||||
detector.add_value(cell)
|
||||
|
||||
return [detector.get_converter() for detector in column_detectors]
|
||||
|
||||
@@ -194,10 +199,10 @@ class ColumnConverter(object):
|
||||
return {"type": grist_type, "data": self._all_col_values}
|
||||
|
||||
|
||||
def get_table_data(row_set, num_columns, num_rows=0):
|
||||
converters = _guess_basic_types(row_set.sample, num_columns)
|
||||
def get_table_data(rows, num_columns, num_rows=0):
|
||||
converters = _guess_basic_types(rows[:1000], num_columns)
|
||||
col_converters = [ColumnConverter(c) for c in converters]
|
||||
for num, row in enumerate(row_set):
|
||||
for num, row in enumerate(rows):
|
||||
if num_rows and num == num_rows:
|
||||
break
|
||||
|
||||
@@ -207,9 +212,9 @@ def get_table_data(row_set, num_columns, num_rows=0):
|
||||
# Make sure we have a value for every column.
|
||||
missing_values = len(converters) - len(row)
|
||||
if missing_values > 0:
|
||||
row.extend([messytables.Cell("")] * missing_values)
|
||||
row.extend([""] * missing_values)
|
||||
|
||||
for cell, conv in zip(row, col_converters):
|
||||
conv.convert_and_add(cell.value)
|
||||
conv.convert_and_add(cell)
|
||||
|
||||
return [conv.get_grist_column() for conv in col_converters]
|
||||
|
||||
Reference in New Issue
Block a user