mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) Handle importing xls files with invalid dimensions
Summary: This addresses a rare bug where xlsx files with invalid dimensions could not be imported into Grist due to how openpyxl handles parsing them. Test Plan: Server test. Reviewers: alexmojaki. Reviewed By: alexmojaki. Differential Revision: https://phab.getgrist.com/D3485
This commit is contained in:
parent
561d9696aa
commit
9b08666f96
BIN
sandbox/grist/imports/fixtures/test_invalid_dimensions.xlsx
Normal file
BIN
sandbox/grist/imports/fixtures/test_invalid_dimensions.xlsx
Normal file
Binary file not shown.
@ -39,6 +39,10 @@ def parse_open_file(file_obj):
|
||||
export_list = []
|
||||
# A table set is a collection of tables:
|
||||
for sheet in workbook:
|
||||
# openpyxl fails to read xlsx files with incorrect dimensions; we reset here as a precaution.
|
||||
# See https://openpyxl.readthedocs.io/en/stable/optimized.html#worksheet-dimensions.
|
||||
sheet.reset_dimensions()
|
||||
|
||||
table_name = sheet.title
|
||||
rows = [
|
||||
list(row)
|
||||
@ -50,7 +54,9 @@ def parse_open_file(file_obj):
|
||||
sample = [
|
||||
# Create messytables.Cells for the sake of messytables.headers_guess
|
||||
[messytables.Cell(cell) for cell in row]
|
||||
for row in rows[:1000]
|
||||
# Resetting dimensions via openpyxl causes rows to not be padded. Make sure
|
||||
# sample rows are padded; get_table_data will handle padding the rest.
|
||||
for row in _with_padding(rows[:1000])
|
||||
]
|
||||
offset, headers = messytables.headers_guess(sample)
|
||||
data_offset = offset + 1 # Add the header line
|
||||
@ -100,3 +106,14 @@ def parse_open_file(file_obj):
|
||||
|
||||
parse_options = {}
|
||||
return parse_options, export_list
|
||||
|
||||
def _with_padding(rows):
  """
  Pad every row of `rows` to the length of the longest row.

  Shorter rows are extended IN PLACE with empty strings, so the row lists
  passed in are modified. Returns the same `rows` object, or a new empty list
  when `rows` is empty.
  """
  if not rows:
    return []

  widest = max(map(len, rows))
  narrowest = min(map(len, rows))

  # Only touch the rows when at least one of them is shorter than the widest.
  if narrowest != widest:
    for current in rows:
      shortfall = widest - len(current)
      current.extend([""] * shortfall)

  return rows
|
||||
|
@ -163,7 +163,27 @@ class TestImportXLS(unittest.TestCase):
|
||||
],
|
||||
'table_data': [
|
||||
[0, None, 1],
|
||||
[None, 0, 2],
|
||||
[u'', 0, 2],
|
||||
],
|
||||
}])
|
||||
|
||||
def test_invalid_dimensions(self):
  # Regression check for xlsx files whose stored dimensions are invalid
  # (typically written that way by other software). Before the fix, Grist
  # imported no rows from such files because of how openpyxl parses them.
  expected_tables = [{
    'table_name': 'Sheet1',
    'column_metadata': [
      {'id': col_id, 'type': 'Numeric'} for col_id in (u'A', u'B', u'C')
    ],
    'table_data': [
      [1, 2, 3],
      [4, 5, 6],
      [7, 8, 9],
    ],
  }]

  fixture_args = _get_fixture('test_invalid_dimensions.xlsx')
  result = import_xls.parse_file(*fixture_args)

  # The parsed tables are the second element of the result.
  self.assertEqual(result[1], expected_tables)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user