mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) Handle importing xls files with invalid dimensions
Summary: This addresses a rare bug where xlsx files with invalid dimensions could not be imported into Grist, because openpyxl fails to read worksheets whose recorded dimensions are incorrect. Test Plan: Server test. Reviewers: alexmojaki. Reviewed By: alexmojaki. Differential Revision: https://phab.getgrist.com/D3485
This commit is contained in:
parent
561d9696aa
commit
9b08666f96
BIN
sandbox/grist/imports/fixtures/test_invalid_dimensions.xlsx
Normal file
BIN
sandbox/grist/imports/fixtures/test_invalid_dimensions.xlsx
Normal file
Binary file not shown.
@ -39,6 +39,10 @@ def parse_open_file(file_obj):
|
|||||||
export_list = []
|
export_list = []
|
||||||
# A table set is a collection of tables:
|
# A table set is a collection of tables:
|
||||||
for sheet in workbook:
|
for sheet in workbook:
|
||||||
|
# openpyxl fails to read xlsx files with incorrect dimensions; we reset here as a precaution.
|
||||||
|
# See https://openpyxl.readthedocs.io/en/stable/optimized.html#worksheet-dimensions.
|
||||||
|
sheet.reset_dimensions()
|
||||||
|
|
||||||
table_name = sheet.title
|
table_name = sheet.title
|
||||||
rows = [
|
rows = [
|
||||||
list(row)
|
list(row)
|
||||||
@ -50,7 +54,9 @@ def parse_open_file(file_obj):
|
|||||||
sample = [
|
sample = [
|
||||||
# Create messytables.Cells for the sake of messytables.headers_guess
|
# Create messytables.Cells for the sake of messytables.headers_guess
|
||||||
[messytables.Cell(cell) for cell in row]
|
[messytables.Cell(cell) for cell in row]
|
||||||
for row in rows[:1000]
|
# Resetting dimensions via openpyxl causes rows to not be padded. Make sure
|
||||||
|
# sample rows are padded; get_table_data will handle padding the rest.
|
||||||
|
for row in _with_padding(rows[:1000])
|
||||||
]
|
]
|
||||||
offset, headers = messytables.headers_guess(sample)
|
offset, headers = messytables.headers_guess(sample)
|
||||||
data_offset = offset + 1 # Add the header line
|
data_offset = offset + 1 # Add the header line
|
||||||
@ -100,3 +106,14 @@ def parse_open_file(file_obj):
|
|||||||
|
|
||||||
parse_options = {}
|
parse_options = {}
|
||||||
return parse_options, export_list
|
return parse_options, export_list
|
||||||
|
|
||||||
|
def _with_padding(rows):
|
||||||
|
if not rows:
|
||||||
|
return []
|
||||||
|
max_width = max(len(row) for row in rows)
|
||||||
|
min_width = min(len(row) for row in rows)
|
||||||
|
if min_width == max_width:
|
||||||
|
return rows
|
||||||
|
for row in rows:
|
||||||
|
row.extend([""] * (max_width - len(row)))
|
||||||
|
return rows
|
||||||
|
@ -163,7 +163,27 @@ class TestImportXLS(unittest.TestCase):
|
|||||||
],
|
],
|
||||||
'table_data': [
|
'table_data': [
|
||||||
[0, None, 1],
|
[0, None, 1],
|
||||||
[None, 0, 2],
|
[u'', 0, 2],
|
||||||
|
],
|
||||||
|
}])
|
||||||
|
|
||||||
|
def test_invalid_dimensions(self):
|
||||||
|
# Check that files with invalid dimensions (typically a result of software
|
||||||
|
# incorrectly writing the xlsx file) are imported correctly. Previously, Grist
|
||||||
|
# would fail to import any rows from such files due to how openpyxl parses them.
|
||||||
|
parsed_file = import_xls.parse_file(*_get_fixture('test_invalid_dimensions.xlsx'))
|
||||||
|
tables = parsed_file[1]
|
||||||
|
self.assertEqual(tables, [{
|
||||||
|
'table_name': 'Sheet1',
|
||||||
|
'column_metadata': [
|
||||||
|
{'id': u'A', 'type': 'Numeric'},
|
||||||
|
{'id': u'B', 'type': 'Numeric'},
|
||||||
|
{'id': u'C', 'type': 'Numeric'},
|
||||||
|
],
|
||||||
|
'table_data': [
|
||||||
|
[1, 2, 3],
|
||||||
|
[4, 5, 6],
|
||||||
|
[7, 8, 9],
|
||||||
],
|
],
|
||||||
}])
|
}])
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user