Be more accepting of None values in header candidates (#331)

We already filter out a line with only None values, and sometimes
Excel or LibreOffice mistakes the real number of columns, adding
one or more that have no value at all.
This commit is contained in:
Yohan Boniface 2022-10-31 20:57:26 +01:00 committed by GitHub
parent 7c9cb9843e
commit 4ff5a2eaa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 10 deletions

View File

@ -52,7 +52,7 @@ def _is_numeric(text):
try: try:
t(text) t(text)
return True return True
except (ValueError, OverflowError): except (ValueError, OverflowError, TypeError):
pass pass
return False return False
@ -63,7 +63,7 @@ def _is_header(header, data_rows):
""" """
# See if the row has any non-text values. # See if the row has any non-text values.
for cell in header: for cell in header:
if not isinstance(cell, six.string_types) or _is_numeric(cell): if not (isinstance(cell, six.string_types) or cell is None) or _is_numeric(cell):
return False return False
@ -109,7 +109,7 @@ def expand_headers(headers, data_offset, rows):
row_length = max(itertools.chain([len(headers)], row_length = max(itertools.chain([len(headers)],
(_count_nonempty(r) for r in itertools.islice(rows, data_offset, (_count_nonempty(r) for r in itertools.islice(rows, data_offset,
None)))) None))))
header_values = [h.strip() for h in headers] + [u''] * (row_length - len(headers)) header_values = [h.strip() if h else '' for h in headers] + [u''] * (row_length - len(headers))
return header_values return header_values

View File

@ -105,17 +105,17 @@ class TestImportXLS(unittest.TestCase):
'table_name': u'Transaction Report', 'table_name': u'Transaction Report',
'column_metadata': [ 'column_metadata': [
{'type': 'Any', 'id': u''}, {'type': 'Any', 'id': u''},
{'type': 'Any', 'id': u''}, {'type': 'Numeric', 'id': u'Start'},
{'type': 'Numeric', 'id': u''}, {'type': 'Numeric', 'id': u''},
{'type': 'Numeric', 'id': u''}, {'type': 'Numeric', 'id': u''},
{'type': 'Any', 'id': u''}, {'type': 'Any', 'id': u'Seek no easy ways'},
], ],
'table_data': [ 'table_data': [
['', u'SINGLE MERGED', u'The End'], [u'SINGLE MERGED', u'The End'],
['Start', '1637384.52', ''], [1637384.52, None],
[None, 2444344.06, None], [2444344.06, None],
[None, 2444344.06, None], [2444344.06, None],
['Seek no easy ways', u'', u''], [u'', u''],
], ],
}]) }])
@ -225,6 +225,25 @@ class TestImportXLS(unittest.TestCase):
], ],
}]) }])
def test_header_with_none_cell(self):
    """Check the tables parsed from ``test_headers_with_none_cell.xlsx``.

    The fixture is parsed with ``import_xls.parse_file`` and the resulting
    tables must equal a single table named 'Sheet1' whose four columns have
    the ids ``header1`` .. ``header4``, each of type 'Any'.

    NOTE(review): judging by the test and fixture names, the header row of
    the fixture presumably contains a None (empty) cell; the fixture itself
    is not visible here -- confirm.
    """
    parsed_file = import_xls.parse_file(*_get_fixture('test_headers_with_none_cell.xlsx'))
    # Element 1 of the parse result is what gets compared as the list of tables.
    tables = parsed_file[1]
    self.assertEqual(tables, [{
        'table_name': 'Sheet1',
        'column_metadata': [
            {'id': u'header1', 'type': 'Any'},
            {'id': u'header2', 'type': 'Any'},
            {'id': u'header3', 'type': 'Any'},
            {'id': u'header4', 'type': 'Any'},
        ],
        # NOTE(review): four lists for four columns -- table_data looks like
        # one list of values per column; confirm against import_xls.
        'table_data': [
            ['foo1', 'foo2'],
            ['bar1', 'bar2'],
            ['baz1', 'baz2'],
            ['boz1', 'boz2'],
        ],
    }])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()