Be more accepting with None value in headers candidate (#331)

We already filter out a line with only None values, and sometimes
Excel or LibreOffice mistakes the real number of columns, adding
one or more that have no value at all.
This commit is contained in:
Yohan Boniface 2022-10-31 20:57:26 +01:00 committed by GitHub
parent 7c9cb9843e
commit 4ff5a2eaa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 10 deletions

View File

@ -52,7 +52,7 @@ def _is_numeric(text):
try:
t(text)
return True
except (ValueError, OverflowError):
except (ValueError, OverflowError, TypeError):
pass
return False
@ -63,7 +63,7 @@ def _is_header(header, data_rows):
"""
# See if the row has any non-text values.
for cell in header:
if not isinstance(cell, six.string_types) or _is_numeric(cell):
if not (isinstance(cell, six.string_types) or cell is None) or _is_numeric(cell):
return False
@ -109,7 +109,7 @@ def expand_headers(headers, data_offset, rows):
row_length = max(itertools.chain([len(headers)],
(_count_nonempty(r) for r in itertools.islice(rows, data_offset,
None))))
header_values = [h.strip() for h in headers] + [u''] * (row_length - len(headers))
header_values = [h.strip() if h else '' for h in headers] + [u''] * (row_length - len(headers))
return header_values

View File

@ -105,17 +105,17 @@ class TestImportXLS(unittest.TestCase):
'table_name': u'Transaction Report',
'column_metadata': [
{'type': 'Any', 'id': u''},
{'type': 'Any', 'id': u''},
{'type': 'Numeric', 'id': u'Start'},
{'type': 'Numeric', 'id': u''},
{'type': 'Numeric', 'id': u''},
{'type': 'Any', 'id': u''},
{'type': 'Any', 'id': u'Seek no easy ways'},
],
'table_data': [
['', u'SINGLE MERGED', u'The End'],
['Start', '1637384.52', ''],
[None, 2444344.06, None],
[None, 2444344.06, None],
['Seek no easy ways', u'', u''],
[u'SINGLE MERGED', u'The End'],
[1637384.52, None],
[2444344.06, None],
[2444344.06, None],
[u'', u''],
],
}])
@ -225,6 +225,25 @@ class TestImportXLS(unittest.TestCase):
],
}])
def test_header_with_none_cell(self):
parsed_file = import_xls.parse_file(*_get_fixture('test_headers_with_none_cell.xlsx'))
tables = parsed_file[1]
self.assertEqual(tables, [{
'table_name': 'Sheet1',
'column_metadata': [
{'id': u'header1', 'type': 'Any'},
{'id': u'header2', 'type': 'Any'},
{'id': u'header3', 'type': 'Any'},
{'id': u'header4', 'type': 'Any'},
],
'table_data': [
['foo1', 'foo2'],
['bar1', 'bar2'],
['baz1', 'baz2'],
['boz1', 'boz2'],
],
}])
if __name__ == '__main__':
unittest.main()