Be more accepting of None values in header candidates (#331)

We already filter out a line with only None values, and sometimes
Excel or LibreOffice mistakes the real number of columns, adding
one or more that have no value at all.
friendly-locale
Yohan Boniface 2 years ago committed by GitHub
parent 7c9cb9843e
commit 4ff5a2eaa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -52,7 +52,7 @@ def _is_numeric(text):
try:
t(text)
return True
except (ValueError, OverflowError):
except (ValueError, OverflowError, TypeError):
pass
return False
@ -63,7 +63,7 @@ def _is_header(header, data_rows):
"""
# See if the row has any non-text values.
for cell in header:
if not isinstance(cell, six.string_types) or _is_numeric(cell):
if not (isinstance(cell, six.string_types) or cell is None) or _is_numeric(cell):
return False
@ -109,7 +109,7 @@ def expand_headers(headers, data_offset, rows):
row_length = max(itertools.chain([len(headers)],
(_count_nonempty(r) for r in itertools.islice(rows, data_offset,
None))))
header_values = [h.strip() for h in headers] + [u''] * (row_length - len(headers))
header_values = [h.strip() if h else '' for h in headers] + [u''] * (row_length - len(headers))
return header_values

@ -105,17 +105,17 @@ class TestImportXLS(unittest.TestCase):
'table_name': u'Transaction Report',
'column_metadata': [
{'type': 'Any', 'id': u''},
{'type': 'Any', 'id': u''},
{'type': 'Numeric', 'id': u'Start'},
{'type': 'Numeric', 'id': u''},
{'type': 'Numeric', 'id': u''},
{'type': 'Any', 'id': u''},
{'type': 'Any', 'id': u'Seek no easy ways'},
],
'table_data': [
['', u'SINGLE MERGED', u'The End'],
['Start', '1637384.52', ''],
[None, 2444344.06, None],
[None, 2444344.06, None],
['Seek no easy ways', u'', u''],
[u'SINGLE MERGED', u'The End'],
[1637384.52, None],
[2444344.06, None],
[2444344.06, None],
[u'', u''],
],
}])
@ -225,6 +225,25 @@ class TestImportXLS(unittest.TestCase):
],
}])
# Checks the tables produced by import_xls.parse_file for the fixture
# 'test_headers_with_none_cell.xlsx'.
# NOTE(review): indentation was lost in this view; per the hunk header this is
# a method of TestImportXLS -- confirm when applying.
def test_header_with_none_cell(self):
# Parse the fixture workbook. Presumably its header row contains empty (None)
# cells, as the fixture name suggests -- verify against the file itself.
parsed_file = import_xls.parse_file(*_get_fixture('test_headers_with_none_cell.xlsx'))
# Element 1 of the parse result is what gets compared as the list of tables.
tables = parsed_file[1]
# Expect exactly one table, named 'Sheet1'.
self.assertEqual(tables, [{
'table_name': 'Sheet1',
# Four columns whose ids are header1..header4, all typed 'Any'.
'column_metadata': [
{'id': u'header1', 'type': 'Any'},
{'id': u'header2', 'type': 'Any'},
{'id': u'header3', 'type': 'Any'},
{'id': u'header4', 'type': 'Any'},
],
# Four lists of two values each, matching the four column_metadata entries.
# NOTE(review): looks column-oriented (one list per column) -- confirm.
'table_data': [
['foo1', 'foo2'],
['bar1', 'bar2'],
['baz1', 'baz2'],
['boz1', 'boz2'],
],
}])
# Run the unittest runner when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save