Applying review from @alexmojaki

pull/265/head
Yohan Boniface 2 years ago
parent 9bbf66e50e
commit 2544736aa8

@@ -92,10 +92,10 @@ def parse_file(file_path, parse_options=None):
with codecs.open(file_path, "rb") as f: with codecs.open(file_path, "rb") as f:
sample = f.read(100000) sample = f.read(100000)
encoding = chardet.detect(sample)['encoding'] or "utf8" encoding = chardet.detect(sample)['encoding'] or "utf8"
# In addition, always prefer UTF8 over ASCII. # In addition, always prefer UTF8 over ASCII.
if encoding == 'ascii': if encoding == 'ascii':
encoding = 'utf8' encoding = 'utf8'
log.info("Using encoding %s" % encoding) log.info("Using encoding %s" % encoding)
with codecs.open(file_path, mode="r", encoding=encoding) as f: with codecs.open(file_path, mode="r", encoding=encoding) as f:
@@ -108,11 +108,10 @@ def _guess_dialect(file_obj):
# Restrict allowed delimiters to prevent guessing other char than this list. # Restrict allowed delimiters to prevent guessing other char than this list.
dialect = csv.Sniffer().sniff(file_obj.read(100000), delimiters=['\t', ',', ';', '|']) dialect = csv.Sniffer().sniff(file_obj.read(100000), delimiters=['\t', ',', ';', '|'])
log.info("Guessed dialect %s" % dict(dialect.__dict__)) log.info("Guessed dialect %s" % dict(dialect.__dict__))
return dialect
except csv.Error: except csv.Error:
log.info("Cannot guess dialect using Excel as fallback.") log.info("Cannot guess dialect using Excel as fallback.")
return csv.excel return csv.excel
else:
return dialect
finally: finally:
file_obj.seek(0) file_obj.seek(0)
@@ -121,18 +120,14 @@ def _parse_open_file(file_obj, parse_options=None):
options = {} options = {}
dialect = _guess_dialect(file_obj) dialect = _guess_dialect(file_obj)
csv_options = {k: parse_options.get(k, getattr(dialect, k, None)) for k in csv_keys} csv_options = {}
if six.PY2: for key in csv_keys:
csv_options = {k: v.encode('utf8') if isinstance(v, six.text_type) else v value = parse_options.get(key, getattr(dialect, key, None))
for k, v in csv_options.items()} if value is not None:
csv_options[key] = value
csv_options = {k: v for k, v in csv_options.items() if v is not None}
reader = csv.reader(file_obj, **csv_options) reader = csv.reader(file_obj, **csv_options)
num_rows = parse_options.get('NUM_ROWS', 0)
table_name = None
rows = list(reader) rows = list(reader)
sample_len = 100 sample_len = 100
sample_rows = rows[:sample_len] sample_rows = rows[:sample_len]
@@ -160,7 +155,8 @@ def _parse_open_file(file_obj, parse_options=None):
data_offset -= 1 data_offset -= 1
headers = [''] * len(headers) headers = [''] * len(headers)
rows = rows[data_offset:] # Use row.pop instead to make it faster ? rows = rows[data_offset:]
num_rows = parse_options.get('NUM_ROWS', 0)
table_data_with_types = parse_data.get_table_data(rows, len(headers), num_rows) table_data_with_types = parse_data.get_table_data(rows, len(headers), num_rows)
# Identify and remove empty columns, and populate separate metadata and data lists. # Identify and remove empty columns, and populate separate metadata and data lists.
@@ -192,12 +188,12 @@ def _parse_open_file(file_obj, parse_options=None):
"SCHEMA": SCHEMA "SCHEMA": SCHEMA
} }
log.info("Output table %r with %d columns", table_name, len(column_metadata)) log.info("Output table with %d columns", len(column_metadata))
for c in column_metadata: for c in column_metadata:
log.debug("Output column %s", c) log.debug("Output column %s", c)
export_list = [{ export_list = [{
"table_name": table_name, "table_name": None,
"column_metadata": column_metadata, "column_metadata": column_metadata,
"table_data": table_data "table_data": table_data
}] }]

@@ -21,9 +21,7 @@ def empty(value):
return True return True
if not isinstance(value, six.string_types): if not isinstance(value, six.string_types):
value = six.text_type(value) value = six.text_type(value)
if len(value.strip()): return not value.strip()
return False
return True
# Get path to an imported file. # Get path to an imported file.
def get_path(file_source): def get_path(file_source):

Loading…
Cancel
Save