mirror of
https://github.com/gristlabs/grist-core.git
synced 2024-10-27 20:44:07 +00:00
(core) Improve encoding detection for csv imports, and make encoding an editable option.
Summary:
- Using a sample of data was causing poor detection if the sample was cut mid-character. Switch to using line-based detection.
- Add a simple option for changing encoding. No convenient UI is offered since config UI is auto-generated, but this at least makes it possible to recover from bad guesses.
- Upgrades chardet library for good measure.
- Also fixes python3-building step, to more reliably rebuild Python dependencies when requirements3.* files change.

Test Plan: Added a python-side test case, and a browser test that encodings can be switched, errors are displayed, and wrong encodings fail recoverably.

Reviewers: alexmojaki
Reviewed By: alexmojaki
Differential Revision: https://phab.getgrist.com/D3979
This commit is contained in:
parent
b9adcefcce
commit
d5a4605d2a
@ -1082,6 +1082,7 @@ export class Importer extends DisposableWithEvents {
|
|||||||
options,
|
options,
|
||||||
)
|
)
|
||||||
]),
|
]),
|
||||||
|
cssWarningText(dom.text(use => use(this._parseOptions)?.WARNING || ""), testId('warning')),
|
||||||
dom.domComputed(use => {
|
dom.domComputed(use => {
|
||||||
if (use(isSkipTable)) {
|
if (use(isSkipTable)) {
|
||||||
return cssOverlay(t('Skip Table on Import'), testId('preview-overlay'));
|
return cssOverlay(t('Skip Table on Import'), testId('preview-overlay'));
|
||||||
@ -1098,6 +1099,7 @@ export class Importer extends DisposableWithEvents {
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
const buttons = cssImportButtons(cssImportButtonsLine(
|
const buttons = cssImportButtons(cssImportButtonsLine(
|
||||||
bigPrimaryButton('Import',
|
bigPrimaryButton('Import',
|
||||||
dom.on('click', () => this._maybeFinishImport(upload)),
|
dom.on('click', () => this._maybeFinishImport(upload)),
|
||||||
@ -1369,6 +1371,10 @@ export class Importer extends DisposableWithEvents {
|
|||||||
(p: ParseOptions) => {
|
(p: ParseOptions) => {
|
||||||
anotherScreen.dispose();
|
anotherScreen.dispose();
|
||||||
this._parseOptions.set(p);
|
this._parseOptions.set(p);
|
||||||
|
// Drop what we previously matched because we may have different columns.
|
||||||
|
// If user manually matched, then changed import options, they'll have to re-match; when
|
||||||
|
// columns change at all, the alternative has incorrect columns in UI and is more confusing.
|
||||||
|
this._sourceInfoArray.set([]);
|
||||||
this._reImport(upload).catch((err) => reportError(err));
|
this._reImport(upload).catch((err) => reportError(err));
|
||||||
},
|
},
|
||||||
() => {
|
() => {
|
||||||
@ -1517,6 +1523,12 @@ const cssTabsWrapper = styled('div', `
|
|||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
`);
|
`);
|
||||||
|
|
||||||
|
const cssWarningText = styled('div', `
|
||||||
|
margin-bottom: 8px;
|
||||||
|
color: ${theme.errorText};
|
||||||
|
white-space: pre-line;
|
||||||
|
`);
|
||||||
|
|
||||||
const cssTableList = styled('div', `
|
const cssTableList = styled('div', `
|
||||||
align-self: flex-start;
|
align-self: flex-start;
|
||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import {bigBasicButton, bigPrimaryButton} from 'app/client/ui2018/buttons';
|
import {bigBasicButton, bigPrimaryButton} from 'app/client/ui2018/buttons';
|
||||||
import {squareCheckbox} from 'app/client/ui2018/checkbox';
|
import {squareCheckbox} from 'app/client/ui2018/checkbox';
|
||||||
import {testId, theme} from 'app/client/ui2018/cssVars';
|
import {testId, theme} from 'app/client/ui2018/cssVars';
|
||||||
|
import {makeLinks} from 'app/client/ui2018/links';
|
||||||
import {cssModalButtons} from 'app/client/ui2018/modals';
|
import {cssModalButtons} from 'app/client/ui2018/modals';
|
||||||
import {ParseOptionSchema} from 'app/plugin/FileParserAPI';
|
import {ParseOptionSchema} from 'app/plugin/FileParserAPI';
|
||||||
import {Computed, dom, DomContents, IDisposableOwner, input, Observable, styled} from 'grainjs';
|
import {Computed, dom, DomContents, IDisposableOwner, input, Observable, styled} from 'grainjs';
|
||||||
@ -60,7 +61,7 @@ export function buildParseOptionsForm(
|
|||||||
return [
|
return [
|
||||||
cssParseOptionForm(
|
cssParseOptionForm(
|
||||||
items.map((item) => cssParseOption(
|
items.map((item) => cssParseOption(
|
||||||
cssParseOptionName(item.label),
|
cssParseOptionName(makeLinks(item.label)),
|
||||||
optionToInput(owner, item.type, optionsMap.get(item.name)!),
|
optionToInput(owner, item.type, optionsMap.get(item.name)!),
|
||||||
testId('parseopts-opt'),
|
testId('parseopts-opt'),
|
||||||
)),
|
)),
|
||||||
|
@ -15,6 +15,7 @@ export const ParseFileAPI = t.iface([], {
|
|||||||
export const ParseOptions = t.iface([], {
|
export const ParseOptions = t.iface([], {
|
||||||
"NUM_ROWS": t.opt("number"),
|
"NUM_ROWS": t.opt("number"),
|
||||||
"SCHEMA": t.opt(t.array("ParseOptionSchema")),
|
"SCHEMA": t.opt(t.array("ParseOptionSchema")),
|
||||||
|
"WARNING": t.opt("string"),
|
||||||
});
|
});
|
||||||
|
|
||||||
export const ParseOptionSchema = t.iface([], {
|
export const ParseOptionSchema = t.iface([], {
|
||||||
|
@ -20,6 +20,7 @@ export interface ParseFileAPI {
|
|||||||
export interface ParseOptions {
|
export interface ParseOptions {
|
||||||
NUM_ROWS?: number;
|
NUM_ROWS?: number;
|
||||||
SCHEMA?: ParseOptionSchema[];
|
SCHEMA?: ParseOptionSchema[];
|
||||||
|
WARNING?: string; // Only on response, includes a warning from parsing, if any.
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
4
sandbox/grist/imports/fixtures/test_encoding_utf8.csv
Normal file
4
sandbox/grist/imports/fixtures/test_encoding_utf8.csv
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
Name,Age,Επάγγελμα,Πόλη
|
||||||
|
John Smith,30,Γιατρός,Athens
|
||||||
|
Μαρία Παπαδοπούλου,25,Engineer,Thessaloniki
|
||||||
|
Δημήτρης Johnson,40,Δικηγόρος,Piraeus
|
|
@ -14,7 +14,7 @@ from imports import import_utils
|
|||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
log.setLevel(logging.WARNING)
|
log.setLevel(logging.INFO)
|
||||||
|
|
||||||
SCHEMA = [
|
SCHEMA = [
|
||||||
{
|
{
|
||||||
@ -77,7 +77,14 @@ SCHEMA = [
|
|||||||
'label': 'Number of rows',
|
'label': 'Number of rows',
|
||||||
'type': 'number',
|
'type': 'number',
|
||||||
'visible': False,
|
'visible': False,
|
||||||
}]
|
},
|
||||||
|
{
|
||||||
|
'name': 'encoding',
|
||||||
|
'label': 'Character encoding. See https://tinyurl.com/py3codecs',
|
||||||
|
'type': 'string',
|
||||||
|
'visible': True,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def parse_file_source(file_source, options):
|
def parse_file_source(file_source, options):
|
||||||
parsing_options, export_list = parse_file(import_utils.get_path(file_source["path"]), options)
|
parsing_options, export_list = parse_file(import_utils.get_path(file_source["path"]), options)
|
||||||
@ -91,16 +98,32 @@ def parse_file(file_path, parse_options=None):
|
|||||||
"""
|
"""
|
||||||
parse_options = parse_options or {}
|
parse_options = parse_options or {}
|
||||||
|
|
||||||
with codecs.open(file_path, "rb") as f:
|
given_encoding = parse_options.get('encoding')
|
||||||
sample = f.read(100000)
|
encoding = given_encoding or detect_encoding(file_path)
|
||||||
encoding = chardet.detect(sample)['encoding'] or "utf8"
|
log.info("Using encoding %s (%s)", encoding, "given" if given_encoding else "detected")
|
||||||
# In addition, always prefer UTF8 over ASCII.
|
|
||||||
if encoding == 'ascii':
|
|
||||||
encoding = 'utf8'
|
|
||||||
log.info("Using encoding %s", encoding)
|
|
||||||
|
|
||||||
with codecs.open(file_path, mode="r", encoding=encoding) as f:
|
try:
|
||||||
|
return _parse_with_encoding(file_path, parse_options, encoding)
|
||||||
|
except Exception as e:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
# For valid encodings, we can do our best and report count of errors. But an invalid encoding
|
||||||
|
# or one with a BOM will produce an exception. For those, fall back to utf-8.
|
||||||
|
parsing_options, export_list = _parse_with_encoding(file_path, parse_options, encoding)
|
||||||
|
parsing_options["WARNING"] = "{}: {}. Falling back to {}.\n{}".format(
|
||||||
|
type(e).__name__, e, encoding, parsing_options.get("WARNING", ""))
|
||||||
|
return parsing_options, export_list
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_with_encoding(file_path, parse_options, encoding):
|
||||||
|
codec_errors = CodecErrorsReplace()
|
||||||
|
codecs.register_error('custom', codec_errors)
|
||||||
|
with codecs.open(file_path, mode="r", encoding=encoding, errors="custom") as f:
|
||||||
parsing_options, export_list = _parse_open_file(f, parse_options=parse_options)
|
parsing_options, export_list = _parse_open_file(f, parse_options=parse_options)
|
||||||
|
parsing_options["encoding"] = encoding
|
||||||
|
if codec_errors.error_count:
|
||||||
|
parsing_options["WARNING"] = (
|
||||||
|
"Using encoding %s, encountered %s errors. Use Import Options to change" %
|
||||||
|
(encoding, codec_errors.error_count))
|
||||||
return parsing_options, export_list
|
return parsing_options, export_list
|
||||||
|
|
||||||
|
|
||||||
@ -204,6 +227,32 @@ def _parse_open_file(file_obj, parse_options=None):
|
|||||||
|
|
||||||
return options, export_list
|
return options, export_list
|
||||||
|
|
||||||
def get_version():
|
|
||||||
""" Return name and version of plug-in"""
|
class CodecErrorsReplace(object):
|
||||||
pass
|
def __init__(self):
|
||||||
|
self.error_count = 0
|
||||||
|
self.first_error = None
|
||||||
|
|
||||||
|
def __call__(self, error):
|
||||||
|
self.error_count += 1
|
||||||
|
if not self.first_error:
|
||||||
|
self.first_error = error
|
||||||
|
return codecs.replace_errors(error)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_encoding(file_path):
|
||||||
|
# Use line-by-line detection as suggested in
|
||||||
|
# https://chardet.readthedocs.io/en/latest/usage.html#advanced-usage.
|
||||||
|
# Using a fixed-sized sample is worse as the sample may end mid-character.
|
||||||
|
detector = chardet.UniversalDetector()
|
||||||
|
with codecs.open(file_path, "rb") as f:
|
||||||
|
for line in f.readlines():
|
||||||
|
detector.feed(line)
|
||||||
|
if detector.done:
|
||||||
|
break
|
||||||
|
detector.close()
|
||||||
|
encoding = detector.result["encoding"]
|
||||||
|
# Default to utf-8, and always prefer it over ASCII as the most common superset.
|
||||||
|
if not encoding or encoding == 'ascii':
|
||||||
|
encoding = 'utf-8'
|
||||||
|
return encoding
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
# This Python file uses the following encoding: utf-8
|
# This Python file uses the following encoding: utf-8
|
||||||
|
# pylint:disable=line-too-long
|
||||||
|
import csv
|
||||||
import os
|
import os
|
||||||
import textwrap
|
import textwrap
|
||||||
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
from six import StringIO, text_type
|
from six import StringIO, text_type
|
||||||
import csv
|
|
||||||
|
|
||||||
from imports import import_csv
|
from imports import import_csv
|
||||||
|
|
||||||
@ -11,6 +13,20 @@ from imports import import_csv
|
|||||||
def _get_fixture(filename):
|
def _get_fixture(filename):
|
||||||
return os.path.join(os.path.dirname(__file__), "fixtures", filename)
|
return os.path.join(os.path.dirname(__file__), "fixtures", filename)
|
||||||
|
|
||||||
|
# For a non-utf8 fixture, there is a problem with 'arc diff' which can't handle files with
|
||||||
|
# non-utf8 encodings. So create one on the fly.
|
||||||
|
non_utf8_fixture = None
|
||||||
|
non_utf8_file = None
|
||||||
|
def setUpModule():
|
||||||
|
global non_utf8_file, non_utf8_fixture # pylint:disable=global-statement
|
||||||
|
with open(_get_fixture('test_encoding_utf8.csv')) as f:
|
||||||
|
non_utf8_file = tempfile.NamedTemporaryFile(mode='wb')
|
||||||
|
non_utf8_file.write(f.read().encode('iso-8859-7'))
|
||||||
|
non_utf8_file.flush()
|
||||||
|
non_utf8_fixture = non_utf8_file.name
|
||||||
|
|
||||||
|
def tearDownModule():
|
||||||
|
non_utf8_file.close()
|
||||||
|
|
||||||
class TestImportCSV(unittest.TestCase):
|
class TestImportCSV(unittest.TestCase):
|
||||||
|
|
||||||
@ -53,7 +69,7 @@ class TestImportCSV(unittest.TestCase):
|
|||||||
def test_csv_types(self):
|
def test_csv_types(self):
|
||||||
options, parsed_file = import_csv.parse_file(_get_fixture('test_excel_types.csv'), parse_options='')
|
options, parsed_file = import_csv.parse_file(_get_fixture('test_excel_types.csv'), parse_options='')
|
||||||
sheet = parsed_file[0]
|
sheet = parsed_file[0]
|
||||||
self._check_options(options)
|
self._check_options(options, encoding='utf-8')
|
||||||
|
|
||||||
self._check_col(sheet, 0, "int1", "Int", [-1234123, '', ''])
|
self._check_col(sheet, 0, "int1", "Int", [-1234123, '', ''])
|
||||||
self._check_col(sheet, 1, "int2", "Int", [5, '', ''])
|
self._check_col(sheet, 1, "int2", "Int", [5, '', ''])
|
||||||
@ -84,8 +100,9 @@ class TestImportCSV(unittest.TestCase):
|
|||||||
options["parse_options"].pop("limit_rows")
|
options["parse_options"].pop("limit_rows")
|
||||||
options["parse_options"].pop("quoting")
|
options["parse_options"].pop("quoting")
|
||||||
options["parse_options"].pop("escapechar")
|
options["parse_options"].pop("escapechar")
|
||||||
|
options["parse_options"]["encoding"] = "utf-8" # Expected encoding
|
||||||
self.assertEqual(options["parse_options"], parsed_options)
|
self.assertEqual(options["parse_options"], parsed_options)
|
||||||
self._check_options(parsed_options)
|
self._check_options(parsed_options, encoding='utf-8')
|
||||||
parsed_file = parsed_file[0]
|
parsed_file = parsed_file[0]
|
||||||
|
|
||||||
self._check_num_cols(parsed_file, 5)
|
self._check_num_cols(parsed_file, 5)
|
||||||
@ -385,7 +402,7 @@ class TestImportCSV(unittest.TestCase):
|
|||||||
|
|
||||||
def test_csv_with_very_long_cell(self):
|
def test_csv_with_very_long_cell(self):
|
||||||
options, parsed_file = import_csv.parse_file(_get_fixture('test_long_cell.csv'), parse_options='')
|
options, parsed_file = import_csv.parse_file(_get_fixture('test_long_cell.csv'), parse_options='')
|
||||||
self._check_options(options)
|
self._check_options(options, encoding='utf-8')
|
||||||
sheet = parsed_file[0]
|
sheet = parsed_file[0]
|
||||||
long_cell = sheet["table_data"][1][0]
|
long_cell = sheet["table_data"][1][0]
|
||||||
self.assertEqual(len(long_cell), 8058)
|
self.assertEqual(len(long_cell), 8058)
|
||||||
@ -394,13 +411,61 @@ class TestImportCSV(unittest.TestCase):
|
|||||||
|
|
||||||
def test_csv_with_surprising_isdigit(self):
|
def test_csv_with_surprising_isdigit(self):
|
||||||
options, parsed_file = import_csv.parse_file(_get_fixture('test_isdigit.csv'), parse_options='')
|
options, parsed_file = import_csv.parse_file(_get_fixture('test_isdigit.csv'), parse_options='')
|
||||||
self._check_options(options)
|
self._check_options(options, encoding='utf-8')
|
||||||
sheet = parsed_file[0]
|
sheet = parsed_file[0]
|
||||||
self._check_num_cols(sheet, 3)
|
self._check_num_cols(sheet, 3)
|
||||||
self._check_col(sheet, 0, "PHONE", "Text", [u'201-¾᠓𑄺꤈꤈꧐꤆'])
|
self._check_col(sheet, 0, "PHONE", "Text", [u'201-¾᠓𑄺꤈꤈꧐꤆'])
|
||||||
self._check_col(sheet, 1, "VALUE", "Text", [u'¹5'])
|
self._check_col(sheet, 1, "VALUE", "Text", [u'¹5'])
|
||||||
self._check_col(sheet, 2, "DATE", "Text", [u'2018-0²-27 16:08:39 +0000'])
|
self._check_col(sheet, 2, "DATE", "Text", [u'2018-0²-27 16:08:39 +0000'])
|
||||||
|
|
||||||
|
def test_csv_encoding_detection_utf8(self):
|
||||||
|
options, parsed_file = import_csv.parse_file(_get_fixture('test_encoding_utf8.csv'), parse_options='')
|
||||||
|
self._check_options(options, encoding='utf-8')
|
||||||
|
sheet = parsed_file[0]
|
||||||
|
self._check_col(sheet, 0, "Name", "Text", [u'John Smith', u'Μαρία Παπαδοπούλου', u'Δημήτρης Johnson'])
|
||||||
|
self._check_col(sheet, 2, "Επάγγελμα", "Text", [u'Γιατρός', u'Engineer', u'Δικηγόρος'])
|
||||||
|
|
||||||
|
def test_csv_encoding_detection_greek(self):
|
||||||
|
# ISO-8859-7 is close to CP1253, and this fixture file would be identical in these two.
|
||||||
|
options, parsed_file = import_csv.parse_file(non_utf8_fixture, parse_options='')
|
||||||
|
self._check_options(options, encoding='ISO-8859-7')
|
||||||
|
sheet = parsed_file[0]
|
||||||
|
self._check_col(sheet, 0, "Name", "Text", [u'John Smith', u'Μαρία Παπαδοπούλου', u'Δημήτρης Johnson'])
|
||||||
|
self._check_col(sheet, 2, "Επάγγελμα", "Text", [u'Γιατρός', u'Engineer', u'Δικηγόρος'])
|
||||||
|
|
||||||
|
# Similar enough encoding that the result is correct.
|
||||||
|
options, parsed_file = import_csv.parse_file(non_utf8_fixture, parse_options={"encoding": "cp1253"})
|
||||||
|
self._check_options(options, encoding='cp1253') # The encoding should be respected
|
||||||
|
sheet = parsed_file[0]
|
||||||
|
self._check_col(sheet, 0, "Name", "Text", [u'John Smith', u'Μαρία Παπαδοπούλου', u'Δημήτρης Johnson'])
|
||||||
|
self._check_col(sheet, 2, "Επάγγελμα", "Text", [u'Γιατρός', u'Engineer', u'Δικηγόρος'])
|
||||||
|
|
||||||
|
def test_csv_encoding_errors_are_handled(self):
|
||||||
|
# With ascii, we'll get many decoding errors, but parsing should still succeed.
|
||||||
|
parse_options = {
|
||||||
|
"encoding": "ascii",
|
||||||
|
"include_col_names_as_headers": True,
|
||||||
|
}
|
||||||
|
options, parsed_file = import_csv.parse_file(non_utf8_fixture, parse_options=parse_options)
|
||||||
|
self._check_options(options,
|
||||||
|
encoding='ascii',
|
||||||
|
WARNING='Using encoding ascii, encountered 108 errors. Use Import Options to change')
|
||||||
|
sheet = parsed_file[0]
|
||||||
|
self._check_col(sheet, 0, "Name", "Text", [u'John Smith', u'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>', u'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> Johnson'])
|
||||||
|
self._check_col(sheet, 2, "<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>", "Text", [u'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>', u'Engineer', u'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>'])
|
||||||
|
|
||||||
|
def test_csv_encoding_mismatch(self):
|
||||||
|
# Here we use a wrong single-byte encoding, to check that it succeeds even if with nonsense.
|
||||||
|
parse_options = {
|
||||||
|
"encoding": "cp1254",
|
||||||
|
"include_col_names_as_headers": True,
|
||||||
|
}
|
||||||
|
options, parsed_file = import_csv.parse_file(non_utf8_fixture, parse_options=parse_options)
|
||||||
|
self._check_options(options, encoding='cp1254')
|
||||||
|
sheet = parsed_file[0]
|
||||||
|
self._check_col(sheet, 0, "Name", "Text", [u'John Smith', u'Ìáñßá Ğáğáäïğïıëïõ', u'ÄçìŞôñçò Johnson'])
|
||||||
|
self._check_col(sheet, 2, "ÅğÜããåëìá", "Text", [u'Ãéáôñüò', u'Engineer', u'Äéêçãüñïò'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -10,7 +10,7 @@ asttokens==2.2.1
|
|||||||
# via
|
# via
|
||||||
# friendly-traceback
|
# friendly-traceback
|
||||||
# stack-data
|
# stack-data
|
||||||
chardet==4.0.0
|
chardet==5.1.0
|
||||||
# via -r core/sandbox/requirements3.in
|
# via -r core/sandbox/requirements3.in
|
||||||
et-xmlfile==1.0.1
|
et-xmlfile==1.0.1
|
||||||
# via openpyxl
|
# via openpyxl
|
||||||
|
@ -17,6 +17,7 @@ import * as path from 'path';
|
|||||||
import * as fse from 'fs-extra';
|
import * as fse from 'fs-extra';
|
||||||
import clone = require('lodash/clone');
|
import clone = require('lodash/clone');
|
||||||
import * as tmp from 'tmp-promise';
|
import * as tmp from 'tmp-promise';
|
||||||
|
import {Options as TmpOptions} from 'tmp';
|
||||||
import * as winston from 'winston';
|
import * as winston from 'winston';
|
||||||
import { serialize } from 'winston/lib/winston/common';
|
import { serialize } from 'winston/lib/winston/common';
|
||||||
|
|
||||||
@ -27,15 +28,15 @@ import { getAppRoot } from 'app/server/lib/places';
|
|||||||
/**
|
/**
|
||||||
* Creates a temporary file with the given contents.
|
* Creates a temporary file with the given contents.
|
||||||
* @param {String} content. Data to store in the file.
|
* @param {String} content. Data to store in the file.
|
||||||
* @param {[Boolean]} optKeep. Optionally pass in true to keep the file from being deleted, which
|
* @param {[Boolean]} options.keep. Optionally pass in true to keep the file from being deleted, which
|
||||||
* is useful to see the content while debugging a test.
|
* is useful to see the content while debugging a test.
|
||||||
* @returns {Promise} A promise for the path of the new file.
|
* @returns {Promise} A promise for the path of the new file.
|
||||||
*/
|
*/
|
||||||
export async function writeTmpFile(content: any, optKeep?: boolean) {
|
export async function writeTmpFile(content: any, options: TmpOptions = {}) {
|
||||||
// discardDescriptor ensures tmp module closes it. It can lead to horrible bugs to close this
|
// discardDescriptor ensures tmp module closes it. It can lead to horrible bugs to close this
|
||||||
// descriptor yourself, since tmp also closes it on exit, and if it's a different descriptor by
|
// descriptor yourself, since tmp also closes it on exit, and if it's a different descriptor by
|
||||||
// that time, it can lead to a crash. See https://github.com/raszi/node-tmp/issues/168
|
// that time, it can lead to a crash. See https://github.com/raszi/node-tmp/issues/168
|
||||||
const obj = await tmp.file({keep: optKeep, discardDescriptor: true});
|
const obj = await tmp.file({discardDescriptor: true, ...options});
|
||||||
await fse.writeFile(obj.path, content);
|
await fse.writeFile(obj.path, content);
|
||||||
return obj.path;
|
return obj.path;
|
||||||
}
|
}
|
||||||
@ -44,17 +45,17 @@ export async function writeTmpFile(content: any, optKeep?: boolean) {
|
|||||||
* Creates a temporary file with `numLines` of generated data, each line about 30 bytes long.
|
* Creates a temporary file with `numLines` of generated data, each line about 30 bytes long.
|
||||||
* This is useful for testing operations with large files.
|
* This is useful for testing operations with large files.
|
||||||
* @param {Number} numLines. How many lines to store in the file.
|
* @param {Number} numLines. How many lines to store in the file.
|
||||||
* @param {[Boolean]} optKeep. Optionally pass in true to keep the file from being deleted, which
|
* @param {[Boolean]} options.keep. Optionally pass in true to keep the file from being deleted, which
|
||||||
* is useful to see the content while debugging a test.
|
* is useful to see the content while debugging a test.
|
||||||
* @returns {Promise} A promise for the path of the new file.
|
* @returns {Promise} A promise for the path of the new file.
|
||||||
*/
|
*/
|
||||||
export async function generateTmpFile(numLines: number, optKeep?: boolean) {
|
export async function generateTmpFile(numLines: number, options: TmpOptions = {}) {
|
||||||
// Generate a bigger data file.
|
// Generate a bigger data file.
|
||||||
const data = [];
|
const data = [];
|
||||||
for (let i = 0; i < numLines; i++) {
|
for (let i = 0; i < numLines; i++) {
|
||||||
data.push(i + " abcdefghijklmnopqrstuvwxyz\n");
|
data.push(i + " abcdefghijklmnopqrstuvwxyz\n");
|
||||||
}
|
}
|
||||||
return writeTmpFile(data.join(""), optKeep);
|
return writeTmpFile(data.join(""), options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user