(core) Improve encoding detection for csv imports, and make encoding an editable option.

Summary:
- Using a sample of data caused poor detection when the sample was
  cut mid-character. Switch to line-based detection.
- Add a simple option for changing encoding. No convenient UI is offered
  since config UI is auto-generated, but this at least makes it possible to
  recover from bad guesses.
- Upgrade the chardet library for good measure.

- Also fix the python3-building step to more reliably rebuild Python
  dependencies when requirements3.* files change.

Test Plan:
Added a python-side test case, and a browser test that encodings can
be switched, errors are displayed, and wrong encodings fail recoverably.

Reviewers: alexmojaki

Reviewed By: alexmojaki

Differential Revision: https://phab.getgrist.com/D3979
This commit is contained in:
Dmitry S
2023-08-18 17:03:27 -04:00
parent b9adcefcce
commit d5a4605d2a
9 changed files with 160 additions and 26 deletions

View File

@@ -1082,6 +1082,7 @@ export class Importer extends DisposableWithEvents {
options,
)
]),
cssWarningText(dom.text(use => use(this._parseOptions)?.WARNING || ""), testId('warning')),
dom.domComputed(use => {
if (use(isSkipTable)) {
return cssOverlay(t('Skip Table on Import'), testId('preview-overlay'));
@@ -1098,6 +1099,7 @@ export class Importer extends DisposableWithEvents {
)
);
});
const buttons = cssImportButtons(cssImportButtonsLine(
bigPrimaryButton('Import',
dom.on('click', () => this._maybeFinishImport(upload)),
@@ -1369,6 +1371,10 @@ export class Importer extends DisposableWithEvents {
(p: ParseOptions) => {
anotherScreen.dispose();
this._parseOptions.set(p);
// Drop what we previously matched because we may have different columns.
// If user manually matched, then changed import options, they'll have to re-match; when
// columns change at all, the alternative has incorrect columns in UI and is more confusing.
this._sourceInfoArray.set([]);
this._reImport(upload).catch((err) => reportError(err));
},
() => {
@@ -1517,6 +1523,12 @@ const cssTabsWrapper = styled('div', `
flex-direction: column;
`);
// Styled <div> for the parse-options WARNING message (rendered in the importer with
// testId('warning')). Uses the theme's error text color; `white-space: pre-line` keeps
// line breaks in the message while collapsing other runs of whitespace.
const cssWarningText = styled('div', `
margin-bottom: 8px;
color: ${theme.errorText};
white-space: pre-line;
`);
const cssTableList = styled('div', `
align-self: flex-start;
max-width: 100%;

View File

@@ -1,6 +1,7 @@
import {bigBasicButton, bigPrimaryButton} from 'app/client/ui2018/buttons';
import {squareCheckbox} from 'app/client/ui2018/checkbox';
import {testId, theme} from 'app/client/ui2018/cssVars';
import {makeLinks} from 'app/client/ui2018/links';
import {cssModalButtons} from 'app/client/ui2018/modals';
import {ParseOptionSchema} from 'app/plugin/FileParserAPI';
import {Computed, dom, DomContents, IDisposableOwner, input, Observable, styled} from 'grainjs';
@@ -60,7 +61,7 @@ export function buildParseOptionsForm(
return [
cssParseOptionForm(
items.map((item) => cssParseOption(
cssParseOptionName(item.label),
cssParseOptionName(makeLinks(item.label)),
optionToInput(owner, item.type, optionsMap.get(item.name)!),
testId('parseopts-opt'),
)),