(core) Use slugify module to construct slugs in urlIds, addressing a long-standing TODO.

Summary:
This offers better handling for accented and non-English characters,
e.g. "Événements" becomes "Evenements" (rather than "vnements") and
"таблиця" becomes "tablicya" rather than an empty string.

Test Plan:
Added a test case. Existing documents will auto-redirect to
newly-generated names. In cases where it's different, that's probably for the
best.

Reviewers: georgegevoian

Reviewed By: georgegevoian

Differential Revision: https://phab.getgrist.com/D4176
This commit is contained in:
Dmitry S 2024-01-25 23:58:52 -05:00
parent fb276bade7
commit be0b4a1968
4 changed files with 35 additions and 7 deletions

View File

@ -12,6 +12,7 @@ import {IAttachedCustomWidget} from "app/common/widgetTypes";
import {UIRowId} from 'app/plugin/GristAPI'; import {UIRowId} from 'app/plugin/GristAPI';
import clone = require('lodash/clone'); import clone = require('lodash/clone');
import pickBy = require('lodash/pickBy'); import pickBy = require('lodash/pickBy');
import slugify from 'slugify';
export const SpecialDocPage = StringUnion('code', 'acl', 'data', 'GristDocTour', 'settings', 'webhook'); export const SpecialDocPage = StringUnion('code', 'acl', 'data', 'GristDocTour', 'settings', 'webhook');
type SpecialDocPage = typeof SpecialDocPage.type; type SpecialDocPage = typeof SpecialDocPage.type;
@ -1051,13 +1052,11 @@ function shouldIncludeSlug(doc: {id: string, urlId: string|null}): boolean {
return doc.id.startsWith(doc.urlId) || doc.urlId.startsWith(SHARE_KEY_PREFIX); return doc.id.startsWith(doc.urlId) || doc.urlId.startsWith(SHARE_KEY_PREFIX);
} }
// Convert the name of a document into a slug. Only alphanumerics are retained, // Convert the name of a document into a slug. The slugify library normalizes unicode characters,
// and spaces are replaced with hyphens. // replaces those with a reasonable ascii representation. Only alphanumerics are retained, and
// TODO: investigate whether there's a better option with unicode than just // spaces are replaced with hyphens.
// deleting it, seems unfair to languages using anything other than unaccented
// Latin characters.
function nameToSlug(name: string): string { function nameToSlug(name: string): string {
return name.trim().replace(/ /g, '-').replace(/[^-a-zA-Z0-9]/g, '').replace(/---*/g, '-'); return slugify(name, {strict: true});
} }
// Returns a slug for the given docId/urlId/name, or undefined if a slug should // Returns a slug for the given docId/urlId/name, or undefined if a slug should

View File

@ -182,6 +182,7 @@
"redlock": "3.1.2", "redlock": "3.1.2",
"saml2-js": "2.0.5", "saml2-js": "2.0.5",
"short-uuid": "3.1.1", "short-uuid": "3.1.1",
"slugify": "1.6.6",
"tmp": "0.0.33", "tmp": "0.0.33",
"ts-interface-checker": "1.0.2", "ts-interface-checker": "1.0.2",
"typeorm": "0.3.9", "typeorm": "0.3.9",

View File

@ -1,4 +1,4 @@
import {decodeUrl, getHostType, IGristUrlState, parseFirstUrlPart} from 'app/common/gristUrls'; import {decodeUrl, getHostType, getSlugIfNeeded, IGristUrlState, parseFirstUrlPart} from 'app/common/gristUrls';
import {assert} from 'chai'; import {assert} from 'chai';
import * as testUtils from 'test/server/testUtils'; import * as testUtils from 'test/server/testUtils';
@ -129,4 +129,27 @@ describe('gristUrls', function() {
assert.equal(getHostType('doc-worker-123.internal:8079', defaultOptions), 'custom'); assert.equal(getHostType('doc-worker-123.internal:8079', defaultOptions), 'custom');
}); });
}); });
describe('getSlugIfNeeded', function() {
it('should only return a slug when a valid urlId is used', function() {
assert.strictEqual(getSlugIfNeeded({id: '1234567890abcdef', urlId: '1234567890ab', name: 'Foo'}), 'Foo');
// urlId too short
assert.strictEqual(getSlugIfNeeded({id: '1234567890abcdef', urlId: '12345678', name: 'Foo'}), undefined);
// urlId doesn't match docId
assert.strictEqual(getSlugIfNeeded({id: '1234567890abcdef', urlId: '1234567890ac', name: 'Foo'}), undefined);
// no urlId
assert.strictEqual(getSlugIfNeeded({id: '1234567890abcdef', urlId: '', name: 'Foo'}), undefined);
assert.strictEqual(getSlugIfNeeded({id: '1234567890abcdef', urlId: null, name: 'Foo'}), undefined);
});
it('should leave only alphamerics after replacing reasonable unicode chars', function() {
const id = '1234567890abcdef', urlId = '1234567890ab';
// This is mainly a test of the `slugify` library we now use. What matters isn't the
// specific result, but that the result is reasonable.
assert.strictEqual(getSlugIfNeeded({id, urlId, name: 'Foo'}), 'Foo');
assert.strictEqual(getSlugIfNeeded({id, urlId, name: "Hélène's résumé"}), 'Helenes-resume');
assert.strictEqual(getSlugIfNeeded({id, urlId, name: "Привіт, Їжак!"}), 'Privit-Yizhak');
assert.strictEqual(getSlugIfNeeded({id, urlId, name: "S&P500 is ~$4,894.16"}), 'SandP500-is-dollar489416');
});
});
}); });

View File

@ -7150,6 +7150,11 @@ slash@^3.0.0:
resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634" resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634"
integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==
slugify@1.6.6:
version "1.6.6"
resolved "https://registry.yarnpkg.com/slugify/-/slugify-1.6.6.tgz#2d4ac0eacb47add6af9e04d3be79319cbcc7924b"
integrity sha512-h+z7HKHYXj6wJU+AnS/+IH8Uh9fdcX1Lrhg1/VMdf9PwoBQXFcXiAdsy2tSK0P6gKwJLXp02r90ahUCqHk9rrw==
smart-buffer@^4.2.0: smart-buffer@^4.2.0:
version "4.2.0" version "4.2.0"
resolved "https://registry.yarnpkg.com/smart-buffer/-/smart-buffer-4.2.0.tgz#6e1d71fa4f18c05f7d0ff216dd16a481d0e8d9ae" resolved "https://registry.yarnpkg.com/smart-buffer/-/smart-buffer-4.2.0.tgz#6e1d71fa4f18c05f7d0ff216dd16a481d0e8d9ae"