mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) Remove accents when picking identifiers
Summary: Uses Python's unicodedata module to normalise a string and remove combining characters, leaving behind more ASCII letters and fewer underscores. Test Plan: Added unit test. Reviewers: paulfitz. Reviewed By: paulfitz. Subscribers: dsagal. Differential Revision: https://phab.getgrist.com/D2994
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
# coding=utf-8
|
||||
|
||||
import unittest
|
||||
import difflib
|
||||
import re
|
||||
@@ -6,7 +8,6 @@ from six.moves import xrange
|
||||
|
||||
import gencode
|
||||
import identifiers
|
||||
import records
|
||||
import schema
|
||||
import table
|
||||
import testutil
|
||||
@@ -83,6 +84,48 @@ class TestGenCode(unittest.TestCase):
|
||||
module = gcode.usercode
|
||||
self.assertTrue(isinstance(module.Students, table.UserTable))
|
||||
|
||||
def test_ident_combining_chars(self):
    """Check that accented labels yield identifiers with the accents removed.

    Each (label, expected) pair is run through both the table and the column
    identifier pickers, and the lower-cased label is additionally checked
    against the lower-cased expected identifier for columns.
    """
    cases = [
        # Actual example table name from a user.
        # unicodedata.normalize can separate accents but doesn't help with Đ,
        # so that letter shows up as an underscore in the expected identifier.
        (u"Bảng_Đặc_Thù", u"Bang__ac_Thu"),
        (u"Noëlle", u"Noelle"),
        (u"Séamus", u"Seamus"),
        (u"Hélène", u"Helene"),
        (u"Dilâçar", u"Dilacar"),
        (u"Erdoğan", u"Erdogan"),
        (u"Ñwalme", u"Nwalme"),
        (u"Árvíztűrő tükörfúrógép", u"Arvizturo_tukorfurogep"),
    ]

    for label, expected in cases:
        # Table and column pickers must agree on the accent-free identifier.
        self.assertEqual(expected, identifiers.pick_table_ident(label))
        self.assertEqual(expected, identifiers.pick_col_ident(label))
        # Lower-casing the label should simply lower-case the column identifier.
        self.assertEqual(expected.lower(), identifiers.pick_col_ident(label.lower()))
|
||||
|
||||
def test_pick_col_ident(self):
|
||||
self.assertEqual(identifiers.pick_col_ident("asdf"), "asdf")
|
||||
self.assertEqual(identifiers.pick_col_ident(" a s==d!~@#$%^f"), "a_s_d_f")
|
||||
|
||||
Reference in New Issue
Block a user