mirror of
https://github.com/gristlabs/grist-core.git
synced 2026-03-02 04:09:24 +00:00
(core) Remove accents when picking identifiers
Summary: Uses Python's unicodedata module to normalise a string and remove combining characters, leaving behind more ASCII letters and fewer underscores. Test Plan: Added unit test. Reviewers: paulfitz. Reviewed By: paulfitz. Subscribers: dsagal. Differential Revision: https://phab.getgrist.com/D2994
This commit is contained in:
@@ -1,11 +1,17 @@
|
||||
# coding=utf-8
|
||||
"""
|
||||
A module for creating and sanitizing identifiers
|
||||
"""
|
||||
import re
|
||||
from string import ascii_uppercase
|
||||
import itertools
|
||||
import re
|
||||
import unicodedata
|
||||
from keyword import iskeyword
|
||||
from string import ascii_uppercase
|
||||
|
||||
import six
|
||||
|
||||
import logger
|
||||
|
||||
log = logger.Logger(__name__, logger.INFO)
|
||||
|
||||
_invalid_ident_char_re = re.compile(r'[^a-zA-Z0-9_]+')
|
||||
@@ -20,7 +26,16 @@ def _sanitize_ident(ident, prefix="c", capitalize=False):
|
||||
Returns empty string if there are no valid identifier characters, so consider using as
|
||||
(_sanitize_ident(...) or "your_default").
|
||||
"""
|
||||
ident = "" if ident is None else str(ident)
|
||||
ident = u"" if ident is None else six.text_type(ident)
|
||||
|
||||
# https://stackoverflow.com/a/517974/2482744
|
||||
# Separate out combining characters (e.g. accents)
|
||||
ident = unicodedata.normalize('NFKD', ident)
|
||||
# then remove them completely
|
||||
# This means that 'é' becomes 'e' instead of '_' or 'e_'
|
||||
ident = "".join(c for c in ident if not unicodedata.combining(c))
|
||||
|
||||
# TODO allow non-ascii characters in identifiers when using Python 3
|
||||
ident = _invalid_ident_char_re.sub('_', ident).lstrip('_')
|
||||
ident = _invalid_ident_start_re.sub(prefix, ident)
|
||||
if not ident:
|
||||
|
||||
Reference in New Issue
Block a user