1
0
mirror of https://github.com/wting/autojump synced 2024-10-27 20:34:07 +00:00

Fix encoding issues.

The original implementation used str.decode() on input and str.encode() on
output. However, this would cause a UnicodeDecodeError, since certain
characters can't be encoded / decoded in ASCII.

The new solution is to convert all input strings with unicode() and to output
UTF-8-encoded strings. This assumes that the shell can handle UTF-8
strings.
This commit is contained in:
William Ting 2014-01-07 11:44:44 -06:00
parent 3f460fb3e9
commit 1a0003d852
4 changed files with 24 additions and 44 deletions

View File

@ -45,8 +45,7 @@ from autojump_data import entriefy
from autojump_data import Entry from autojump_data import Entry
from autojump_data import load from autojump_data import load
from autojump_data import save from autojump_data import save
from autojump_utils import decode from autojump_utils import encode
from autojump_utils import encode_local
from autojump_utils import first from autojump_utils import first
from autojump_utils import get_tab_entry_info from autojump_utils import get_tab_entry_info
from autojump_utils import get_pwd from autojump_utils import get_pwd
@ -131,7 +130,7 @@ def add_path(data, path, weight=10):
with resulting duplicate entries in the database than a single canonical with resulting duplicate entries in the database than a single canonical
path. path.
""" """
path = decode(path).rstrip(os.sep) path = encode(path).rstrip(os.sep)
if path == os.path.expanduser('~'): if path == os.path.expanduser('~'):
return data, Entry(path, 0) return data, Entry(path, 0)
@ -142,7 +141,7 @@ def add_path(data, path, weight=10):
def decrease_path(data, path, weight=15): def decrease_path(data, path, weight=15):
"""Decrease or zero out a path.""" """Decrease or zero out a path."""
path = decode(path).rstrip(os.sep) path = encode(path).rstrip(os.sep)
data[path] = max(0, data.get(path, 0) - weight) data[path] = max(0, data.get(path, 0) - weight)
return data, Entry(path, data[path]) return data, Entry(path, data[path])
@ -189,11 +188,10 @@ def handle_tab_completion(needle, entries):
tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR) tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
if tab_path: if tab_path:
print(encode_local(tab_path)) print(tab_path)
elif tab_index: elif tab_index:
get_ith_path = lambda i, iterable: last(take(i, iterable)).path get_ith_path = lambda i, iterable: last(take(i, iterable)).path
print(encode_local( print(get_ith_path(tab_index, find_matches(entries, tab_needle)))
get_ith_path(tab_index, find_matches(entries, tab_needle))))
elif tab_needle: elif tab_needle:
# found partial tab completion entry # found partial tab completion entry
print_tab_menu( print_tab_menu(
@ -362,7 +360,7 @@ def main(args): # noqa
elif not args.directory: elif not args.directory:
# default return value so calling shell functions have an argument # default return value so calling shell functions have an argument
# to `cd` to # to `cd` to
print(encode_local('.')) print('.')
else: else:
entries = entriefy(load(config)) entries = entriefy(load(config))
needles = sanitize(args.directory) needles = sanitize(args.directory)
@ -370,13 +368,12 @@ def main(args): # noqa
get_tab_entry_info(first(needles), TAB_SEPARATOR) get_tab_entry_info(first(needles), TAB_SEPARATOR)
if tab_path: if tab_path:
print(encode_local(tab_path)) print(tab_path)
elif tab_index: elif tab_index:
get_ith_path = lambda i, iterable: last(take(i, iterable)).path get_ith_path = lambda i, iterable: last(take(i, iterable)).path
print(encode_local( print(get_ith_path(tab_index, find_matches(entries, tab_needle)))
get_ith_path(tab_index, find_matches(entries, tab_needle))))
else: else:
print(encode_local(first(find_matches(entries, needles)).path)) print(first(find_matches(entries, needles)).path)
return 0 return 0

View File

@ -17,6 +17,7 @@ else:
from itertools import imap from itertools import imap
from autojump_utils import create_dir from autojump_utils import create_dir
from autojump_utils import encode
from autojump_utils import is_osx from autojump_utils import is_osx
from autojump_utils import is_python3 from autojump_utils import is_python3
from autojump_utils import move_file from autojump_utils import move_file
@ -124,11 +125,7 @@ def save(config, data):
encoding='utf-8', encoding='utf-8',
errors='replace') as f: errors='replace') as f:
for path, weight in data.items(): for path, weight in data.items():
if is_python3(): f.write(encode("%s\t%s\n" % (weight, path)))
f.write(("%s\t%s\n" % (weight, path)))
else:
f.write(unicode(
"%s\t%s\n" % (weight, path)).encode('utf-8'))
f.flush() f.flush()
os.fsync(f) os.fsync(f)

View File

@ -28,26 +28,10 @@ def create_dir(path):
raise raise
def decode(string):
"""Converts byte string to Unicode string."""
if is_python2():
# Python 2.6 does not support kwargs
return string.decode('utf-8', 'replace')
return string
def encode(string): def encode(string):
"""Converts Unicode string to byte string.""" """Converts into Unicode string."""
if is_python2(): if is_python2() and not isinstance(string, unicode):
# Python 2.6 does not support kwargs return unicode(string, encoding='utf-8', errors='replace')
return string.encode('utf-8', 'replace')
return string
def encode_local(string, encoding=None):
"""Converts string into local filesystem encoding."""
if is_python2():
return decode(string).encode(encoding or sys.getfilesystemencoding())
return string return string
@ -153,7 +137,7 @@ def move_file(src, dst):
def print_entry(entry): def print_entry(entry):
print(encode_local("%.1f:\t%s" % (entry.weight, entry.path))) print("%.1f:\t%s" % (entry.weight, entry.path))
def print_tab_menu(needle, tab_entries, separator): def print_tab_menu(needle, tab_entries, separator):
@ -166,17 +150,18 @@ def print_tab_menu(needle, tab_entries, separator):
on subsequent calls. on subsequent calls.
""" """
for i, entry in enumerate(tab_entries): for i, entry in enumerate(tab_entries):
print(encode_local( print(
'%s%s%d%s%s' % ( '%s%s%d%s%s' % (
needle, needle,
separator, separator,
i + 1, i + 1,
separator, separator,
entry.path))) entry.path))
def sanitize(directories): def sanitize(directories):
clean = lambda x: decode(x) if len(x) == 1 else decode(x).rstrip(os.sep) # edge case to allow '/' as a valid path
clean = lambda x: encode(x) if x == os.sep else encode(x).rstrip(os.sep)
return list(imap(clean, directories)) return list(imap(clean, directories))

View File

@ -20,7 +20,7 @@ from testify import teardown
import autojump_utils import autojump_utils
from autojump_utils import create_dir from autojump_utils import create_dir
from autojump_utils import decode from autojump_utils import encode
from autojump_utils import first from autojump_utils import first
from autojump_utils import get_pwd from autojump_utils import get_pwd
from autojump_utils import get_tab_entry_info from autojump_utils import get_tab_entry_info
@ -35,9 +35,10 @@ from autojump_utils import take
class StringUnitTests(TestCase): class StringUnitTests(TestCase):
def test_decode(self): def test_encode(self):
assert_equal(decode(r'blah'), u'blah') assert_equal(encode(b'blah'), u'blah')
assert_equal(decode(r'日本語'), u'日本語') assert_equal(encode(b'日本語'), u'日本語')
assert_equal(encode(u'でもおれは中国人だ。'), u'でもおれは中国人だ。')
def test_has_uppercase(self): def test_has_uppercase(self):
assert_true(has_uppercase('Foo')) assert_true(has_uppercase('Foo'))