mirror of
https://github.com/wting/autojump
synced 2024-10-27 20:34:07 +00:00
Fix encoding issues.
The original implementation used str.encode() on input and str.decode() on output. However, this could raise UnicodeDecodeError, since certain characters cannot be encoded or decoded as ASCII. The new solution is to call unicode() on all input strings and to output UTF-8 encoded strings. This assumes that the shell can handle UTF-8 strings.
This commit is contained in:
parent
3f460fb3e9
commit
35bc63c66e
3
Makefile
3
Makefile
@ -39,4 +39,5 @@ tar:
|
|||||||
sha1sum autojump_v$(VERSION).tar.gz
|
sha1sum autojump_v$(VERSION).tar.gz
|
||||||
|
|
||||||
test:
|
test:
|
||||||
testify -v tests
|
@find . -type f -iname "*.pyc" -delete
|
||||||
|
testify -v tests -x disabled
|
||||||
|
26
bin/autojump
26
bin/autojump
@ -45,8 +45,6 @@ from autojump_data import entriefy
|
|||||||
from autojump_data import Entry
|
from autojump_data import Entry
|
||||||
from autojump_data import load
|
from autojump_data import load
|
||||||
from autojump_data import save
|
from autojump_data import save
|
||||||
from autojump_utils import decode
|
|
||||||
from autojump_utils import encode_local
|
|
||||||
from autojump_utils import first
|
from autojump_utils import first
|
||||||
from autojump_utils import get_tab_entry_info
|
from autojump_utils import get_tab_entry_info
|
||||||
from autojump_utils import get_pwd
|
from autojump_utils import get_pwd
|
||||||
@ -54,9 +52,11 @@ from autojump_utils import has_uppercase
|
|||||||
from autojump_utils import is_osx
|
from autojump_utils import is_osx
|
||||||
from autojump_utils import last
|
from autojump_utils import last
|
||||||
from autojump_utils import print_entry
|
from autojump_utils import print_entry
|
||||||
|
from autojump_utils import print_local
|
||||||
from autojump_utils import print_tab_menu
|
from autojump_utils import print_tab_menu
|
||||||
from autojump_utils import sanitize
|
from autojump_utils import sanitize
|
||||||
from autojump_utils import take
|
from autojump_utils import take
|
||||||
|
from autojump_utils import unico
|
||||||
|
|
||||||
VERSION = '22.0.0-alpha'
|
VERSION = '22.0.0-alpha'
|
||||||
FUZZY_MATCH_THRESHOLD = 0.6
|
FUZZY_MATCH_THRESHOLD = 0.6
|
||||||
@ -131,7 +131,7 @@ def add_path(data, path, weight=10):
|
|||||||
with resulting duplicate entries in the database than a single canonical
|
with resulting duplicate entries in the database than a single canonical
|
||||||
path.
|
path.
|
||||||
"""
|
"""
|
||||||
path = decode(path).rstrip(os.sep)
|
path = unico(path).rstrip(os.sep)
|
||||||
if path == os.path.expanduser('~'):
|
if path == os.path.expanduser('~'):
|
||||||
return data, Entry(path, 0)
|
return data, Entry(path, 0)
|
||||||
|
|
||||||
@ -142,7 +142,7 @@ def add_path(data, path, weight=10):
|
|||||||
|
|
||||||
def decrease_path(data, path, weight=15):
|
def decrease_path(data, path, weight=15):
|
||||||
"""Decrease or zero out a path."""
|
"""Decrease or zero out a path."""
|
||||||
path = decode(path).rstrip(os.sep)
|
path = unico(path).rstrip(os.sep)
|
||||||
data[path] = max(0, data.get(path, 0) - weight)
|
data[path] = max(0, data.get(path, 0) - weight)
|
||||||
return data, Entry(path, data[path])
|
return data, Entry(path, data[path])
|
||||||
|
|
||||||
@ -189,11 +189,10 @@ def handle_tab_completion(needle, entries):
|
|||||||
tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
|
tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
|
||||||
|
|
||||||
if tab_path:
|
if tab_path:
|
||||||
print(encode_local(tab_path))
|
print_local(tab_path)
|
||||||
elif tab_index:
|
elif tab_index:
|
||||||
get_ith_path = lambda i, iterable: last(take(i, iterable)).path
|
get_ith_path = lambda i, iterable: last(take(i, iterable)).path
|
||||||
print(encode_local(
|
print_local(get_ith_path(tab_index, find_matches(entries, tab_needle)))
|
||||||
get_ith_path(tab_index, find_matches(entries, tab_needle))))
|
|
||||||
elif tab_needle:
|
elif tab_needle:
|
||||||
# found partial tab completion entry
|
# found partial tab completion entry
|
||||||
print_tab_menu(
|
print_tab_menu(
|
||||||
@ -326,7 +325,8 @@ def print_stats(data, data_path):
|
|||||||
print("%d:\t number of entries" % len(data))
|
print("%d:\t number of entries" % len(data))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print("%.2f:\t current directory weight" % data.get(os.getcwdu(), 0))
|
print_local(
|
||||||
|
"%.2f:\t current directory weight" % data.get(os.getcwdu(), 0))
|
||||||
except OSError:
|
except OSError:
|
||||||
# current directory no longer exists
|
# current directory no longer exists
|
||||||
pass
|
pass
|
||||||
@ -362,7 +362,7 @@ def main(args): # noqa
|
|||||||
elif not args.directory:
|
elif not args.directory:
|
||||||
# default return value so calling shell functions have an argument
|
# default return value so calling shell functions have an argument
|
||||||
# to `cd` to
|
# to `cd` to
|
||||||
print(encode_local('.'))
|
print_local('.')
|
||||||
else:
|
else:
|
||||||
entries = entriefy(load(config))
|
entries = entriefy(load(config))
|
||||||
needles = sanitize(args.directory)
|
needles = sanitize(args.directory)
|
||||||
@ -370,13 +370,13 @@ def main(args): # noqa
|
|||||||
get_tab_entry_info(first(needles), TAB_SEPARATOR)
|
get_tab_entry_info(first(needles), TAB_SEPARATOR)
|
||||||
|
|
||||||
if tab_path:
|
if tab_path:
|
||||||
print(encode_local(tab_path))
|
print_local(tab_path)
|
||||||
elif tab_index:
|
elif tab_index:
|
||||||
get_ith_path = lambda i, iterable: last(take(i, iterable)).path
|
get_ith_path = lambda i, iterable: last(take(i, iterable)).path
|
||||||
print(encode_local(
|
print_local(
|
||||||
get_ith_path(tab_index, find_matches(entries, tab_needle))))
|
get_ith_path(tab_index, find_matches(entries, tab_needle)))
|
||||||
else:
|
else:
|
||||||
print(encode_local(first(find_matches(entries, needles)).path))
|
print_local(first(find_matches(entries, needles)).path)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ else:
|
|||||||
from itertools import imap
|
from itertools import imap
|
||||||
|
|
||||||
from autojump_utils import create_dir
|
from autojump_utils import create_dir
|
||||||
|
from autojump_utils import unico
|
||||||
from autojump_utils import is_osx
|
from autojump_utils import is_osx
|
||||||
from autojump_utils import is_python3
|
from autojump_utils import is_python3
|
||||||
from autojump_utils import move_file
|
from autojump_utils import move_file
|
||||||
@ -124,11 +125,7 @@ def save(config, data):
|
|||||||
encoding='utf-8',
|
encoding='utf-8',
|
||||||
errors='replace') as f:
|
errors='replace') as f:
|
||||||
for path, weight in data.items():
|
for path, weight in data.items():
|
||||||
if is_python3():
|
f.write(unico("%s\t%s\n" % (weight, path)))
|
||||||
f.write(("%s\t%s\n" % (weight, path)))
|
|
||||||
else:
|
|
||||||
f.write(unicode(
|
|
||||||
"%s\t%s\n" % (weight, path)).encode('utf-8'))
|
|
||||||
|
|
||||||
f.flush()
|
f.flush()
|
||||||
os.fsync(f)
|
os.fsync(f)
|
||||||
|
@ -28,27 +28,9 @@ def create_dir(path):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def decode(string):
|
def encode_local(string):
|
||||||
"""Converts byte string to Unicode string."""
|
"""Converts string into user's preferred encoding."""
|
||||||
if is_python2():
|
return string.encode(sys.getfilesystemencoding() or 'utf-8')
|
||||||
# Python 2.6 does not support kwargs
|
|
||||||
return string.decode('utf-8', 'replace')
|
|
||||||
return string
|
|
||||||
|
|
||||||
|
|
||||||
def encode(string):
|
|
||||||
"""Converts Unicode string to byte string."""
|
|
||||||
if is_python2():
|
|
||||||
# Python 2.6 does not support kwargs
|
|
||||||
return string.encode('utf-8', 'replace')
|
|
||||||
return string
|
|
||||||
|
|
||||||
|
|
||||||
def encode_local(string, encoding=None):
|
|
||||||
"""Converts string into local filesystem encoding."""
|
|
||||||
if is_python2():
|
|
||||||
return decode(string).encode(encoding or sys.getfilesystemencoding())
|
|
||||||
return string
|
|
||||||
|
|
||||||
|
|
||||||
def first(xs):
|
def first(xs):
|
||||||
@ -153,7 +135,11 @@ def move_file(src, dst):
|
|||||||
|
|
||||||
|
|
||||||
def print_entry(entry):
|
def print_entry(entry):
|
||||||
print(encode_local("%.1f:\t%s" % (entry.weight, entry.path)))
|
print_local("%.1f:\t%s" % (entry.weight, entry.path))
|
||||||
|
|
||||||
|
|
||||||
|
def print_local(string):
    """Prints the result of passing `string` through encode_local()."""
    encoded = encode_local(string)
    print(encoded)
|
||||||
|
|
||||||
|
|
||||||
def print_tab_menu(needle, tab_entries, separator):
|
def print_tab_menu(needle, tab_entries, separator):
|
||||||
@ -166,17 +152,18 @@ def print_tab_menu(needle, tab_entries, separator):
|
|||||||
on subsequent calls.
|
on subsequent calls.
|
||||||
"""
|
"""
|
||||||
for i, entry in enumerate(tab_entries):
|
for i, entry in enumerate(tab_entries):
|
||||||
print(encode_local(
|
print_local(
|
||||||
'%s%s%d%s%s' % (
|
'%s%s%d%s%s' % (
|
||||||
needle,
|
needle,
|
||||||
separator,
|
separator,
|
||||||
i + 1,
|
i + 1,
|
||||||
separator,
|
separator,
|
||||||
entry.path)))
|
entry.path))
|
||||||
|
|
||||||
|
|
||||||
def sanitize(directories):
|
def sanitize(directories):
|
||||||
clean = lambda x: decode(x) if len(x) == 1 else decode(x).rstrip(os.sep)
|
# edge case to allow '/' as a valid path
|
||||||
|
clean = lambda x: unico(x) if x == os.sep else unico(x).rstrip(os.sep)
|
||||||
return list(imap(clean, directories))
|
return list(imap(clean, directories))
|
||||||
|
|
||||||
|
|
||||||
@ -203,3 +190,10 @@ def surround_quotes(string):
|
|||||||
def take(n, iterable):
|
def take(n, iterable):
|
||||||
"""Return first n items of an iterable."""
|
"""Return first n items of an iterable."""
|
||||||
return islice(iterable, n)
|
return islice(iterable, n)
|
||||||
|
|
||||||
|
|
||||||
|
def unico(string):
    """Converts into Unicode string.

    Byte strings are decoded as UTF-8, with undecodable bytes replaced by
    U+FFFD. Any other value, including text that is already Unicode, is
    returned unchanged.
    """
    # `bytes` is an alias of `str` on Python 2 and a distinct type on
    # Python 3, so this single check handles byte strings on both versions
    # (the previous Python-2-only guard returned raw bytes on Python 3).
    if isinstance(string, bytes):
        # positional arguments: Python 2.6 does not support kwargs here
        return string.decode('utf-8', 'replace')
    return string
|
||||||
|
@ -5,6 +5,7 @@ from shutil import rmtree
|
|||||||
from tempfile import gettempdir
|
from tempfile import gettempdir
|
||||||
from tempfile import mkdtemp
|
from tempfile import mkdtemp
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import mock
|
import mock
|
||||||
from testify import TestCase
|
from testify import TestCase
|
||||||
@ -16,11 +17,12 @@ from testify import class_setup
|
|||||||
from testify import class_teardown
|
from testify import class_teardown
|
||||||
from testify import run
|
from testify import run
|
||||||
from testify import setup
|
from testify import setup
|
||||||
|
from testify import suite
|
||||||
from testify import teardown
|
from testify import teardown
|
||||||
|
|
||||||
import autojump_utils
|
import autojump_utils
|
||||||
from autojump_utils import create_dir
|
from autojump_utils import create_dir
|
||||||
from autojump_utils import decode
|
from autojump_utils import encode_local
|
||||||
from autojump_utils import first
|
from autojump_utils import first
|
||||||
from autojump_utils import get_pwd
|
from autojump_utils import get_pwd
|
||||||
from autojump_utils import get_tab_entry_info
|
from autojump_utils import get_tab_entry_info
|
||||||
@ -32,12 +34,31 @@ from autojump_utils import sanitize
|
|||||||
from autojump_utils import second
|
from autojump_utils import second
|
||||||
from autojump_utils import surround_quotes
|
from autojump_utils import surround_quotes
|
||||||
from autojump_utils import take
|
from autojump_utils import take
|
||||||
|
from autojump_utils import unico
|
||||||
|
|
||||||
|
|
||||||
class StringUnitTests(TestCase):
|
class StringUnitTests(TestCase):
|
||||||
def test_decode(self):
|
@mock.patch.object(sys, 'getfilesystemencoding', return_value='ascii')
|
||||||
assert_equal(decode(r'blah'), u'blah')
|
def test_encode_local_ascii(self, _):
|
||||||
assert_equal(decode(r'日本語'), u'日本語')
|
assert_equal(encode_local(u'foo'), b'foo')
|
||||||
|
|
||||||
|
@suite('disabled', reason='#246')
|
||||||
|
def test_encode_local_ascii_fails(self):
|
||||||
|
with assert_raises(UnicodeDecodeError):
|
||||||
|
with mock.patch.object(
|
||||||
|
sys,
|
||||||
|
'getfilesystemencoding',
|
||||||
|
return_value='ascii'):
|
||||||
|
encode_local(u'日本語')
|
||||||
|
|
||||||
|
@mock.patch.object(sys, 'getfilesystemencoding', return_value=None)
|
||||||
|
def test_encode_local_empty(self, _):
|
||||||
|
assert_equal(encode_local(b'foo'), u'foo')
|
||||||
|
|
||||||
|
@mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8')
|
||||||
|
def test_encode_local_unicode(self, _):
|
||||||
|
assert_equal(encode_local(b'foo'), u'foo')
|
||||||
|
assert_equal(encode_local(u'foo'), u'foo')
|
||||||
|
|
||||||
def test_has_uppercase(self):
|
def test_has_uppercase(self):
|
||||||
assert_true(has_uppercase('Foo'))
|
assert_true(has_uppercase('Foo'))
|
||||||
@ -57,6 +78,11 @@ class StringUnitTests(TestCase):
|
|||||||
assert_equal(sanitize([]), [])
|
assert_equal(sanitize([]), [])
|
||||||
assert_equal(sanitize([r'/foo/bar/', r'/']), [u'/foo/bar', u'/'])
|
assert_equal(sanitize([r'/foo/bar/', r'/']), [u'/foo/bar', u'/'])
|
||||||
|
|
||||||
|
def test_unico(self):
|
||||||
|
assert_equal(unico(b'blah'), u'blah')
|
||||||
|
assert_equal(unico(b'日本語'), u'日本語')
|
||||||
|
assert_equal(unico(u'でもおれは中国人だ。'), u'でもおれは中国人だ。')
|
||||||
|
|
||||||
|
|
||||||
class IterationUnitTests(TestCase):
|
class IterationUnitTests(TestCase):
|
||||||
def test_first(self):
|
def test_first(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user