mirror of
				https://github.com/wting/autojump
				synced 2025-06-13 12:54:07 +00:00 
			
		
		
		
	Fix encoding issues.
The original implementation used str.encode() on input and str.decode() on output. However this would cause UnicodeDecodeError since certain characters can't be encoded / decoded in ASCII. The new solution is to use unicode() on all input strings and output UTF-8 encoded strings. This makes the assumption that the shell can handle UTF-8 strings.
This commit is contained in:
		
							parent
							
								
									3f460fb3e9
								
							
						
					
					
						commit
						35bc63c66e
					
				
							
								
								
									
										3
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								Makefile
									
									
									
									
									
								
							| @ -39,4 +39,5 @@ tar: | ||||
| 	sha1sum autojump_v$(VERSION).tar.gz | ||||
| 
 | ||||
| test: | ||||
| 	testify -v tests | ||||
| 	@find . -type f -iname "*.pyc" -delete | ||||
| 	testify -v tests -x disabled | ||||
|  | ||||
							
								
								
									
										26
									
								
								bin/autojump
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								bin/autojump
									
									
									
									
									
								
							| @ -45,8 +45,6 @@ from autojump_data import entriefy | ||||
| from autojump_data import Entry | ||||
| from autojump_data import load | ||||
| from autojump_data import save | ||||
| from autojump_utils import decode | ||||
| from autojump_utils import encode_local | ||||
| from autojump_utils import first | ||||
| from autojump_utils import get_tab_entry_info | ||||
| from autojump_utils import get_pwd | ||||
| @ -54,9 +52,11 @@ from autojump_utils import has_uppercase | ||||
| from autojump_utils import is_osx | ||||
| from autojump_utils import last | ||||
| from autojump_utils import print_entry | ||||
| from autojump_utils import print_local | ||||
| from autojump_utils import print_tab_menu | ||||
| from autojump_utils import sanitize | ||||
| from autojump_utils import take | ||||
| from autojump_utils import unico | ||||
| 
 | ||||
| VERSION = '22.0.0-alpha' | ||||
| FUZZY_MATCH_THRESHOLD = 0.6 | ||||
| @ -131,7 +131,7 @@ def add_path(data, path, weight=10): | ||||
|     with resulting duplicate entries in the database than a single canonical | ||||
|     path. | ||||
|     """ | ||||
|     path = decode(path).rstrip(os.sep) | ||||
|     path = unico(path).rstrip(os.sep) | ||||
|     if path == os.path.expanduser('~'): | ||||
|         return data, Entry(path, 0) | ||||
| 
 | ||||
| @ -142,7 +142,7 @@ def add_path(data, path, weight=10): | ||||
| 
 | ||||
| def decrease_path(data, path, weight=15): | ||||
|     """Decrease or zero out a path.""" | ||||
|     path = decode(path).rstrip(os.sep) | ||||
|     path = unico(path).rstrip(os.sep) | ||||
|     data[path] = max(0, data.get(path, 0) - weight) | ||||
|     return data, Entry(path, data[path]) | ||||
| 
 | ||||
| @ -189,11 +189,10 @@ def handle_tab_completion(needle, entries): | ||||
|     tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR) | ||||
| 
 | ||||
|     if tab_path: | ||||
|         print(encode_local(tab_path)) | ||||
|         print_local(tab_path) | ||||
|     elif tab_index: | ||||
|         get_ith_path = lambda i, iterable: last(take(i, iterable)).path | ||||
|         print(encode_local( | ||||
|             get_ith_path(tab_index, find_matches(entries, tab_needle)))) | ||||
|         print_local(get_ith_path(tab_index, find_matches(entries, tab_needle))) | ||||
|     elif tab_needle: | ||||
|         # found partial tab completion entry | ||||
|         print_tab_menu( | ||||
| @ -326,7 +325,8 @@ def print_stats(data, data_path): | ||||
|     print("%d:\t number of entries" % len(data)) | ||||
| 
 | ||||
|     try: | ||||
|         print("%.2f:\t current directory weight" % data.get(os.getcwdu(), 0)) | ||||
|         print_local( | ||||
|                 "%.2f:\t current directory weight" % data.get(os.getcwdu(), 0)) | ||||
|     except OSError: | ||||
|         # current directory no longer exists | ||||
|         pass | ||||
| @ -362,7 +362,7 @@ def main(args):  # noqa | ||||
|     elif not args.directory: | ||||
|         # default return value so calling shell functions have an argument | ||||
|         # to `cd` to | ||||
|         print(encode_local('.')) | ||||
|         print_local('.') | ||||
|     else: | ||||
|         entries = entriefy(load(config)) | ||||
|         needles = sanitize(args.directory) | ||||
| @ -370,13 +370,13 @@ def main(args):  # noqa | ||||
|             get_tab_entry_info(first(needles), TAB_SEPARATOR) | ||||
| 
 | ||||
|         if tab_path: | ||||
|             print(encode_local(tab_path)) | ||||
|             print_local(tab_path) | ||||
|         elif tab_index: | ||||
|             get_ith_path = lambda i, iterable: last(take(i, iterable)).path | ||||
|             print(encode_local( | ||||
|                 get_ith_path(tab_index, find_matches(entries, tab_needle)))) | ||||
|             print_local( | ||||
|                     get_ith_path(tab_index, find_matches(entries, tab_needle))) | ||||
|         else: | ||||
|             print(encode_local(first(find_matches(entries, needles)).path)) | ||||
|             print_local(first(find_matches(entries, needles)).path) | ||||
| 
 | ||||
|     return 0 | ||||
| 
 | ||||
|  | ||||
| @ -17,6 +17,7 @@ else: | ||||
|     from itertools import imap | ||||
| 
 | ||||
| from autojump_utils import create_dir | ||||
| from autojump_utils import unico | ||||
| from autojump_utils import is_osx | ||||
| from autojump_utils import is_python3 | ||||
| from autojump_utils import move_file | ||||
| @ -124,11 +125,7 @@ def save(config, data): | ||||
|                 encoding='utf-8', | ||||
|                 errors='replace') as f: | ||||
|             for path, weight in data.items(): | ||||
|                 if is_python3(): | ||||
|                     f.write(("%s\t%s\n" % (weight, path))) | ||||
|                 else: | ||||
|                     f.write(unicode( | ||||
|                         "%s\t%s\n" % (weight, path)).encode('utf-8')) | ||||
|                 f.write(unico("%s\t%s\n" % (weight, path))) | ||||
| 
 | ||||
|             f.flush() | ||||
|             os.fsync(f) | ||||
|  | ||||
| @ -28,27 +28,9 @@ def create_dir(path): | ||||
|             raise | ||||
| 
 | ||||
| 
 | ||||
| def decode(string): | ||||
|     """Converts byte string to Unicode string.""" | ||||
|     if is_python2(): | ||||
|         # Python 2.6 does not support kwargs | ||||
|         return string.decode('utf-8', 'replace') | ||||
|     return string | ||||
| 
 | ||||
| 
 | ||||
| def encode(string): | ||||
|     """Converts Unicode string to byte string.""" | ||||
|     if is_python2(): | ||||
|         # Python 2.6 does not support kwargs | ||||
|         return string.encode('utf-8', 'replace') | ||||
|     return string | ||||
| 
 | ||||
| 
 | ||||
| def encode_local(string, encoding=None): | ||||
|     """Converts string into local filesystem encoding.""" | ||||
|     if is_python2(): | ||||
|         return decode(string).encode(encoding or sys.getfilesystemencoding()) | ||||
|     return string | ||||
| def encode_local(string): | ||||
|     """Converts string into user's preferred encoding.""" | ||||
|     return string.encode(sys.getfilesystemencoding() or 'utf-8') | ||||
| 
 | ||||
| 
 | ||||
| def first(xs): | ||||
| @ -153,7 +135,11 @@ def move_file(src, dst): | ||||
| 
 | ||||
| 
 | ||||
| def print_entry(entry): | ||||
|     print(encode_local("%.1f:\t%s" % (entry.weight, entry.path))) | ||||
|     print_local("%.1f:\t%s" % (entry.weight, entry.path)) | ||||
| 
 | ||||
| 
 | ||||
| def print_local(string): | ||||
|     print(encode_local(string)) | ||||
| 
 | ||||
| 
 | ||||
| def print_tab_menu(needle, tab_entries, separator): | ||||
| @ -166,17 +152,18 @@ def print_tab_menu(needle, tab_entries, separator): | ||||
|     on subsequent calls. | ||||
|     """ | ||||
|     for i, entry in enumerate(tab_entries): | ||||
|         print(encode_local( | ||||
|         print_local( | ||||
|             '%s%s%d%s%s' % ( | ||||
|                 needle, | ||||
|                 separator, | ||||
|                 i + 1, | ||||
|                 separator, | ||||
|                 entry.path))) | ||||
|                 entry.path)) | ||||
| 
 | ||||
| 
 | ||||
| def sanitize(directories): | ||||
|     clean = lambda x: decode(x) if len(x) == 1 else decode(x).rstrip(os.sep) | ||||
|     # edge case to allow '/' as a valid path | ||||
|     clean = lambda x: unico(x) if x == os.sep else unico(x).rstrip(os.sep) | ||||
|     return list(imap(clean, directories)) | ||||
| 
 | ||||
| 
 | ||||
| @ -203,3 +190,10 @@ def surround_quotes(string): | ||||
| def take(n, iterable): | ||||
|     """Return first n items of an iterable.""" | ||||
|     return islice(iterable, n) | ||||
| 
 | ||||
| 
 | ||||
| def unico(string): | ||||
|     """Converts into Unicode string.""" | ||||
|     if is_python2() and not isinstance(string, unicode): | ||||
|         return unicode(string, encoding='utf-8', errors='replace') | ||||
|     return string | ||||
|  | ||||
| @ -5,6 +5,7 @@ from shutil import rmtree | ||||
| from tempfile import gettempdir | ||||
| from tempfile import mkdtemp | ||||
| import os | ||||
| import sys | ||||
| 
 | ||||
| import mock | ||||
| from testify import TestCase | ||||
| @ -16,11 +17,12 @@ from testify import class_setup | ||||
| from testify import class_teardown | ||||
| from testify import run | ||||
| from testify import setup | ||||
| from testify import suite | ||||
| from testify import teardown | ||||
| 
 | ||||
| import autojump_utils | ||||
| from autojump_utils import create_dir | ||||
| from autojump_utils import decode | ||||
| from autojump_utils import encode_local | ||||
| from autojump_utils import first | ||||
| from autojump_utils import get_pwd | ||||
| from autojump_utils import get_tab_entry_info | ||||
| @ -32,12 +34,31 @@ from autojump_utils import sanitize | ||||
| from autojump_utils import second | ||||
| from autojump_utils import surround_quotes | ||||
| from autojump_utils import take | ||||
| from autojump_utils import unico | ||||
| 
 | ||||
| 
 | ||||
| class StringUnitTests(TestCase): | ||||
|     def test_decode(self): | ||||
|         assert_equal(decode(r'blah'), u'blah') | ||||
|         assert_equal(decode(r'日本語'), u'日本語') | ||||
|     @mock.patch.object(sys, 'getfilesystemencoding', return_value='ascii') | ||||
|     def test_encode_local_ascii(self, _): | ||||
|         assert_equal(encode_local(u'foo'), b'foo') | ||||
| 
 | ||||
|     @suite('disabled', reason='#246') | ||||
|     def test_encode_local_ascii_fails(self): | ||||
|         with assert_raises(UnicodeDecodeError): | ||||
|             with mock.patch.object( | ||||
|                     sys, | ||||
|                     'getfilesystemencoding', | ||||
|                     return_value='ascii'): | ||||
|                 encode_local(u'日本語') | ||||
| 
 | ||||
|     @mock.patch.object(sys, 'getfilesystemencoding', return_value=None) | ||||
|     def test_encode_local_empty(self, _): | ||||
|         assert_equal(encode_local(b'foo'), u'foo') | ||||
| 
 | ||||
|     @mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8') | ||||
|     def test_encode_local_unicode(self, _): | ||||
|         assert_equal(encode_local(b'foo'), u'foo') | ||||
|         assert_equal(encode_local(u'foo'), u'foo') | ||||
| 
 | ||||
|     def test_has_uppercase(self): | ||||
|         assert_true(has_uppercase('Foo')) | ||||
| @ -57,6 +78,11 @@ class StringUnitTests(TestCase): | ||||
|         assert_equal(sanitize([]), []) | ||||
|         assert_equal(sanitize([r'/foo/bar/', r'/']), [u'/foo/bar', u'/']) | ||||
| 
 | ||||
|     def test_unico(self): | ||||
|         assert_equal(unico(b'blah'), u'blah') | ||||
|         assert_equal(unico(b'日本語'), u'日本語') | ||||
|         assert_equal(unico(u'でもおれは中国人だ。'), u'でもおれは中国人だ。') | ||||
| 
 | ||||
| 
 | ||||
| class IterationUnitTests(TestCase): | ||||
|     def test_first(self): | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user