diff --git a/bin/autojump b/bin/autojump index 5595d14..51b92eb 100755 --- a/bin/autojump +++ b/bin/autojump @@ -21,13 +21,11 @@ from __future__ import print_function -from difflib import SequenceMatcher from itertools import chain from math import sqrt from operator import attrgetter from operator import itemgetter import os -import re import sys if sys.version_info[0] == 3: @@ -48,7 +46,6 @@ from autojump_data import save from autojump_utils import first from autojump_utils import get_pwd from autojump_utils import get_tab_entry_info -from autojump_utils import has_uppercase from autojump_utils import is_autojump_sourced from autojump_utils import is_osx from autojump_utils import is_windows @@ -59,9 +56,9 @@ from autojump_utils import print_tab_menu from autojump_utils import sanitize from autojump_utils import take from autojump_utils import unico +from autojump_path_match import find_matches VERSION = '22.3.0' -FUZZY_MATCH_THRESHOLD = 0.6 TAB_ENTRIES_COUNT = 9 TAB_SEPARATOR = '__' @@ -151,47 +148,6 @@ def decrease_path(data, path, weight=15): return data, Entry(path, data[path]) -def detect_smartcase(needles): - """ - If any needles contain an uppercase letter then use case sensitive - searching. Otherwise use case insensitive searching. - """ - return not any(imap(has_uppercase, needles)) - - -def find_matches(entries, needles, check_entries=True): - """Return an iterator to matching entries.""" - # TODO(wting|2014-02-24): replace assertion with unit test - assert isinstance(needles, list), "Needles must be a list." - ignore_case = detect_smartcase(needles) - - try: - pwd = os.getcwdu() - except OSError: - pwd = None - - # using closure to prevent constantly hitting hdd - def is_cwd(entry): - return os.path.realpath(entry.path) == pwd - - if check_entries: - path_exists = lambda entry: os.path.exists(entry.path) - else: - path_exists = lambda _: True - - data = sorted( - entries, - key=attrgetter('weight'), - reverse=True) - - return ifilter( - lambda entry: not is_cwd(entry) and path_exists(entry), - chain( - match_consecutive(needles, data, ignore_case), - match_fuzzy(needles, data, ignore_case), - match_anywhere(needles, data, ignore_case))) - - def handle_tab_completion(needle, entries): tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR) @@ -221,111 +177,6 @@ def handle_tab_completion(needle, entries): TAB_SEPARATOR) -def match_anywhere(needles, haystack, ignore_case=False): - """ - Matches needles anywhere in the path as long as they're in the same (but - not necessary consecutive) order. - - For example: - needles = ['foo', 'baz'] - regex needle = r'.*foo.*baz.*' - haystack = [ - (path="/foo/bar/baz", weight=10), - (path="/baz/foo/bar", weight=10), - (path="/foo/baz", weight=10)] - - result = [ - (path="/moo/foo/baz", weight=10), - (path="/foo/baz", weight=10)] - """ - regex_needle = '.*' + '.*'.join(needles).replace('\\', '\\\\') + '.*' - regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE - found = lambda haystack: re.search( - regex_needle, - haystack.path, - flags=regex_flags) - return ifilter(found, haystack) - - -def match_consecutive(needles, haystack, ignore_case=False): - """ - Matches consecutive needles at the end of a path. - - For example: - needles = ['foo', 'baz'] - haystack = [ - (path="/foo/bar/baz", weight=10), - (path="/foo/baz/moo", weight=10), - (path="/moo/foo/baz", weight=10), - (path="/foo/baz", weight=10)] - - regex_needle = re.compile(r''' - foo # needle #1 - [^/]* # all characters except os.sep zero or more times - / # os.sep - [^/]* # all characters except os.sep zero or more times - baz # needle #2 - [^/]* # all characters except os.sep zero or more times - $ # end of string - ''') - - result = [ - (path="/moo/foo/baz", weight=10), - (path="/foo/baz", weight=10)] - """ - # The normal \\ separator needs to be escaped again for use in regex. - sep = '\\\\' if is_windows() else os.sep - regex_no_sep = '[^' + sep + ']*' - regex_no_sep_end = regex_no_sep + '$' - regex_one_sep = regex_no_sep + sep + regex_no_sep - # can't use compiled regex because of flags - regex_needle = regex_one_sep.join(needles).replace('\\', '\\\\') + regex_no_sep_end # noqa - regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE - found = lambda entry: re.search( - regex_needle, - entry.path, - flags=regex_flags) - return ifilter(found, haystack) - - -def match_fuzzy(needles, haystack, ignore_case=False): - """ - Performs an approximate match with the last needle against the end of - every path past an acceptable threshold (FUZZY_MATCH_THRESHOLD). - - For example: - needles = ['foo', 'bar'] - haystack = [ - (path="/foo/bar/baz", weight=11), - (path="/foo/baz/moo", weight=10), - (path="/moo/foo/baz", weight=10), - (path="/foo/baz", weight=10), - (path="/foo/bar", weight=10)] - - result = [ - (path="/foo/bar/baz", weight=11), - (path="/moo/foo/baz", weight=10), - (path="/foo/baz", weight=10), - (path="/foo/bar", weight=10)] - - This is a weak heuristic and used as a last resort to find matches. - """ - end_dir = lambda path: last(os.path.split(path)) - if ignore_case: - needle = last(needles).lower() - match_percent = lambda entry: SequenceMatcher( - a=needle, - b=end_dir(entry.path.lower())).ratio() - else: - needle = last(needles) - match_percent = lambda entry: SequenceMatcher( - a=needle, - b=end_dir(entry.path)).ratio() - meets_threshold = lambda entry: match_percent(entry) >= \ - FUZZY_MATCH_THRESHOLD - return ifilter(meets_threshold, haystack) - - def purge_missing_paths(entries): """Remove non-existent paths from a list of entries.""" exists = lambda entry: os.path.exists(entry.path) diff --git a/bin/autojump_data.py b/bin/autojump_data.py index 33987b3..72d7549 100644 --- a/bin/autojump_data.py +++ b/bin/autojump_data.py @@ -34,18 +34,23 @@ def dictify(entries): key = path value = weight """ - result = {} - for entry in entries: - result[entry.path] = entry.weight - return result + return dict((e.path, e.weight) for e in entries) def entriefy(data): """Converts a dictionary into an iterator of entries.""" - convert = lambda tup: Entry(*tup) - if is_python3(): - return map(convert, data.items()) - return imap(convert, data.iteritems()) + iteritems = data.items if is_python3() else data.iteritems + return (Entry(k, v) for k, v in iteritems()) + + +def parse_data(data): + # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user'] + parsed = (l.strip().split('\t') for l in data) + valid = (x for x in parsed if len(x) == 2) + return dict( + (path, float(weight)) + for weight, path in valid + ) def load(config): @@ -62,23 +67,12 @@ def load(config): if not os.path.exists(config['data_path']): return {} - # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user'] - parse = lambda line: line.strip().split('\t') - - correct_length = lambda x: len(x) == 2 - - # example: ['10.0', u'/home/user'] -> (u'/home/user', 10.0) - tupleize = lambda x: (x[1], float(x[0])) - try: with open( config['data_path'], 'r', encoding='utf-8', errors='replace') as f: - return dict( - imap( - tupleize, - ifilter(correct_length, imap(parse, f)))) + return parse_data(f) except (IOError, EOFError): return load_backup(config) diff --git a/bin/autojump_path_match.py b/bin/autojump_path_match.py new file mode 100644 index 0000000..35955c8 --- /dev/null +++ b/bin/autojump_path_match.py @@ -0,0 +1,163 @@ +import os +import re +import sys +from itertools import chain +from operator import attrgetter +from difflib import SequenceMatcher + +from autojump_utils import ( + last, + has_uppercase, +) + +if sys.version_info[0] == 3: + ifilter = filter + imap = map + os.getcwdu = os.getcwd +else: + from itertools import ifilter + from itertools import imap + +FUZZY_MATCH_THRESHOLD = 0.6 + + +def find_matches(entries, needles, check_entries=True): + """Return an iterator to matching entries.""" + # TODO(wting|2014-02-24): replace assertion with unit test + assert isinstance(needles, list), "Needles must be a list." + ignore_case = detect_smartcase(needles) + + try: + pwd = os.getcwdu() + except OSError: + pwd = None + + # using closure to prevent constantly hitting hdd + def is_cwd(entry): + return os.path.realpath(entry.path) == pwd + + if check_entries: + path_exists = lambda entry: os.path.exists(entry.path) + else: + path_exists = lambda _: True + + data = sorted( + entries, + key=attrgetter('weight'), + reverse=True) + + return ifilter( + lambda entry: not is_cwd(entry) and path_exists(entry), + chain( + match_consecutive(needles, data, ignore_case), + match_fuzzy(needles, data, ignore_case), + match_anywhere(needles, data, ignore_case))) + + +def match_anywhere(needles, haystack, ignore_case=False): + """ + Matches needles anywhere in the path as long as they're in the same (but + not necessary consecutive) order. + + For example: + needles = ['foo', 'baz'] + regex needle = r'.*foo.*baz.*' + haystack = [ + (path="/foo/bar/baz", weight=10), + (path="/baz/foo/bar", weight=10), + (path="/foo/baz", weight=10)] + + result = [ + (path="/moo/foo/baz", weight=10), + (path="/foo/baz", weight=10)] + """ + regex_needle = '.*' + '.*'.join(needles).replace('\\', '\\\\') + '.*' + regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE + found = lambda haystack: re.search( + regex_needle, + haystack.path, + flags=regex_flags) + return ifilter(found, haystack) + + +def match_consecutive(needles, haystack, ignore_case=False): + """ + Matches consecutive needles at the end of a path. + + For example: + needles = ['foo', 'baz'] + haystack = [ + (path="/foo/bar/baz", weight=10), + (path="/foo/baz/moo", weight=10), + (path="/moo/foo/baz", weight=10), + (path="/foo/baz", weight=10)] + + regex_needle = re.compile(r''' + foo # needle #1 + [^/]* # all characters except os.sep zero or more times + / # os.sep + [^/]* # all characters except os.sep zero or more times + baz # needle #2 + [^/]* # all characters except os.sep zero or more times + $ # end of string + ''') + + result = [ + (path="/moo/foo/baz", weight=10), + (path="/foo/baz", weight=10)] + """ + reversed_needles = list(reversed(needles)) + for entry in haystack: + path_segments = entry.path.split(os.sep) + for target, needle_part in zip( + reversed(path_segments), reversed_needles + ): + if ignore_case: + needle_part = needle_part.lower() + target = target.lower() + if needle_part not in target: + break + else: + yield entry + + +def match_fuzzy(needles, haystack, ignore_case=False): + """ + Performs an approximate match with the last needle against the end of + every path past an acceptable threshold (FUZZY_MATCH_THRESHOLD). + + For example: + needles = ['foo', 'bar'] + haystack = [ + (path="/foo/bar/baz", weight=11), + (path="/foo/baz/moo", weight=10), + (path="/moo/foo/baz", weight=10), + (path="/foo/baz", weight=10), + (path="/foo/bar", weight=10)] + + result = [ + (path="/foo/bar/baz", weight=11), + (path="/moo/foo/baz", weight=10), + (path="/foo/baz", weight=10), + (path="/foo/bar", weight=10)] + + This is a weak heuristic and used as a last resort to find matches. + """ + needle = last(needles) + if ignore_case: + needle = needle.lower() + + for entry in haystack: + _, tail = os.path.split(entry.path) + path = tail.lower() if ignore_case else tail + matcher = SequenceMatcher(a=needle, b=path) + if matcher.ratio() >= FUZZY_MATCH_THRESHOLD: + yield entry + + +def detect_smartcase(needles): + """ + If any needles contain an uppercase letter then use case sensitive + searching. Otherwise use case insensitive searching. + """ + return not any(imap(has_uppercase, needles)) diff --git a/bin/autojump_utils.py b/bin/autojump_utils.py index 329e721..894af3c 100644 --- a/bin/autojump_utils.py +++ b/bin/autojump_utils.py @@ -13,10 +13,7 @@ import sys import unicodedata if sys.version_info[0] == 3: - imap = map os.getcwdu = os.getcwd -else: - from itertools import imap def create_dir(path): @@ -37,12 +34,7 @@ def encode_local(string): def first(xs): it = iter(xs) - try: - if is_python3(): - return it.__next__() - return it.next() - except StopIteration: - return None + return next(it, None) def get_tab_entry_info(entry, separator): @@ -51,23 +43,16 @@ def get_tab_entry_info(entry, separator): [needle]__[index]__[path] """ - needle, index, path = None, None, None - - match_needle = re.search(r'(.*?)' + separator, entry) - match_index = re.search(separator + r'([0-9]{1})', entry) - match_path = re.search( - separator + r'[0-9]{1}' + separator + r'(.*)', - entry) - - if match_needle: - needle = match_needle.group(1) - - if match_index: - index = int(match_index.group(1)) - - if match_path: - path = match_path.group(1) - + needle = index = path = None + parts = entry.split('__', 2) + if len(parts) > 1: + needle = parts[0] + try: + index = int(parts[1]) + except ValueError: + index = None + if len(parts) > 2: + path = parts[2] return needle, index, path @@ -114,17 +99,10 @@ def is_windows(): def last(xs): - it = iter(xs) - tmp = None - try: - if is_python3(): - while True: - tmp = it.__next__() - else: - while True: - tmp = it.next() - except StopIteration: - return tmp + v = None + for i in iter(xs): + v = i + return v def move_file(src, dst): @@ -169,21 +147,15 @@ def print_tab_menu(needle, tab_entries, separator): def sanitize(directories): # edge case to allow '/' as a valid path - clean = lambda x: unico(x) if x == os.sep else unico(x).rstrip(os.sep) - return list(imap(clean, directories)) + def clean(x): + return unico(x) if x == os.sep else unico(x).rstrip(os.sep) + return [clean(d) for d in directories] def second(xs): it = iter(xs) - try: - if is_python2(): - it.next() - return it.next() - elif is_python3(): - next(it) - return next(it) - except StopIteration: - return None + next(it, None) + return next(it, None) def surround_quotes(string): diff --git a/tests/autojump_data_test.py b/tests/autojump_data_test.py index e69de29..e9fee7d 100644 --- a/tests/autojump_data_test.py +++ b/tests/autojump_data_test.py @@ -0,0 +1,52 @@ +import os +import sys + +sys.path.append(os.path.join(os.getcwd(), 'bin')) +from autojump_data import ( + entriefy, + dictify, + parse_data, + Entry, +) + + +def test_entriefy(): + assert list(entriefy({})) == [] + data = { + "path1": 10, + "path2": 12 + } + r = entriefy(data) + assert set(r) == set([Entry("path1", 10), Entry("path2", 12)]) + + +def test_dictify(): + assert dictify([]) == {} + entries = [Entry("path1", 10), Entry("path2", 12)] + assert dictify(entries) == { + "path1": 10, + "path2": 12 + } + + +class TestParseData: + + def test_valid_data_should_be_parsed(self): + data = [ + "10.0\tpath_a", + "12.3\tpath_a/path_b" + ] + assert parse_data(data) == { + "path_a": 10.0, + "path_a/path_b": 12.3 + } + + def test_invalid_data_should_be_ignored(self): + data = [ + "10.0\tpath_a\tnada", + "12.3", + "10.0\tpath_a", + ] + assert parse_data(data) == { + "path_a": 10.0 + } diff --git a/tests/autojump_path_match_test.py b/tests/autojump_path_match_test.py new file mode 100644 index 0000000..a8c9ad1 --- /dev/null +++ b/tests/autojump_path_match_test.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- + +import os +import sys + +sys.path.append(os.path.join(os.getcwd(), 'bin')) + +from autojump_data import Entry +import autojump_path_match as m + + +def test_match_fuzzy(): + needles = ['foo', 'bar'] + haystack = [ + Entry("/foo/bar/baz", 11), + Entry("/foo/baz/moo", 10), + Entry("/moo/foo/baz", 10), + ] + result = list(m.match_fuzzy(needles, haystack)) + assert result == [ + Entry("/foo/bar/baz", 11), + Entry("/moo/foo/baz", 10), + ] + + +def test_match_consecutive(): + needles = ['foo', 'baz'] + haystack = [ + Entry("/foo/bar/baz", 10), + Entry("/foo/baz/moo", 10), + Entry("/moo/foo/Baz", 10), + Entry("/foo/bazar", 10), + Entry("/foo/xxbaz", 10) + ] + result = list(m.match_consecutive(needles, haystack)) + assert result == [ + Entry("/foo/bazar", 10), + Entry("/foo/xxbaz", 10) + ] + result = list(m.match_consecutive(needles, haystack, ignore_case=True)) + assert result == [ + Entry("/moo/foo/Baz", 10), + Entry("/foo/bazar", 10), + Entry("/foo/xxbaz", 10) + ] diff --git a/tests/autojump_test.py b/tests/autojump_test.py index e69de29..437f3b2 100644 --- a/tests/autojump_test.py +++ b/tests/autojump_test.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + +import os +import sys + +sys.path.append(os.path.join(os.getcwd(), 'bin')) diff --git a/tox.ini b/tox.ini index 971d67a..4b0ee9b 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,8 @@ envlist = py27, py32, py33, - py34 + py34, + py35 # ignore missing setup.py skipsdist = True