Refactor code and add unit tests

2025-06-13 12:54:07 +00:00 · 2016-04-18 10:40:10 +08:00 · 2016-04-18 10:40:10 +08:00 · 723d7a69cf
commit 723d7a69cf
parent 4230bbe2d1
8 changed files with 303 additions and 219 deletions
--- a/bin/autojump
+++ b/bin/autojump
@ -21,13 +21,11 @@
 from __future__ import print_function
 from difflib import SequenceMatcher
 from itertools import chain
 from math import sqrt
 from operator import attrgetter
 from operator import itemgetter
 import os
 import re
 import sys
 if sys.version_info[0] == 3:
@ -48,7 +46,6 @@ from autojump_data import save
 from autojump_utils import first
 from autojump_utils import get_pwd
 from autojump_utils import get_tab_entry_info
 from autojump_utils import has_uppercase
 from autojump_utils import is_autojump_sourced
 from autojump_utils import is_osx
 from autojump_utils import is_windows
@ -59,9 +56,9 @@ from autojump_utils import print_tab_menu
 from autojump_utils import sanitize
 from autojump_utils import take
 from autojump_utils import unico
 from autojump_path_match import find_matches
 VERSION = '22.3.0'
 FUZZY_MATCH_THRESHOLD = 0.6
 TAB_ENTRIES_COUNT = 9
 TAB_SEPARATOR = '__'
@ -151,47 +148,6 @@ def decrease_path(data, path, weight=15):
    return data, Entry(path, data[path])
 def detect_smartcase(needles):
    """
    Decide whether a search should ignore case ("smartcase").
    Returns False (case sensitive search) as soon as has_uppercase() reports
    an uppercase letter in one of the needles. Returns True (case
    insensitive search) otherwise, including when there are no needles.
    """
    for needle in needles:
        if has_uppercase(needle):
            return False
    return True
 def find_matches(entries, needles, check_entries=True):
    """
    Return an iterator to matching entries.
    Entries are sorted by descending weight and passed through the
    consecutive, fuzzy and anywhere matchers, in that order; an entry
    accepted by several matchers is produced once per matcher. The current
    working directory is always left out and, when check_entries is true,
    so is every path that does not exist.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)
    # the working directory is looked up once here and captured by the
    # filter below instead of being queried again for every entry
    try:
        pwd = os.getcwdu()
    except OSError:
        pwd = None
    def is_candidate(entry):
        if os.path.realpath(entry.path) == pwd:
            return False
        return (not check_entries) or os.path.exists(entry.path)
    ranked = sorted(entries, key=attrgetter('weight'), reverse=True)
    matches = chain(
        match_consecutive(needles, ranked, ignore_case),
        match_fuzzy(needles, ranked, ignore_case),
        match_anywhere(needles, ranked, ignore_case))
    return ifilter(is_candidate, matches)
 def handle_tab_completion(needle, entries):
    tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
@ -221,111 +177,6 @@ def handle_tab_completion(needle, entries):
            TAB_SEPARATOR)
 def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order. Needles are matched literally, so
    characters with a special meaning in a regex (e.g. '+' or '(') are safe.
    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]
        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]
    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # re.escape handles backslashes as well as every other metacharacter, so
    # a needle such as 'c++' no longer produces an invalid pattern.
    regex_needle = (
        '.*' + '.*'.join(re.escape(needle) for needle in needles) + '.*')
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # compiled once up front instead of being looked up again per entry
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
 def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path. Needles are matched
    literally, so regex metacharacters inside a needle are safe.
    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]
        regex_needle = re.compile(r'''
            foo     # needle #1
            [^/]*   # all characters except os.sep zero or more times
            /       # os.sep
            [^/]*   # all characters except os.sep zero or more times
            baz     # needle #2
            [^/]*   # all characters except os.sep zero or more times
            $       # end of string
            ''')
        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]
    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # The separator (a backslash on Windows) and every needle are escaped
    # individually before joining, so needle metacharacters stay literal and
    # the separator is not escaped a second time afterwards.
    sep = re.escape(os.sep)
    regex_no_sep = '[^' + sep + ']*'
    regex_no_sep_end = regex_no_sep + '$'
    regex_one_sep = regex_no_sep + sep + regex_no_sep
    regex_needle = (
        regex_one_sep.join(re.escape(needle) for needle in needles) +
        regex_no_sep_end)
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
 def match_fuzzy(needles, haystack, ignore_case=False):
    """
    Approximate matching: compares the last needle with the final component
    of every path and keeps the entries whose similarity ratio reaches
    FUZZY_MATCH_THRESHOLD.
    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]
        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]
    This is a weak heuristic and used as a last resort to find matches.
    """
    needle = last(needles)
    if ignore_case:
        needle = needle.lower()
    def meets_threshold(entry):
        # only the last path component takes part in the comparison
        path = entry.path.lower() if ignore_case else entry.path
        end_dir = last(os.path.split(path))
        ratio = SequenceMatcher(a=needle, b=end_dir).ratio()
        return ratio >= FUZZY_MATCH_THRESHOLD
    return ifilter(meets_threshold, haystack)
 def purge_missing_paths(entries):
    """Remove non-existent paths from a list of entries."""
    exists = lambda entry: os.path.exists(entry.path)
--- a/bin/autojump_data.py
+++ b/bin/autojump_data.py
@ -34,18 +34,23 @@ def dictify(entries):
        key = path
        value = weight
    """
-    result = {}
+    return dict((e.path, e.weight) for e in entries)
    for entry in entries:
        result[entry.path] = entry.weight
    return result
 def entriefy(data):
    """Converts a dictionary into an iterator of entries."""
-    convert = lambda tup: Entry(*tup)
+    iteritems = data.items if is_python3() else data.iteritems
-    if is_python3():
+    return (Entry(k, v) for k, v in iteritems())
-        return map(convert, data.items())
+
-    return imap(convert, data.iteritems())
+
 def parse_data(data):
    """
    Parse the lines of a data file into a dictionary of path -> weight.
    Every line is expected to hold a weight and a path separated by a single
    tab. Lines that do not split into exactly two fields, or whose weight is
    not a valid float, are ignored. When a path occurs more than once the
    last occurrence wins.
    """
    result = {}
    for line in data:
        # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
        fields = line.strip().split('\t')
        if len(fields) != 2:
            continue
        weight, path = fields
        try:
            result[path] = float(weight)
        except ValueError:
            # one corrupt weight must not make the whole file unreadable
            continue
    return result
 def load(config):
@ -62,23 +67,12 @@ def load(config):
    if not os.path.exists(config['data_path']):
        return {}
    # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
    parse = lambda line: line.strip().split('\t')
    correct_length = lambda x: len(x) == 2
    # example: ['10.0', u'/home/user'] -> (u'/home/user', 10.0)
    tupleize = lambda x: (x[1], float(x[0]))
    try:
        with open(
                config['data_path'],
                'r', encoding='utf-8',
                errors='replace') as f:
-            return dict(
+            return parse_data(f)
                imap(
                    tupleize,
                    ifilter(correct_length, imap(parse, f))))
    except (IOError, EOFError):
        return load_backup(config)
--- a/bin/autojump_path_match.py
+++ b/bin/autojump_path_match.py
@ -0,0 +1,163 @@
 import os
 import re
 import sys
 from itertools import chain
 from operator import attrgetter
 from difflib import SequenceMatcher
 from autojump_utils import (
    last,
    has_uppercase,
 )
 if sys.version_info[0] == 3:
    ifilter = filter
    imap = map
    os.getcwdu = os.getcwd
 else:
    from itertools import ifilter
    from itertools import imap
 FUZZY_MATCH_THRESHOLD = 0.6
 def find_matches(entries, needles, check_entries=True):
    """
    Return an iterator to matching entries.
    The entries are ranked by descending weight and then offered to
    match_consecutive, match_fuzzy and match_anywhere, whose results are
    chained in that order (an entry accepted by several matchers shows up
    once per matcher). Entries resolving to the current working directory
    are dropped; when check_entries is true, so are paths that do not exist.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)
    # resolved a single time; the filter below only compares against it
    try:
        pwd = os.getcwdu()
    except OSError:
        pwd = None
    def keep(entry):
        if os.path.realpath(entry.path) == pwd:
            return False
        if check_entries:
            return os.path.exists(entry.path)
        return True
    by_weight = sorted(entries, key=attrgetter('weight'), reverse=True)
    candidates = chain(
        match_consecutive(needles, by_weight, ignore_case),
        match_fuzzy(needles, by_weight, ignore_case),
        match_anywhere(needles, by_weight, ignore_case))
    return ifilter(keep, candidates)
 def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order. Each needle is treated as literal
    text: regex metacharacters such as '+', '(' or '[' have no special
    meaning.
    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]
        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]
    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # Escaping only backslashes left every other metacharacter active, so a
    # needle like 'c++' raised re.error; re.escape covers all of them.
    escaped = [re.escape(needle) for needle in needles]
    regex_needle = '.*' + '.*'.join(escaped) + '.*'
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # compile once rather than passing the pattern string for every entry
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
 def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path: the last needle has to
    be contained in the last path segment, the second to last needle in the
    second to last segment, and so on.
    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]
        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]
    A path with fewer segments than there are needles never matches.
    Yields the matching entries lazily, in haystack order.
    """
    # needles are normalised once here instead of once per entry
    reversed_needles = [
        needle.lower() if ignore_case else needle
        for needle in reversed(needles)]
    for entry in haystack:
        path = entry.path.lower() if ignore_case else entry.path
        path_segments = path.split(os.sep)
        # zip() stops at the shorter input, so without this check a path
        # with too few segments would be accepted after testing only some
        # of the needles.
        if len(path_segments) < len(reversed_needles):
            continue
        pairs = zip(reversed(path_segments), reversed_needles)
        if all(needle_part in target for target, needle_part in pairs):
            yield entry
 def match_fuzzy(needles, haystack, ignore_case=False):
    """
    Approximate matching: the last needle is compared with the final
    component of every path, and entries whose similarity ratio reaches
    FUZZY_MATCH_THRESHOLD are yielded.
    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]
        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]
    This is a weak heuristic and used as a last resort to find matches.
    """
    target = last(needles)
    if ignore_case:
        target = target.lower()
    for entry in haystack:
        # only the last path component is compared with the needle
        end_dir = os.path.basename(entry.path)
        if ignore_case:
            end_dir = end_dir.lower()
        similarity = SequenceMatcher(a=target, b=end_dir).ratio()
        if similarity < FUZZY_MATCH_THRESHOLD:
            continue
        yield entry
 def detect_smartcase(needles):
    """
    Tell whether the search should be case insensitive.
    The result is False (search case sensitively) when has_uppercase() is
    true for at least one needle, and True (ignore case) when it is true
    for none of them, which includes an empty list of needles.
    """
    for needle in needles:
        if has_uppercase(needle):
            return False
    return True
--- a/bin/autojump_utils.py
+++ b/bin/autojump_utils.py
@ -13,10 +13,7 @@ import sys
 import unicodedata
 if sys.version_info[0] == 3:
    imap = map
    os.getcwdu = os.getcwd
 else:
    from itertools import imap
 def create_dir(path):
@ -37,12 +34,7 @@ def encode_local(string):
 def first(xs):
    it = iter(xs)
-    try:
+    return next(it, None)
        if is_python3():
            return it.__next__()
        return it.next()
    except StopIteration:
        return None
 def get_tab_entry_info(entry, separator):
@ -51,23 +43,16 @@ def get_tab_entry_info(entry, separator):
        [needle]__[index]__[path]
    """
-    needle, index, path = None, None, None
+    needle = index = path = None
-
+    parts = entry.split('__', 2)
-    match_needle = re.search(r'(.*?)' + separator, entry)
+    if len(parts) > 1:
-    match_index = re.search(separator + r'([0-9]{1})', entry)
+        needle = parts[0]
-    match_path = re.search(
+        try:
-        separator + r'[0-9]{1}' + separator + r'(.*)',
+            index = int(parts[1])
-        entry)
+        except ValueError:
-
+            index = None
-    if match_needle:
+        if len(parts) > 2:
-        needle = match_needle.group(1)
+            path = parts[2]
    if match_index:
        index = int(match_index.group(1))
    if match_path:
        path = match_path.group(1)
    return needle, index, path
@ -114,17 +99,10 @@ def is_windows():
 def last(xs):
-    it = iter(xs)
+    v = None
-    tmp = None
+    for i in iter(xs):
-    try:
+        v = i
-        if is_python3():
+    return v
            while True:
                tmp = it.__next__()
        else:
            while True:
                tmp = it.next()
    except StopIteration:
        return tmp
 def move_file(src, dst):
@ -169,21 +147,15 @@ def print_tab_menu(needle, tab_entries, separator):
 def sanitize(directories):
    # edge case to allow '/' as a valid path
-    clean = lambda x: unico(x) if x == os.sep else unico(x).rstrip(os.sep)
+    def clean(x):
-    return list(imap(clean, directories))
+        return unico(x) if x == os.sep else unico(x).rstrip(os.sep)
    return [clean(d) for d in directories]
 def second(xs):
    it = iter(xs)
-    try:
+    next(it, None)
-        if is_python2():
+    return next(it, None)
            it.next()
            return it.next()
        elif is_python3():
            next(it)
            return next(it)
    except StopIteration:
        return None
 def surround_quotes(string):
--- a/tests/autojump_data_test.py
+++ b/tests/autojump_data_test.py
@ -0,0 +1,52 @@
 import os
 import sys
 sys.path.append(os.path.join(os.getcwd(), 'bin'))
 from autojump_data import (
    entriefy,
    dictify,
    parse_data,
    Entry,
 )
 def test_entriefy():
    """entriefy() produces one Entry per (path, weight) item of a dict."""
    # an empty dictionary produces no entries at all
    assert list(entriefy({})) == []
    weights = {"path1": 10, "path2": 12}
    expected = {Entry("path1", 10), Entry("path2", 12)}
    # compared as sets so the test does not depend on dict ordering
    assert set(entriefy(weights)) == expected
 def test_dictify():
    """dictify() maps the path of every entry to its weight."""
    # no entries -> empty dictionary
    assert dictify([]) == {}
    expected = {"path1": 10, "path2": 12}
    first_entry = Entry("path1", 10)
    second_entry = Entry("path2", 12)
    assert dictify([first_entry, second_entry]) == expected
 class TestParseData:
    """Checks for parse_data() on well formed and malformed lines."""
    def test_valid_data_should_be_parsed(self):
        # every line is "<weight><TAB><path>"
        lines = ["10.0\tpath_a", "12.3\tpath_a/path_b"]
        expected = {"path_a": 10.0, "path_a/path_b": 12.3}
        assert parse_data(lines) == expected
    def test_invalid_data_should_be_ignored(self):
        too_many_fields = "10.0\tpath_a\tnada"
        too_few_fields = "12.3"
        well_formed = "10.0\tpath_a"
        lines = [too_many_fields, too_few_fields, well_formed]
        # only the well formed line may end up in the result
        assert parse_data(lines) == {"path_a": 10.0}
--- a/tests/autojump_path_match_test.py
+++ b/tests/autojump_path_match_test.py
@ -0,0 +1,45 @@
 # -*- coding: utf-8 -*-
 import os
 import sys
 sys.path.append(os.path.join(os.getcwd(), 'bin'))
 from autojump_data import Entry
 import autojump_path_match as m
 def test_match_fuzzy():
    """match_fuzzy() keeps entries whose last directory resembles 'bar'."""
    close_a = Entry("/foo/bar/baz", 11)
    unrelated = Entry("/foo/baz/moo", 10)
    close_b = Entry("/moo/foo/baz", 10)
    haystack = [close_a, unrelated, close_b]
    # only the last needle ('bar') takes part in the comparison
    matches = list(m.match_fuzzy(['foo', 'bar'], haystack))
    assert matches == [close_a, close_b]
 def test_match_consecutive():
    """match_consecutive() matches needles against the trailing segments."""
    needles = ['foo', 'baz']
    wrong_middle = Entry("/foo/bar/baz", 10)
    not_at_end = Entry("/foo/baz/moo", 10)
    other_case = Entry("/moo/foo/Baz", 10)
    with_suffix = Entry("/foo/bazar", 10)
    with_prefix = Entry("/foo/xxbaz", 10)
    haystack = [
        wrong_middle, not_at_end, other_case, with_suffix, with_prefix]
    # case sensitive by default: "Baz" is not matched by 'baz'
    sensitive = list(m.match_consecutive(needles, haystack))
    assert sensitive == [with_suffix, with_prefix]
    # ignoring case additionally picks up "/moo/foo/Baz"
    insensitive = list(
        m.match_consecutive(needles, haystack, ignore_case=True))
    assert insensitive == [other_case, with_suffix, with_prefix]
--- a/tests/autojump_test.py
+++ b/tests/autojump_test.py
@ -0,0 +1,6 @@
 # -*- coding: utf-8 -*-
 import os
 import sys
 sys.path.append(os.path.join(os.getcwd(), 'bin'))
--- a/tox.ini
+++ b/tox.ini
@ -4,7 +4,8 @@ envlist =
 	py27,
 	py32,
 	py33,
-	py34
+	py34,
 	py35
 # ignore missing setup.py
 skipsdist = True