From 723d7a69cf775e8f04b3485839fa03b313064b47 Mon Sep 17 00:00:00 2001
From: satoru <satorulogic@gmail.com>
Date: Mon, 18 Apr 2016 10:40:10 +0800
Subject: [PATCH] Refactor code and add unit tests

---
 bin/autojump                      | 151 +--------------------------
 bin/autojump_data.py              |  34 +++----
 bin/autojump_path_match.py        | 163 ++++++++++++++++++++++++++++++
 bin/autojump_utils.py             |  68 ++++---------
 tests/autojump_data_test.py       |  52 ++++++++++
 tests/autojump_path_match_test.py |  45 +++++++++
 tests/autojump_test.py            |   6 ++
 tox.ini                           |   3 +-
 8 files changed, 303 insertions(+), 219 deletions(-)
 create mode 100644 bin/autojump_path_match.py
 create mode 100644 tests/autojump_path_match_test.py

diff --git a/bin/autojump b/bin/autojump
index 5595d14..51b92eb 100755
--- a/bin/autojump
+++ b/bin/autojump
@@ -21,13 +21,11 @@
 
 from __future__ import print_function
 
-from difflib import SequenceMatcher
 from itertools import chain
 from math import sqrt
 from operator import attrgetter
 from operator import itemgetter
 import os
-import re
 import sys
 
 if sys.version_info[0] == 3:
@@ -48,7 +46,6 @@ from autojump_data import save
 from autojump_utils import first
 from autojump_utils import get_pwd
 from autojump_utils import get_tab_entry_info
-from autojump_utils import has_uppercase
 from autojump_utils import is_autojump_sourced
 from autojump_utils import is_osx
 from autojump_utils import is_windows
@@ -59,9 +56,9 @@ from autojump_utils import print_tab_menu
 from autojump_utils import sanitize
 from autojump_utils import take
 from autojump_utils import unico
+from autojump_path_match import find_matches
 
 VERSION = '22.3.0'
-FUZZY_MATCH_THRESHOLD = 0.6
 TAB_ENTRIES_COUNT = 9
 TAB_SEPARATOR = '__'
 
@@ -151,47 +148,6 @@ def decrease_path(data, path, weight=15):
     return data, Entry(path, data[path])
 
 
-def detect_smartcase(needles):
-    """
-    If any needles contain an uppercase letter then use case sensitive
-    searching. Otherwise use case insensitive searching.
-    """
-    return not any(imap(has_uppercase, needles))
-
-
-def find_matches(entries, needles, check_entries=True):
-    """Return an iterator to matching entries."""
-    # TODO(wting|2014-02-24): replace assertion with unit test
-    assert isinstance(needles, list), "Needles must be a list."
-    ignore_case = detect_smartcase(needles)
-
-    try:
-        pwd = os.getcwdu()
-    except OSError:
-        pwd = None
-
-    # using closure to prevent constantly hitting hdd
-    def is_cwd(entry):
-        return os.path.realpath(entry.path) == pwd
-
-    if check_entries:
-        path_exists = lambda entry: os.path.exists(entry.path)
-    else:
-        path_exists = lambda _: True
-
-    data = sorted(
-        entries,
-        key=attrgetter('weight'),
-        reverse=True)
-
-    return ifilter(
-        lambda entry: not is_cwd(entry) and path_exists(entry),
-        chain(
-            match_consecutive(needles, data, ignore_case),
-            match_fuzzy(needles, data, ignore_case),
-            match_anywhere(needles, data, ignore_case)))
-
-
 def handle_tab_completion(needle, entries):
     tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
 
@@ -221,111 +177,6 @@ def handle_tab_completion(needle, entries):
             TAB_SEPARATOR)
 
 
-def match_anywhere(needles, haystack, ignore_case=False):
-    """
-    Matches needles anywhere in the path as long as they're in the same (but
-    not necessary consecutive) order.
-
-    For example:
-        needles = ['foo', 'baz']
-        regex needle = r'.*foo.*baz.*'
-        haystack = [
-            (path="/foo/bar/baz", weight=10),
-            (path="/baz/foo/bar", weight=10),
-            (path="/foo/baz", weight=10)]
-
-        result = [
-            (path="/moo/foo/baz", weight=10),
-            (path="/foo/baz", weight=10)]
-    """
-    regex_needle = '.*' + '.*'.join(needles).replace('\\', '\\\\') + '.*'
-    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
-    found = lambda haystack: re.search(
-        regex_needle,
-        haystack.path,
-        flags=regex_flags)
-    return ifilter(found, haystack)
-
-
-def match_consecutive(needles, haystack, ignore_case=False):
-    """
-    Matches consecutive needles at the end of a path.
-
-    For example:
-        needles = ['foo', 'baz']
-        haystack = [
-            (path="/foo/bar/baz", weight=10),
-            (path="/foo/baz/moo", weight=10),
-            (path="/moo/foo/baz", weight=10),
-            (path="/foo/baz", weight=10)]
-
-        regex_needle = re.compile(r'''
-            foo     # needle #1
-            [^/]*   # all characters except os.sep zero or more times
-            /       # os.sep
-            [^/]*   # all characters except os.sep zero or more times
-            baz     # needle #2
-            [^/]*   # all characters except os.sep zero or more times
-            $       # end of string
-            ''')
-
-        result = [
-            (path="/moo/foo/baz", weight=10),
-            (path="/foo/baz", weight=10)]
-    """
-    # The normal \\ separator needs to be escaped again for use in regex.
-    sep = '\\\\' if is_windows() else os.sep
-    regex_no_sep = '[^' + sep + ']*'
-    regex_no_sep_end = regex_no_sep + '$'
-    regex_one_sep = regex_no_sep + sep + regex_no_sep
-    # can't use compiled regex because of flags
-    regex_needle = regex_one_sep.join(needles).replace('\\', '\\\\') + regex_no_sep_end  # noqa
-    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
-    found = lambda entry: re.search(
-        regex_needle,
-        entry.path,
-        flags=regex_flags)
-    return ifilter(found, haystack)
-
-
-def match_fuzzy(needles, haystack, ignore_case=False):
-    """
-    Performs an approximate match with the last needle against the end of
-    every path past an acceptable threshold (FUZZY_MATCH_THRESHOLD).
-
-    For example:
-        needles = ['foo', 'bar']
-        haystack = [
-            (path="/foo/bar/baz", weight=11),
-            (path="/foo/baz/moo", weight=10),
-            (path="/moo/foo/baz", weight=10),
-            (path="/foo/baz", weight=10),
-            (path="/foo/bar", weight=10)]
-
-    result = [
-            (path="/foo/bar/baz", weight=11),
-            (path="/moo/foo/baz", weight=10),
-            (path="/foo/baz", weight=10),
-            (path="/foo/bar", weight=10)]
-
-    This is a weak heuristic and used as a last resort to find matches.
-    """
-    end_dir = lambda path: last(os.path.split(path))
-    if ignore_case:
-        needle = last(needles).lower()
-        match_percent = lambda entry: SequenceMatcher(
-            a=needle,
-            b=end_dir(entry.path.lower())).ratio()
-    else:
-        needle = last(needles)
-        match_percent = lambda entry: SequenceMatcher(
-            a=needle,
-            b=end_dir(entry.path)).ratio()
-    meets_threshold = lambda entry: match_percent(entry) >= \
-        FUZZY_MATCH_THRESHOLD
-    return ifilter(meets_threshold, haystack)
-
-
 def purge_missing_paths(entries):
     """Remove non-existent paths from a list of entries."""
     exists = lambda entry: os.path.exists(entry.path)
diff --git a/bin/autojump_data.py b/bin/autojump_data.py
index 33987b3..72d7549 100644
--- a/bin/autojump_data.py
+++ b/bin/autojump_data.py
@@ -34,18 +34,23 @@ def dictify(entries):
         key = path
         value = weight
     """
-    result = {}
-    for entry in entries:
-        result[entry.path] = entry.weight
-    return result
+    return dict((e.path, e.weight) for e in entries)
 
 
 def entriefy(data):
     """Converts a dictionary into an iterator of entries."""
-    convert = lambda tup: Entry(*tup)
-    if is_python3():
-        return map(convert, data.items())
-    return imap(convert, data.iteritems())
+    iteritems = data.items if is_python3() else data.iteritems
+    return (Entry(k, v) for k, v in iteritems())
+
+
+def parse_data(data):
+    """Parse 'weight<TAB>path' lines into a {path: weight} dict."""
+    table = {}
+    for line in data:
+        fields = line.strip().split('\t')
+        if len(fields) == 2:  # skip lines without exactly two fields
+            table[fields[1]] = float(fields[0])
+    return table
 
 
 def load(config):
@@ -62,23 +67,12 @@ def load(config):
     if not os.path.exists(config['data_path']):
         return {}
 
-    # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
-    parse = lambda line: line.strip().split('\t')
-
-    correct_length = lambda x: len(x) == 2
-
-    # example: ['10.0', u'/home/user'] -> (u'/home/user', 10.0)
-    tupleize = lambda x: (x[1], float(x[0]))
-
     try:
         with open(
                 config['data_path'],
                 'r', encoding='utf-8',
                 errors='replace') as f:
-            return dict(
-                imap(
-                    tupleize,
-                    ifilter(correct_length, imap(parse, f))))
+            return parse_data(f)
     except (IOError, EOFError):
         return load_backup(config)
 
diff --git a/bin/autojump_path_match.py b/bin/autojump_path_match.py
new file mode 100644
index 0000000..35955c8
--- /dev/null
+++ b/bin/autojump_path_match.py
@@ -0,0 +1,163 @@
+import os
+import re
+import sys
+from itertools import chain
+from operator import attrgetter
+from difflib import SequenceMatcher
+
+from autojump_utils import (
+    last,
+    has_uppercase,
+)
+
+if sys.version_info[0] == 3:  # Python 3: alias the Python 2 names
+    ifilter = filter
+    imap = map
+    os.getcwdu = os.getcwd  # patches the os module, not a local name
+else:
+    from itertools import ifilter
+    from itertools import imap
+
+FUZZY_MATCH_THRESHOLD = 0.6  # minimum ratio accepted by match_fuzzy
+
+
+def find_matches(entries, needles, check_entries=True):
+    """Return an iterator over the entries that match the needles."""
+    # TODO(wting|2014-02-24): replace assertion with unit test
+    assert isinstance(needles, list), "Needles must be a list."
+    ignore_case = detect_smartcase(needles)
+
+    try:
+        pwd = os.getcwdu()
+    except OSError:
+        pwd = None  # no cwd available: is_cwd() compares against None
+
+    # closure over pwd: the cwd is looked up once, not once per entry
+    def is_cwd(entry):
+        return os.path.realpath(entry.path) == pwd
+
+    if check_entries:
+        path_exists = lambda entry: os.path.exists(entry.path)
+    else:
+        path_exists = lambda _: True
+
+    data = sorted(
+        entries,
+        key=attrgetter('weight'),
+        reverse=True)  # heaviest entries first
+
+    return ifilter(
+        lambda entry: not is_cwd(entry) and path_exists(entry),
+        chain(  # matchers run in this order; an entry may appear repeatedly
+            match_consecutive(needles, data, ignore_case),
+            match_fuzzy(needles, data, ignore_case),
+            match_anywhere(needles, data, ignore_case)))
+
+
+def match_anywhere(needles, haystack, ignore_case=False):
+    """
+    Matches needles anywhere in the path as long as they're in the same (but
+    not necessarily consecutive) order. Needles are matched literally.
+
+    For example:
+        needles = ['foo', 'baz']
+        regex needle = r'.*foo.*baz.*'
+        haystack = [
+            (path="/foo/bar/baz", weight=10),
+            (path="/baz/foo/bar", weight=10),
+            (path="/foo/baz", weight=10)]
+
+        result = [
+            (path="/foo/bar/baz", weight=10),
+            (path="/foo/baz", weight=10)]
+    """
+    # Escape every needle: regex metacharacters (the '++' in 'c++', the
+    # backslashes of a Windows path) must match literally, not break re.
+    escaped = '.*'.join(re.escape(needle) for needle in needles)
+    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
+    # Compile once; the pattern is the same for every entry.
+    pattern = re.compile('.*' + escaped + '.*', regex_flags)
+    return (entry for entry in haystack if pattern.search(entry.path))
+
+
+def match_consecutive(needles, haystack, ignore_case=False):
+    """
+    Matches consecutive needles at the end of a path.
+
+    For example:
+        needles = ['foo', 'baz']
+        haystack = [
+            (path="/foo/bar/baz", weight=10),
+            (path="/foo/baz/moo", weight=10),
+            (path="/moo/foo/baz", weight=10),
+            (path="/foo/baz", weight=10)]
+
+        regex_needle = re.compile(r'''
+            foo     # needle #1
+            [^/]*   # all characters except os.sep zero or more times
+            /       # os.sep
+            [^/]*   # all characters except os.sep zero or more times
+            baz     # needle #2
+            [^/]*   # all characters except os.sep zero or more times
+            $       # end of string
+            ''')
+
+        result = [
+            (path="/moo/foo/baz", weight=10),
+            (path="/foo/baz", weight=10)]
+    """
+    wanted = [n.lower() if ignore_case else n for n in reversed(needles)]
+    for entry in haystack:
+        path = entry.path.lower() if ignore_case else entry.path
+        segments = path.split(os.sep)
+        # zip() stops at the shorter input, so a path with fewer segments
+        # than needles would match on the needles it happens to cover.
+        # Reject it up front: every needle needs a segment of its own.
+        if len(segments) < len(wanted):
+            continue
+        # Pair the last needle with the last segment, and so on backwards;
+        # each needle must be a substring of its segment.
+        if all(n in s for n, s in zip(wanted, reversed(segments))):
+            yield entry
+
+
+def match_fuzzy(needles, haystack, ignore_case=False):
+    """
+    Approximately matches the last needle against the final component of
+    every path, keeping entries that reach FUZZY_MATCH_THRESHOLD.
+
+    For example:
+        needles = ['foo', 'bar']
+        haystack = [
+            (path="/foo/bar/baz", weight=11),
+            (path="/foo/baz/moo", weight=10),
+            (path="/moo/foo/baz", weight=10),
+            (path="/foo/baz", weight=10),
+            (path="/foo/bar", weight=10)]
+
+    result = [
+            (path="/foo/bar/baz", weight=11),
+            (path="/moo/foo/baz", weight=10),
+            (path="/foo/baz", weight=10),
+            (path="/foo/bar", weight=10)]
+
+    A weak heuristic, used as a last resort to find matches.
+    """
+    fold = (lambda text: text.lower()) if ignore_case else (lambda text: text)
+    wanted = fold(last(needles))
+
+    for candidate in haystack:
+        # Only the final path component takes part in the comparison.
+        end_dir = fold(os.path.basename(candidate.path))
+        score = SequenceMatcher(a=wanted, b=end_dir).ratio()
+        if score < FUZZY_MATCH_THRESHOLD:
+            continue
+        yield candidate
+
+
+def detect_smartcase(needles):
+    """
+    Return True (search case insensitively) when no needle contains an
+    uppercase letter, and False (search case sensitively) otherwise.
+    """
+    return all(not has_uppercase(needle) for needle in needles)
diff --git a/bin/autojump_utils.py b/bin/autojump_utils.py
index 329e721..894af3c 100644
--- a/bin/autojump_utils.py
+++ b/bin/autojump_utils.py
@@ -13,10 +13,7 @@ import sys
 import unicodedata
 
 if sys.version_info[0] == 3:
-    imap = map
     os.getcwdu = os.getcwd
-else:
-    from itertools import imap
 
 
 def create_dir(path):
@@ -37,12 +34,7 @@ def encode_local(string):
 
 def first(xs):
     it = iter(xs)
-    try:
-        if is_python3():
-            return it.__next__()
-        return it.next()
-    except StopIteration:
-        return None
+    return next(it, None)
 
 
 def get_tab_entry_info(entry, separator):
@@ -51,23 +43,16 @@ def get_tab_entry_info(entry, separator):
 
         [needle]__[index]__[path]
     """
-    needle, index, path = None, None, None
-
-    match_needle = re.search(r'(.*?)' + separator, entry)
-    match_index = re.search(separator + r'([0-9]{1})', entry)
-    match_path = re.search(
-        separator + r'[0-9]{1}' + separator + r'(.*)',
-        entry)
-
-    if match_needle:
-        needle = match_needle.group(1)
-
-    if match_index:
-        index = int(match_index.group(1))
-
-    if match_path:
-        path = match_path.group(1)
-
+    needle = index = path = None
+    parts = entry.split(separator, 2)  # honour the caller's separator
+    if len(parts) > 1:
+        needle = parts[0]
+        try:
+            index = int(parts[1])
+        except ValueError:
+            index = None  # index field is not a number
+        if len(parts) > 2:
+            path = parts[2]
     return needle, index, path
 
 
@@ -114,17 +99,10 @@ def is_windows():
 
 
 def last(xs):
-    it = iter(xs)
-    tmp = None
-    try:
-        if is_python3():
-            while True:
-                tmp = it.__next__()
-        else:
-            while True:
-                tmp = it.next()
-    except StopIteration:
-        return tmp
+    v = None
+    for i in iter(xs):
+        v = i
+    return v
 
 
 def move_file(src, dst):
@@ -169,21 +147,15 @@ def print_tab_menu(needle, tab_entries, separator):
 
 def sanitize(directories):
     # edge case to allow '/' as a valid path
-    clean = lambda x: unico(x) if x == os.sep else unico(x).rstrip(os.sep)
-    return list(imap(clean, directories))
+    def clean(x):
+        return unico(x) if x == os.sep else unico(x).rstrip(os.sep)
+    return [clean(d) for d in directories]
 
 
 def second(xs):
     it = iter(xs)
-    try:
-        if is_python2():
-            it.next()
-            return it.next()
-        elif is_python3():
-            next(it)
-            return next(it)
-    except StopIteration:
-        return None
+    next(it, None)
+    return next(it, None)
 
 
 def surround_quotes(string):
diff --git a/tests/autojump_data_test.py b/tests/autojump_data_test.py
index e69de29..e9fee7d 100644
--- a/tests/autojump_data_test.py
+++ b/tests/autojump_data_test.py
@@ -0,0 +1,52 @@
+import os
+import sys
+
+sys.path.append(os.path.join(os.getcwd(), 'bin'))
+from autojump_data import (
+    entriefy,
+    dictify,
+    parse_data,
+    Entry,
+)
+
+
+def test_entriefy():
+    assert list(entriefy({})) == []  # empty dict -> no entries
+    data = {
+        "path1": 10,
+        "path2": 12
+    }
+    r = entriefy(data)  # an iterator; compared as a set below
+    assert set(r) == set([Entry("path1", 10), Entry("path2", 12)])
+
+
+def test_dictify():
+    assert dictify([]) == {}  # no entries -> empty dict
+    entries = [Entry("path1", 10), Entry("path2", 12)]
+    assert dictify(entries) == {  # keyed by path, valued by weight
+        "path1": 10,
+        "path2": 12
+    }
+
+
+class TestParseData:
+
+    def test_valid_data_should_be_parsed(self):
+        data = [  # each line is "weight<TAB>path"
+            "10.0\tpath_a",
+            "12.3\tpath_a/path_b"
+        ]
+        assert parse_data(data) == {
+            "path_a": 10.0,
+            "path_a/path_b": 12.3
+        }
+
+    def test_invalid_data_should_be_ignored(self):
+        data = [
+            "10.0\tpath_a\tnada",  # three fields: dropped
+            "12.3",  # one field: dropped
+            "10.0\tpath_a",  # the only well-formed line
+        ]
+        assert parse_data(data) == {
+            "path_a": 10.0
+        }
diff --git a/tests/autojump_path_match_test.py b/tests/autojump_path_match_test.py
new file mode 100644
index 0000000..a8c9ad1
--- /dev/null
+++ b/tests/autojump_path_match_test.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+
+sys.path.append(os.path.join(os.getcwd(), 'bin'))
+
+from autojump_data import Entry
+import autojump_path_match as m
+
+
+def test_match_fuzzy():
+    needles = ['foo', 'bar']  # only the last needle, 'bar', is compared
+    haystack = [
+        Entry("/foo/bar/baz", 11),  # 'baz' vs 'bar': ratio 2/3 >= 0.6
+        Entry("/foo/baz/moo", 10),  # 'moo' shares nothing with 'bar'
+        Entry("/moo/foo/baz", 10),
+    ]
+    result = list(m.match_fuzzy(needles, haystack))
+    assert result == [
+        Entry("/foo/bar/baz", 11),
+        Entry("/moo/foo/baz", 10),
+    ]
+
+
+def test_match_consecutive():
+    needles = ['foo', 'baz']
+    haystack = [
+        Entry("/foo/bar/baz", 10),  # 'bar' sits between the needles
+        Entry("/foo/baz/moo", 10),  # last segment is 'moo', not '*baz*'
+        Entry("/moo/foo/Baz", 10),  # matches only with ignore_case=True
+        Entry("/foo/bazar", 10),  # needle may be a prefix of the segment
+        Entry("/foo/xxbaz", 10)  # ... or a suffix of it
+    ]
+    result = list(m.match_consecutive(needles, haystack))
+    assert result == [
+        Entry("/foo/bazar", 10),
+        Entry("/foo/xxbaz", 10)
+    ]
+    result = list(m.match_consecutive(needles, haystack, ignore_case=True))
+    assert result == [
+        Entry("/moo/foo/Baz", 10),
+        Entry("/foo/bazar", 10),
+        Entry("/foo/xxbaz", 10)
+    ]
diff --git a/tests/autojump_test.py b/tests/autojump_test.py
index e69de29..437f3b2 100644
--- a/tests/autojump_test.py
+++ b/tests/autojump_test.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+
+sys.path.append(os.path.join(os.getcwd(), 'bin'))
diff --git a/tox.ini b/tox.ini
index 971d67a..4b0ee9b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,8 @@ envlist =
 	py27,
 	py32,
 	py33,
-	py34
+	py34,
+	py35
 # ignore missing setup.py
 skipsdist = True