1
0
mirror of https://github.com/wting/autojump synced 2024-10-27 20:34:07 +00:00

Refactor code and add unit tests

This commit is contained in:
satoru 2016-04-18 10:40:10 +08:00
parent 4230bbe2d1
commit 723d7a69cf
8 changed files with 303 additions and 219 deletions

View File

@ -21,13 +21,11 @@
from __future__ import print_function from __future__ import print_function
from difflib import SequenceMatcher
from itertools import chain from itertools import chain
from math import sqrt from math import sqrt
from operator import attrgetter from operator import attrgetter
from operator import itemgetter from operator import itemgetter
import os import os
import re
import sys import sys
if sys.version_info[0] == 3: if sys.version_info[0] == 3:
@ -48,7 +46,6 @@ from autojump_data import save
from autojump_utils import first from autojump_utils import first
from autojump_utils import get_pwd from autojump_utils import get_pwd
from autojump_utils import get_tab_entry_info from autojump_utils import get_tab_entry_info
from autojump_utils import has_uppercase
from autojump_utils import is_autojump_sourced from autojump_utils import is_autojump_sourced
from autojump_utils import is_osx from autojump_utils import is_osx
from autojump_utils import is_windows from autojump_utils import is_windows
@ -59,9 +56,9 @@ from autojump_utils import print_tab_menu
from autojump_utils import sanitize from autojump_utils import sanitize
from autojump_utils import take from autojump_utils import take
from autojump_utils import unico from autojump_utils import unico
from autojump_path_match import find_matches
VERSION = '22.3.0' VERSION = '22.3.0'
FUZZY_MATCH_THRESHOLD = 0.6
TAB_ENTRIES_COUNT = 9 TAB_ENTRIES_COUNT = 9
TAB_SEPARATOR = '__' TAB_SEPARATOR = '__'
@ -151,47 +148,6 @@ def decrease_path(data, path, weight=15):
return data, Entry(path, data[path]) return data, Entry(path, data[path])
def detect_smartcase(needles):
"""
If any needles contain an uppercase letter then use case sensitive
searching. Otherwise use case insensitive searching.
"""
return not any(imap(has_uppercase, needles))
def find_matches(entries, needles, check_entries=True):
"""Return an iterator to matching entries."""
# TODO(wting|2014-02-24): replace assertion with unit test
assert isinstance(needles, list), "Needles must be a list."
ignore_case = detect_smartcase(needles)
try:
pwd = os.getcwdu()
except OSError:
pwd = None
# using closure to prevent constantly hitting hdd
def is_cwd(entry):
return os.path.realpath(entry.path) == pwd
if check_entries:
path_exists = lambda entry: os.path.exists(entry.path)
else:
path_exists = lambda _: True
data = sorted(
entries,
key=attrgetter('weight'),
reverse=True)
return ifilter(
lambda entry: not is_cwd(entry) and path_exists(entry),
chain(
match_consecutive(needles, data, ignore_case),
match_fuzzy(needles, data, ignore_case),
match_anywhere(needles, data, ignore_case)))
def handle_tab_completion(needle, entries): def handle_tab_completion(needle, entries):
tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR) tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
@ -221,111 +177,6 @@ def handle_tab_completion(needle, entries):
TAB_SEPARATOR) TAB_SEPARATOR)
def match_anywhere(needles, haystack, ignore_case=False):
"""
Matches needles anywhere in the path as long as they're in the same (but
not necessary consecutive) order.
For example:
needles = ['foo', 'baz']
regex needle = r'.*foo.*baz.*'
haystack = [
(path="/foo/bar/baz", weight=10),
(path="/baz/foo/bar", weight=10),
(path="/foo/baz", weight=10)]
result = [
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10)]
"""
regex_needle = '.*' + '.*'.join(needles).replace('\\', '\\\\') + '.*'
regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
found = lambda haystack: re.search(
regex_needle,
haystack.path,
flags=regex_flags)
return ifilter(found, haystack)
def match_consecutive(needles, haystack, ignore_case=False):
"""
Matches consecutive needles at the end of a path.
For example:
needles = ['foo', 'baz']
haystack = [
(path="/foo/bar/baz", weight=10),
(path="/foo/baz/moo", weight=10),
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10)]
regex_needle = re.compile(r'''
foo # needle #1
[^/]* # all characters except os.sep zero or more times
/ # os.sep
[^/]* # all characters except os.sep zero or more times
baz # needle #2
[^/]* # all characters except os.sep zero or more times
$ # end of string
''')
result = [
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10)]
"""
# The normal \\ separator needs to be escaped again for use in regex.
sep = '\\\\' if is_windows() else os.sep
regex_no_sep = '[^' + sep + ']*'
regex_no_sep_end = regex_no_sep + '$'
regex_one_sep = regex_no_sep + sep + regex_no_sep
# can't use compiled regex because of flags
regex_needle = regex_one_sep.join(needles).replace('\\', '\\\\') + regex_no_sep_end # noqa
regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
found = lambda entry: re.search(
regex_needle,
entry.path,
flags=regex_flags)
return ifilter(found, haystack)
def match_fuzzy(needles, haystack, ignore_case=False):
"""
Performs an approximate match with the last needle against the end of
every path past an acceptable threshold (FUZZY_MATCH_THRESHOLD).
For example:
needles = ['foo', 'bar']
haystack = [
(path="/foo/bar/baz", weight=11),
(path="/foo/baz/moo", weight=10),
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10),
(path="/foo/bar", weight=10)]
result = [
(path="/foo/bar/baz", weight=11),
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10),
(path="/foo/bar", weight=10)]
This is a weak heuristic and used as a last resort to find matches.
"""
end_dir = lambda path: last(os.path.split(path))
if ignore_case:
needle = last(needles).lower()
match_percent = lambda entry: SequenceMatcher(
a=needle,
b=end_dir(entry.path.lower())).ratio()
else:
needle = last(needles)
match_percent = lambda entry: SequenceMatcher(
a=needle,
b=end_dir(entry.path)).ratio()
meets_threshold = lambda entry: match_percent(entry) >= \
FUZZY_MATCH_THRESHOLD
return ifilter(meets_threshold, haystack)
def purge_missing_paths(entries): def purge_missing_paths(entries):
"""Remove non-existent paths from a list of entries.""" """Remove non-existent paths from a list of entries."""
exists = lambda entry: os.path.exists(entry.path) exists = lambda entry: os.path.exists(entry.path)

View File

@ -34,18 +34,23 @@ def dictify(entries):
key = path key = path
value = weight value = weight
""" """
result = {} return dict((e.path, e.weight) for e in entries)
for entry in entries:
result[entry.path] = entry.weight
return result
def entriefy(data): def entriefy(data):
"""Converts a dictionary into an iterator of entries.""" """Converts a dictionary into an iterator of entries."""
convert = lambda tup: Entry(*tup) iteritems = data.items if is_python3() else data.iteritems
if is_python3(): return (Entry(k, v) for k, v in iteritems())
return map(convert, data.items())
return imap(convert, data.iteritems())
def parse_data(data):
    """
    Build a {path: weight} dictionary from an iterable of text lines.

    Each line is stripped of surrounding whitespace and split on tabs; a
    well-formed line has exactly two fields, the weight followed by the path.
    Lines with any other number of fields, or whose weight cannot be converted
    to a float, are skipped. When a path occurs more than once the last
    occurrence wins.
    """
    result = {}
    for line in data:
        # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
        fields = line.strip().split('\t')
        if len(fields) != 2:
            continue
        weight, path = fields
        try:
            result[path] = float(weight)
        except ValueError:
            # One corrupt weight should not make every other line unreadable;
            # treat it like any other malformed line.
            continue
    return result
def load(config): def load(config):
@ -62,23 +67,12 @@ def load(config):
if not os.path.exists(config['data_path']): if not os.path.exists(config['data_path']):
return {} return {}
# example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
parse = lambda line: line.strip().split('\t')
correct_length = lambda x: len(x) == 2
# example: ['10.0', u'/home/user'] -> (u'/home/user', 10.0)
tupleize = lambda x: (x[1], float(x[0]))
try: try:
with open( with open(
config['data_path'], config['data_path'],
'r', encoding='utf-8', 'r', encoding='utf-8',
errors='replace') as f: errors='replace') as f:
return dict( return parse_data(f)
imap(
tupleize,
ifilter(correct_length, imap(parse, f))))
except (IOError, EOFError): except (IOError, EOFError):
return load_backup(config) return load_backup(config)

163
bin/autojump_path_match.py Normal file
View File

@ -0,0 +1,163 @@
import os
import re
import sys
from itertools import chain
from operator import attrgetter
from difflib import SequenceMatcher
from autojump_utils import (
last,
has_uppercase,
)
if sys.version_info[0] == 3:
ifilter = filter
imap = map
os.getcwdu = os.getcwd
else:
from itertools import ifilter
from itertools import imap
FUZZY_MATCH_THRESHOLD = 0.6
def find_matches(entries, needles, check_entries=True):
    """
    Return a lazy iterator over the entries that match needles.

    Entries are ranked by descending weight and handed to the consecutive,
    fuzzy and anywhere matchers, in that order. The three result streams are
    chained, so an entry accepted by more than one matcher is produced more
    than once. Entries whose real path equals the current working directory
    are dropped, and when check_entries is true so are entries whose path
    does not exist.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)

    # Look the working directory up once, up front, so the filter below does
    # not have to ask for it again for every candidate.
    try:
        cwd = os.getcwdu()
    except OSError:
        cwd = None

    must_exist = bool(check_entries)

    def keep(entry):
        if os.path.realpath(entry.path) == cwd:
            return False
        return os.path.exists(entry.path) if must_exist else True

    ranked = sorted(entries, key=attrgetter('weight'), reverse=True)
    candidates = chain(
        match_consecutive(needles, ranked, ignore_case),
        match_fuzzy(needles, ranked, ignore_case),
        match_anywhere(needles, ranked, ignore_case))
    return ifilter(keep, candidates)
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are matched literally: every character with a special meaning in
    a regular expression is escaped, so a needle such as 'c++' or 'a.b' only
    matches those exact characters.

    For example:

        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    regex_needle = '.*' + '.*'.join(re.escape(n) for n in needles) + '.*'
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # Compile once instead of handing the pattern string to re.search for
    # every entry.
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    The path is split on os.sep and compared from the end backwards: the last
    needle must be a substring of the last segment, the second-to-last needle
    a substring of the second-to-last segment, and so on. A path with fewer
    segments than there are needles never matches.

    For example:

        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Yields the matching entries lazily, in haystack order.
    """
    if ignore_case:
        # Lowercase the needles once rather than once per entry.
        needles = [needle.lower() for needle in needles]
    reversed_needles = list(reversed(needles))
    needle_count = len(reversed_needles)

    for entry in haystack:
        path = entry.path.lower() if ignore_case else entry.path
        segments = path.split(os.sep)
        # zip() stops at the shorter input, so without this guard the needles
        # that have no segment to pair with would be silently ignored.
        if len(segments) < needle_count:
            continue
        if all(
            needle in segment
            for needle, segment in zip(reversed_needles, reversed(segments))
        ):
            yield entry
def match_fuzzy(needles, haystack, ignore_case=False):
    """
    Approximately matches the last needle against the final component of each
    path (the tail returned by os.path.split) and keeps the entries whose
    similarity ratio reaches FUZZY_MATCH_THRESHOLD.

    For example:

        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

    This is a weak heuristic and used as a last resort to find matches.
    """
    wanted = last(needles)
    if ignore_case:
        wanted = wanted.lower()

    def similarity(entry):
        basename = os.path.split(entry.path)[1]
        if ignore_case:
            basename = basename.lower()
        # Argument order is kept as (needle, basename): ratio() is not
        # guaranteed to be symmetric.
        return SequenceMatcher(a=wanted, b=basename).ratio()

    for candidate in haystack:
        if similarity(candidate) >= FUZZY_MATCH_THRESHOLD:
            yield candidate
def detect_smartcase(needles):
    """
    Decide whether searching should ignore case.

    Returns False (case sensitive) as soon as has_uppercase is true for one
    of the needles, and True (case insensitive) when it is true for none.
    """
    for needle in needles:
        if has_uppercase(needle):
            return False
    return True

View File

@ -13,10 +13,7 @@ import sys
import unicodedata import unicodedata
if sys.version_info[0] == 3: if sys.version_info[0] == 3:
imap = map
os.getcwdu = os.getcwd os.getcwdu = os.getcwd
else:
from itertools import imap
def create_dir(path): def create_dir(path):
@ -37,12 +34,7 @@ def encode_local(string):
def first(xs): def first(xs):
it = iter(xs) it = iter(xs)
try: return next(it, None)
if is_python3():
return it.__next__()
return it.next()
except StopIteration:
return None
def get_tab_entry_info(entry, separator): def get_tab_entry_info(entry, separator):
@ -51,23 +43,16 @@ def get_tab_entry_info(entry, separator):
[needle]__[index]__[path] [needle]__[index]__[path]
""" """
needle, index, path = None, None, None needle = index = path = None
parts = entry.split('__', 2)
match_needle = re.search(r'(.*?)' + separator, entry) if len(parts) > 1:
match_index = re.search(separator + r'([0-9]{1})', entry) needle = parts[0]
match_path = re.search( try:
separator + r'[0-9]{1}' + separator + r'(.*)', index = int(parts[1])
entry) except ValueError:
index = None
if match_needle: if len(parts) > 2:
needle = match_needle.group(1) path = parts[2]
if match_index:
index = int(match_index.group(1))
if match_path:
path = match_path.group(1)
return needle, index, path return needle, index, path
@ -114,17 +99,10 @@ def is_windows():
def last(xs): def last(xs):
it = iter(xs) v = None
tmp = None for i in iter(xs):
try: v = i
if is_python3(): return v
while True:
tmp = it.__next__()
else:
while True:
tmp = it.next()
except StopIteration:
return tmp
def move_file(src, dst): def move_file(src, dst):
@ -169,21 +147,15 @@ def print_tab_menu(needle, tab_entries, separator):
def sanitize(directories): def sanitize(directories):
# edge case to allow '/' as a valid path # edge case to allow '/' as a valid path
clean = lambda x: unico(x) if x == os.sep else unico(x).rstrip(os.sep) def clean(x):
return list(imap(clean, directories)) return unico(x) if x == os.sep else unico(x).rstrip(os.sep)
return [clean(d) for d in directories]
def second(xs): def second(xs):
it = iter(xs) it = iter(xs)
try: next(it, None)
if is_python2(): return next(it, None)
it.next()
return it.next()
elif is_python3():
next(it)
return next(it)
except StopIteration:
return None
def surround_quotes(string): def surround_quotes(string):

View File

@ -0,0 +1,52 @@
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'bin'))
from autojump_data import (
entriefy,
dictify,
parse_data,
Entry,
)
def test_entriefy():
    """entriefy yields nothing for {} and one Entry per dictionary item."""
    assert list(entriefy({})) == []

    weights = {"path1": 10, "path2": 12}
    expected = set([Entry("path1", 10), Entry("path2", 12)])
    # Compared as sets because the dictionary's iteration order is not part
    # of what is being checked.
    assert set(entriefy(weights)) == expected
def test_dictify():
    """dictify gives {} for no entries and maps each path to its weight."""
    assert dictify([]) == {}

    expected = {"path1": 10, "path2": 12}
    assert dictify([Entry("path1", 10), Entry("path2", 12)]) == expected
class TestParseData:
    """Checks which 'weight<TAB>path' lines parse_data keeps."""

    def test_valid_data_should_be_parsed(self):
        lines = ["10.0\tpath_a", "12.3\tpath_a/path_b"]
        expected = {"path_a": 10.0, "path_a/path_b": 12.3}
        assert parse_data(lines) == expected

    def test_invalid_data_should_be_ignored(self):
        lines = [
            "10.0\tpath_a\tnada",  # three fields
            "12.3",  # one field
            "10.0\tpath_a",  # the only two-field line
        ]
        assert parse_data(lines) == {"path_a": 10.0}

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'bin'))
from autojump_data import Entry
import autojump_path_match as m
def test_match_fuzzy():
    """match_fuzzy keeps the entries ending in 'baz' for the needle 'bar'."""
    candidates = [
        Entry(path, weight)
        for path, weight in (
            ("/foo/bar/baz", 11),
            ("/foo/baz/moo", 10),
            ("/moo/foo/baz", 10),
        )
    ]

    matched = list(m.match_fuzzy(['foo', 'bar'], candidates))

    expected = [Entry("/foo/bar/baz", 11), Entry("/moo/foo/baz", 10)]
    assert matched == expected
def test_match_consecutive():
    """match_consecutive with the default and with ignore_case=True."""
    needles = ['foo', 'baz']
    haystack = [
        Entry(path, 10)
        for path in (
            "/foo/bar/baz",
            "/foo/baz/moo",
            "/moo/foo/Baz",
            "/foo/bazar",
            "/foo/xxbaz",
        )
    ]

    # Default call: "/moo/foo/Baz" is not expected because of its capital B.
    case_sensitive = list(m.match_consecutive(needles, haystack))
    assert case_sensitive == [
        Entry("/foo/bazar", 10),
        Entry("/foo/xxbaz", 10),
    ]

    case_insensitive = list(
        m.match_consecutive(needles, haystack, ignore_case=True))
    assert case_insensitive == [
        Entry("/moo/foo/Baz", 10),
        Entry("/foo/bazar", 10),
        Entry("/foo/xxbaz", 10),
    ]

View File

@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'bin'))

View File

@ -4,7 +4,8 @@ envlist =
py27, py27,
py32, py32,
py33, py33,
py34 py34,
py35
# ignore missing setup.py # ignore missing setup.py
skipsdist = True skipsdist = True