1
0
mirror of https://github.com/wting/autojump synced 2024-10-27 20:34:07 +00:00

Refactor code and add unit tests

This commit is contained in:
satoru 2016-04-18 10:40:10 +08:00
parent 4230bbe2d1
commit 723d7a69cf
8 changed files with 303 additions and 219 deletions

View File

@ -21,13 +21,11 @@
from __future__ import print_function
from difflib import SequenceMatcher
from itertools import chain
from math import sqrt
from operator import attrgetter
from operator import itemgetter
import os
import re
import sys
if sys.version_info[0] == 3:
@ -48,7 +46,6 @@ from autojump_data import save
from autojump_utils import first
from autojump_utils import get_pwd
from autojump_utils import get_tab_entry_info
from autojump_utils import has_uppercase
from autojump_utils import is_autojump_sourced
from autojump_utils import is_osx
from autojump_utils import is_windows
@ -59,9 +56,9 @@ from autojump_utils import print_tab_menu
from autojump_utils import sanitize
from autojump_utils import take
from autojump_utils import unico
from autojump_path_match import find_matches
VERSION = '22.3.0'
FUZZY_MATCH_THRESHOLD = 0.6
TAB_ENTRIES_COUNT = 9
TAB_SEPARATOR = '__'
@ -151,47 +148,6 @@ def decrease_path(data, path, weight=15):
return data, Entry(path, data[path])
def detect_smartcase(needles):
    """
    Decide whether the search should ignore case ("smartcase").

    Returns False as soon as one needle contains an uppercase letter
    (case sensitive search) and True when none of them do (case
    insensitive search).
    """
    for needle in needles:
        if has_uppercase(needle):
            return False
    return True
def find_matches(entries, needles, check_entries=True):
    """
    Return an iterator over the entries that match needles.

    Entries are sorted by descending weight and fed to the consecutive,
    fuzzy and anywhere matchers, in that order. Entries that resolve to the
    current working directory are dropped, and so are entries whose path
    does not exist when check_entries is true.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)

    try:
        pwd = os.getcwdu()
    except OSError:
        pwd = None

    def is_candidate(entry):
        # pwd is looked up once above and captured here, so the working
        # directory is not queried again for every entry
        if os.path.realpath(entry.path) == pwd:
            return False
        return not check_entries or os.path.exists(entry.path)

    by_weight = sorted(entries, key=attrgetter('weight'), reverse=True)
    candidates = chain(
        match_consecutive(needles, by_weight, ignore_case),
        match_fuzzy(needles, by_weight, ignore_case),
        match_anywhere(needles, by_weight, ignore_case))
    return ifilter(is_candidate, candidates)
def handle_tab_completion(needle, entries):
tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
@ -221,111 +177,6 @@ def handle_tab_completion(needle, entries):
TAB_SEPARATOR)
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are treated as literal text: regex metacharacters in a needle
    (for instance 'c++') are escaped before the pattern is built.

    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # re.escape covers every metacharacter, including the backslashes that
    # show up in Windows paths
    regex_needle = '.*' + '.*'.join(re.escape(n) for n in needles) + '.*'
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    pattern = re.compile(regex_needle, flags=regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    A single regex is built from the needles: each needle may be surrounded
    by non-separator characters, neighbouring needles are exactly one path
    separator apart, and the last needle must sit in the final path
    component.

    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

        regex_needle = re.compile(r'''
            foo     # needle #1
            [^/]*   # all characters except os.sep zero or more times
            /       # os.sep
            [^/]*   # all characters except os.sep zero or more times
            baz     # needle #2
            [^/]*   # all characters except os.sep zero or more times
            $       # end of string
            ''')

        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # The normal \\ separator needs to be escaped again for use in regex.
    if is_windows():
        sep = '\\\\'
    else:
        sep = os.sep
    no_sep = '[^' + sep + ']*'
    between_needles = no_sep + sep + no_sep

    # the backslash doubling applies to the joined needles only; the
    # end-of-path suffix is appended afterwards, untouched
    joined = between_needles.join(needles)
    regex_needle = joined.replace('\\', '\\\\') + no_sep + '$'

    if ignore_case:
        regex_flags = re.IGNORECASE | re.UNICODE
    else:
        regex_flags = re.UNICODE

    def found(entry):
        return re.search(regex_needle, entry.path, flags=regex_flags)

    return ifilter(found, haystack)
def match_fuzzy(needles, haystack, ignore_case=False):
    """
    Approximate match of the last needle against the last component of
    every path; an entry matches when the similarity ratio reaches
    FUZZY_MATCH_THRESHOLD.

    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

    This is a weak heuristic and used as a last resort to find matches.
    """
    needle = last(needles)
    if ignore_case:
        needle = needle.lower()

    def meets_threshold(entry):
        candidate = entry.path.lower() if ignore_case else entry.path
        end_dir = last(os.path.split(candidate))
        ratio = SequenceMatcher(a=needle, b=end_dir).ratio()
        return ratio >= FUZZY_MATCH_THRESHOLD

    return ifilter(meets_threshold, haystack)
def purge_missing_paths(entries):
"""Remove non-existent paths from a list of entries."""
exists = lambda entry: os.path.exists(entry.path)

View File

@ -34,18 +34,23 @@ def dictify(entries):
key = path
value = weight
"""
result = {}
for entry in entries:
result[entry.path] = entry.weight
return result
return dict((e.path, e.weight) for e in entries)
def entriefy(data):
    """
    Converts a dictionary into an iterator of entries.

    data maps path -> weight; one Entry(path, weight) is produced per item.
    """
    # dict.iteritems only exists on Python 2, dict.items is the lazy
    # equivalent on Python 3
    iteritems = data.items if is_python3() else data.iteritems
    return (Entry(k, v) for k, v in iteritems())
def parse_data(data):
    """
    Parse the lines of a data file into a {path: weight} dictionary.

    data is an iterable of text lines, each expected to look like
    '<weight><TAB><path>'. Surrounding whitespace is stripped from every
    line. Lines that do not hold exactly two tab separated fields, or whose
    weight is not a valid float, are ignored. When a path occurs more than
    once the last occurrence wins.
    """
    result = {}
    for line in data:
        # example: u'10.0<TAB>/home/user' -> ['10.0', u'/home/user']
        fields = line.strip().split('\t')
        if len(fields) != 2:
            continue
        weight, path = fields
        try:
            result[path] = float(weight)
        except ValueError:
            # one corrupt weight must not make the whole file unreadable
            continue
    return result
def load(config):
@ -62,23 +67,12 @@ def load(config):
if not os.path.exists(config['data_path']):
return {}
# example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
parse = lambda line: line.strip().split('\t')
correct_length = lambda x: len(x) == 2
# example: ['10.0', u'/home/user'] -> (u'/home/user', 10.0)
tupleize = lambda x: (x[1], float(x[0]))
try:
with open(
config['data_path'],
'r', encoding='utf-8',
errors='replace') as f:
return dict(
imap(
tupleize,
ifilter(correct_length, imap(parse, f))))
return parse_data(f)
except (IOError, EOFError):
return load_backup(config)

163
bin/autojump_path_match.py Normal file
View File

@ -0,0 +1,163 @@
import os
import re
import sys
from itertools import chain
from operator import attrgetter
from difflib import SequenceMatcher
from autojump_utils import (
last,
has_uppercase,
)
if sys.version_info[0] == 3:
ifilter = filter
imap = map
os.getcwdu = os.getcwd
else:
from itertools import ifilter
from itertools import imap
FUZZY_MATCH_THRESHOLD = 0.6
def find_matches(entries, needles, check_entries=True):
    """
    Return an iterator over the entries that match needles.

    Entries are sorted by descending weight and fed to the consecutive,
    fuzzy and anywhere matchers, in that order. Entries that resolve to the
    current working directory are dropped, and so are entries whose path
    does not exist when check_entries is true.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)

    try:
        pwd = os.getcwdu()
    except OSError:
        pwd = None

    def is_candidate(entry):
        # pwd is looked up once above and captured here, so the working
        # directory is not queried again for every entry
        if os.path.realpath(entry.path) == pwd:
            return False
        return not check_entries or os.path.exists(entry.path)

    by_weight = sorted(entries, key=attrgetter('weight'), reverse=True)
    candidates = chain(
        match_consecutive(needles, by_weight, ignore_case),
        match_fuzzy(needles, by_weight, ignore_case),
        match_anywhere(needles, by_weight, ignore_case))
    return ifilter(is_candidate, candidates)
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are treated as literal text: regex metacharacters in a needle
    (for instance 'c++') are escaped before the pattern is built.

    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # re.escape covers every metacharacter, including the backslashes that
    # show up in Windows paths
    regex_needle = '.*' + '.*'.join(re.escape(n) for n in needles) + '.*'
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    pattern = re.compile(regex_needle, flags=regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    The path is split on os.sep. The last needle has to be contained in the
    last path segment, the needle before it in the segment before that, and
    so on. A path with fewer segments than there are needles never matches.

    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Yields the matching entries in haystack order.
    """
    # lowercase the needles once instead of once per entry
    if ignore_case:
        needles = [needle.lower() for needle in needles]
    reversed_needles = list(reversed(needles))

    for entry in haystack:
        path = entry.path.lower() if ignore_case else entry.path
        segments = path.split(os.sep)
        # zip() stops at the shorter input, so without this guard a path
        # that is too short would match on its trailing needles alone
        if len(segments) < len(reversed_needles):
            continue
        pairs = zip(reversed(segments), reversed_needles)
        if all(needle in segment for segment, needle in pairs):
            yield entry
def match_fuzzy(needles, haystack, ignore_case=False):
    """
    Approximate match of the last needle against the last component of
    every path; an entry is yielded when the similarity ratio reaches
    FUZZY_MATCH_THRESHOLD.

    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

    This is a weak heuristic and used as a last resort to find matches.
    """
    needle = last(needles)
    if ignore_case:
        needle = needle.lower()

    for entry in haystack:
        end_dir = os.path.split(entry.path)[1]
        if ignore_case:
            end_dir = end_dir.lower()
        similarity = SequenceMatcher(a=needle, b=end_dir).ratio()
        if similarity < FUZZY_MATCH_THRESHOLD:
            continue
        yield entry
def detect_smartcase(needles):
    """
    Decide whether the search should ignore case ("smartcase").

    Returns True (case insensitive search) only when no needle contains an
    uppercase letter; a single uppercase letter switches the search to case
    sensitive and the result is False.
    """
    return all(not has_uppercase(needle) for needle in needles)

View File

@ -13,10 +13,7 @@ import sys
import unicodedata
if sys.version_info[0] == 3:
imap = map
os.getcwdu = os.getcwd
else:
from itertools import imap
def create_dir(path):
@ -37,12 +34,7 @@ def encode_local(string):
def first(xs):
    """Return the first item of the iterable xs, or None if it is empty."""
    # next() with a default works on both Python 2 and 3, so no version
    # check or StopIteration handling is needed
    return next(iter(xs), None)
def get_tab_entry_info(entry, separator):
    """
    Split a tab completion entry into its needle, index and path.

    The expected format, shown here with '__' as the separator, is:

        [needle]__[index]__[path]

    Returns a (needle, index, path) tuple. All three are None when entry
    does not contain separator. index is None when the second field is not
    an integer, and path is None when there is no third field. Only the
    first two separators split the entry, so the path may itself contain
    the separator.
    """
    needle = index = path = None

    # honour the separator passed by the caller instead of assuming '__'
    parts = entry.split(separator, 2)
    if len(parts) > 1:
        needle = parts[0]
        try:
            index = int(parts[1])
        except ValueError:
            pass
        if len(parts) > 2:
            path = parts[2]

    return needle, index, path
@ -114,17 +99,10 @@ def is_windows():
def last(xs):
    """Return the last item of the iterable xs, or None if it is empty."""
    item = None
    # exhaust the iterable; item keeps whatever was seen last
    for item in xs:
        pass
    return item
def move_file(src, dst):
@ -169,21 +147,15 @@ def print_tab_menu(needle, tab_entries, separator):
def sanitize(directories):
    """
    Return a list with every directory passed through unico() and stripped
    of trailing path separators.
    """
    def clean(x):
        # edge case to allow '/' as a valid path
        return unico(x) if x == os.sep else unico(x).rstrip(os.sep)

    return [clean(d) for d in directories]
def second(xs):
    """
    Return the second item of the iterable xs, or None when xs has fewer
    than two items.
    """
    it = iter(xs)
    # discard the first item; the default keeps an empty iterable from
    # raising StopIteration
    next(it, None)
    return next(it, None)
def surround_quotes(string):

View File

@ -0,0 +1,52 @@
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'bin'))
from autojump_data import (
entriefy,
dictify,
parse_data,
Entry,
)
def test_entriefy():
    """entriefy gives nothing for an empty dict and one Entry per item."""
    assert list(entriefy({})) == []

    weights = {"path1": 10, "path2": 12}
    expected = set([Entry("path1", 10), Entry("path2", 12)])
    # compared as sets because dict iteration order is not relied upon
    assert set(entriefy(weights)) == expected
def test_dictify():
    """dictify maps each entry's path to its weight."""
    assert dictify([]) == {}

    expected = {"path1": 10, "path2": 12}
    result = dictify([Entry("path1", 10), Entry("path2", 12)])
    assert result == expected
class TestParseData:
    """Checks for parse_data on well formed and malformed lines."""

    def test_valid_data_should_be_parsed(self):
        lines = ["10.0\tpath_a", "12.3\tpath_a/path_b"]
        expected = {"path_a": 10.0, "path_a/path_b": 12.3}
        assert parse_data(lines) == expected

    def test_invalid_data_should_be_ignored(self):
        # the first line has too many fields and the second too few; only
        # the third is well formed
        lines = ["10.0\tpath_a\tnada", "12.3", "10.0\tpath_a"]
        expected = {"path_a": 10.0}
        assert parse_data(lines) == expected

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'bin'))
from autojump_data import Entry
import autojump_path_match as m
def test_match_fuzzy():
    """Only entries whose last directory resembles the last needle match."""
    similar_heavy = Entry("/foo/bar/baz", 11)
    dissimilar = Entry("/foo/baz/moo", 10)
    similar_light = Entry("/moo/foo/baz", 10)

    matches = m.match_fuzzy(
        ['foo', 'bar'],
        [similar_heavy, dissimilar, similar_light])

    assert list(matches) == [similar_heavy, similar_light]
def test_match_consecutive():
    """Needles must match the trailing path segments, one per segment."""
    needles = ['foo', 'baz']
    gap_between = Entry("/foo/bar/baz", 10)
    not_at_end = Entry("/foo/baz/moo", 10)
    mixed_case = Entry("/moo/foo/Baz", 10)
    with_suffix = Entry("/foo/bazar", 10)
    with_prefix = Entry("/foo/xxbaz", 10)
    haystack = [
        gap_between, not_at_end, mixed_case, with_suffix, with_prefix]

    # case sensitive by default, so "Baz" is not accepted for 'baz'
    case_sensitive = list(m.match_consecutive(needles, haystack))
    assert case_sensitive == [with_suffix, with_prefix]

    case_insensitive = list(
        m.match_consecutive(needles, haystack, ignore_case=True))
    assert case_insensitive == [mixed_case, with_suffix, with_prefix]

View File

@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
import os
import sys
sys.path.append(os.path.join(os.getcwd(), 'bin'))

View File

@ -4,7 +4,8 @@ envlist =
py27,
py32,
py33,
py34
py34,
py35
# ignore missing setup.py
skipsdist = True