mirror of
https://github.com/wting/autojump
synced 2024-10-27 20:34:07 +00:00
Refactor code and add unit tests
This commit is contained in:
parent
4230bbe2d1
commit
723d7a69cf
151
bin/autojump
151
bin/autojump
@ -21,13 +21,11 @@
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
from difflib import SequenceMatcher
|
||||
from itertools import chain
|
||||
from math import sqrt
|
||||
from operator import attrgetter
|
||||
from operator import itemgetter
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
@ -48,7 +46,6 @@ from autojump_data import save
|
||||
from autojump_utils import first
|
||||
from autojump_utils import get_pwd
|
||||
from autojump_utils import get_tab_entry_info
|
||||
from autojump_utils import has_uppercase
|
||||
from autojump_utils import is_autojump_sourced
|
||||
from autojump_utils import is_osx
|
||||
from autojump_utils import is_windows
|
||||
@ -59,9 +56,9 @@ from autojump_utils import print_tab_menu
|
||||
from autojump_utils import sanitize
|
||||
from autojump_utils import take
|
||||
from autojump_utils import unico
|
||||
from autojump_path_match import find_matches
|
||||
|
||||
VERSION = '22.3.0'
|
||||
FUZZY_MATCH_THRESHOLD = 0.6
|
||||
TAB_ENTRIES_COUNT = 9
|
||||
TAB_SEPARATOR = '__'
|
||||
|
||||
@ -151,47 +148,6 @@ def decrease_path(data, path, weight=15):
|
||||
return data, Entry(path, data[path])
|
||||
|
||||
|
||||
def detect_smartcase(needles):
    """
    Decide whether a search should ignore case ("smartcase").

    Returns False (case sensitive) as soon as has_uppercase() reports a needle
    with an uppercase letter, otherwise True (case insensitive).
    """
    for needle in needles:
        if has_uppercase(needle):
            return False
    return True
|
||||
|
||||
|
||||
def find_matches(entries, needles, check_entries=True):
    """
    Return an iterator to matching entries.

    Entries are ranked by descending weight and run through the consecutive,
    fuzzy and anywhere matchers (in that order, chained, so an entry may be
    produced more than once). Entries whose real path is the current working
    directory are dropped, as are entries whose path does not exist when
    check_entries is true.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)

    # Look the working directory up once instead of once per entry.
    try:
        pwd = os.getcwdu()
    except OSError:
        pwd = None

    def is_candidate(entry):
        if os.path.realpath(entry.path) == pwd:
            return False
        if check_entries:
            return os.path.exists(entry.path)
        return True

    ranked = sorted(entries, key=attrgetter('weight'), reverse=True)
    matches = chain(
        match_consecutive(needles, ranked, ignore_case),
        match_fuzzy(needles, ranked, ignore_case),
        match_anywhere(needles, ranked, ignore_case))
    return ifilter(is_candidate, matches)
|
||||
|
||||
|
||||
def handle_tab_completion(needle, entries):
|
||||
tab_needle, tab_index, tab_path = get_tab_entry_info(needle, TAB_SEPARATOR)
|
||||
|
||||
@ -221,111 +177,6 @@ def handle_tab_completion(needle, entries):
|
||||
TAB_SEPARATOR)
|
||||
|
||||
|
||||
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are matched as literal text: regex metacharacters such as '+' or
    '(' are escaped before the pattern is built.

    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    regex_flags = re.UNICODE
    if ignore_case:
        regex_flags |= re.IGNORECASE
    # re.escape() keeps needles like 'c++' from being read as regex syntax.
    pattern = re.compile(
        '.*' + '.*'.join(re.escape(needle) for needle in needles) + '.*',
        regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
|
||||
|
||||
|
||||
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    Needles are matched as literal text; regex metacharacters in a needle are
    escaped before the pattern is built.

    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

        regex_needle = re.compile(r'''
            foo     # needle #1
            [^/]*   # all characters except os.sep zero or more times
            /       # os.sep
            [^/]*   # all characters except os.sep zero or more times
            baz     # needle #2
            [^/]*   # all characters except os.sep zero or more times
            $       # end of string
            ''')

        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    # Escape the separator exactly once so a backslash separator is a literal
    # both inside and outside the character class.
    sep = re.escape(os.sep)
    regex_no_sep = '[^' + sep + ']*'
    regex_one_sep = regex_no_sep + sep + regex_no_sep
    # Escape each needle on its own; escaping the joined pattern would also
    # mangle the separator pieces built above.
    regex_needle = (
        regex_one_sep.join(re.escape(needle) for needle in needles) +
        regex_no_sep + '$')
    regex_flags = re.UNICODE
    if ignore_case:
        regex_flags |= re.IGNORECASE
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
|
||||
|
||||
|
||||
def match_fuzzy(needles, haystack, ignore_case=False):
    """
    Approximately match the last needle against the final component of each
    path, keeping entries whose similarity ratio reaches
    FUZZY_MATCH_THRESHOLD.

    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

    This is a weak heuristic and used as a last resort to find matches.
    """
    needle = last(needles)
    if ignore_case:
        needle = needle.lower()

    def meets_threshold(entry):
        path = entry.path.lower() if ignore_case else entry.path
        # os.path.split() returns (head, tail); only the tail is compared.
        tail = os.path.split(path)[1]
        ratio = SequenceMatcher(a=needle, b=tail).ratio()
        return ratio >= FUZZY_MATCH_THRESHOLD

    return ifilter(meets_threshold, haystack)
|
||||
|
||||
|
||||
def purge_missing_paths(entries):
|
||||
"""Remove non-existent paths from a list of entries."""
|
||||
exists = lambda entry: os.path.exists(entry.path)
|
||||
|
@ -34,18 +34,23 @@ def dictify(entries):
|
||||
key = path
|
||||
value = weight
|
||||
"""
|
||||
result = {}
|
||||
for entry in entries:
|
||||
result[entry.path] = entry.weight
|
||||
return result
|
||||
return dict((e.path, e.weight) for e in entries)
|
||||
|
||||
|
||||
def entriefy(data):
    """
    Converts a dictionary into an iterator of entries.

    Each (key, value) item of `data` becomes Entry(key, value); the items are
    produced lazily.
    """
    # Python 2 dicts offer the lazy iteritems(); Python 3 items() is a view.
    iteritems = data.items if is_python3() else data.iteritems
    return (Entry(path, weight) for path, weight in iteritems())
|
||||
|
||||
|
||||
def parse_data(data):
    """
    Build a {path: weight} dictionary from lines of 'weight<TAB>path' text.

    Lines that do not split into exactly two tab-separated fields are
    skipped. Weights are converted with float().
    """
    result = {}
    for line in data:
        # example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
        fields = line.strip().split('\t')
        if len(fields) != 2:
            continue
        weight, path = fields
        result[path] = float(weight)
    return result
|
||||
|
||||
|
||||
def load(config):
|
||||
@ -62,23 +67,12 @@ def load(config):
|
||||
if not os.path.exists(config['data_path']):
|
||||
return {}
|
||||
|
||||
# example: u'10.0\t/home/user\n' -> ['10.0', u'/home/user']
|
||||
parse = lambda line: line.strip().split('\t')
|
||||
|
||||
correct_length = lambda x: len(x) == 2
|
||||
|
||||
# example: ['10.0', u'/home/user'] -> (u'/home/user', 10.0)
|
||||
tupleize = lambda x: (x[1], float(x[0]))
|
||||
|
||||
try:
|
||||
with open(
|
||||
config['data_path'],
|
||||
'r', encoding='utf-8',
|
||||
errors='replace') as f:
|
||||
return dict(
|
||||
imap(
|
||||
tupleize,
|
||||
ifilter(correct_length, imap(parse, f))))
|
||||
return parse_data(f)
|
||||
except (IOError, EOFError):
|
||||
return load_backup(config)
|
||||
|
||||
|
163
bin/autojump_path_match.py
Normal file
163
bin/autojump_path_match.py
Normal file
@ -0,0 +1,163 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from itertools import chain
|
||||
from operator import attrgetter
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
from autojump_utils import (
|
||||
last,
|
||||
has_uppercase,
|
||||
)
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
ifilter = filter
|
||||
imap = map
|
||||
os.getcwdu = os.getcwd
|
||||
else:
|
||||
from itertools import ifilter
|
||||
from itertools import imap
|
||||
|
||||
FUZZY_MATCH_THRESHOLD = 0.6
|
||||
|
||||
|
||||
def find_matches(entries, needles, check_entries=True):
    """
    Return an iterator to matching entries.

    Entries are ranked by descending weight and run through the consecutive,
    fuzzy and anywhere matchers (in that order, chained, so an entry may be
    produced more than once). Entries whose real path is the current working
    directory are dropped, as are entries whose path does not exist when
    check_entries is true.
    """
    # TODO(wting|2014-02-24): replace assertion with unit test
    assert isinstance(needles, list), "Needles must be a list."
    ignore_case = detect_smartcase(needles)

    # Look the working directory up once instead of once per entry.
    try:
        pwd = os.getcwdu()
    except OSError:
        pwd = None

    def is_candidate(entry):
        if os.path.realpath(entry.path) == pwd:
            return False
        if check_entries:
            return os.path.exists(entry.path)
        return True

    ranked = sorted(entries, key=attrgetter('weight'), reverse=True)
    matches = chain(
        match_consecutive(needles, ranked, ignore_case),
        match_fuzzy(needles, ranked, ignore_case),
        match_anywhere(needles, ranked, ignore_case))
    return ifilter(is_candidate, matches)
|
||||
|
||||
|
||||
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are matched as literal text: regex metacharacters such as '+' or
    '(' are escaped before the pattern is built.

    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/baz/foo/bar", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Returns a lazy iterator over the matching entries, in haystack order.
    """
    regex_flags = re.UNICODE
    if ignore_case:
        regex_flags |= re.IGNORECASE
    # re.escape() keeps needles like 'c++' from being read as regex syntax.
    pattern = re.compile(
        '.*' + '.*'.join(re.escape(needle) for needle in needles) + '.*',
        regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
|
||||
|
||||
|
||||
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    Needles are aligned with the trailing path components (split on os.sep):
    the last needle must be a substring of the last component, the needle
    before it a substring of the preceding component, and so on. A path with
    fewer components than there are needles never matches.

    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path="/foo/bar/baz", weight=10),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

        result = [
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10)]

    Yields the matching entries lazily, in haystack order.
    """
    # Normalise the needles once instead of once per entry and component.
    wanted = [
        needle.lower() if ignore_case else needle
        for needle in reversed(needles)
    ]
    for entry in haystack:
        path = entry.path.lower() if ignore_case else entry.path
        segments = path.split(os.sep)
        # zip() stops at the shorter input, so without this guard the leading
        # needles would be silently ignored for short paths.
        if len(wanted) > len(segments):
            continue
        if all(
            needle in segment
            for segment, needle in zip(reversed(segments), wanted)
        ):
            yield entry
|
||||
|
||||
|
||||
def match_fuzzy(needles, haystack, ignore_case=False,
                threshold=FUZZY_MATCH_THRESHOLD):
    """
    Performs an approximate match with the last needle against the end of
    every path past an acceptable threshold.

    Only the final path component (the tail of os.path.split) is compared.
    `threshold` is the minimum SequenceMatcher ratio an entry needs and
    defaults to FUZZY_MATCH_THRESHOLD. Nothing is yielded when `needles` is
    empty.

    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path="/foo/bar/baz", weight=11),
            (path="/foo/baz/moo", weight=10),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

        result = [
            (path="/foo/bar/baz", weight=11),
            (path="/moo/foo/baz", weight=10),
            (path="/foo/baz", weight=10),
            (path="/foo/bar", weight=10)]

    This is a weak heuristic and used as a last resort to find matches.
    """
    needle = last(needles)
    if needle is None:
        # No needle to compare against (e.g. empty needle list): no matches
        # rather than an AttributeError/TypeError further down.
        return
    if ignore_case:
        needle = needle.lower()

    for entry in haystack:
        _, tail = os.path.split(entry.path)
        candidate = tail.lower() if ignore_case else tail
        # Argument order is kept as (needle, candidate): ratio() is not
        # guaranteed to be symmetric.
        matcher = SequenceMatcher(a=needle, b=candidate)
        if matcher.ratio() >= threshold:
            yield entry
|
||||
|
||||
|
||||
def detect_smartcase(needles):
    """
    Decide whether a search should ignore case ("smartcase").

    Returns False (case sensitive) as soon as has_uppercase() reports a needle
    with an uppercase letter, otherwise True (case insensitive).
    """
    for needle in needles:
        if has_uppercase(needle):
            return False
    return True
|
@ -13,10 +13,7 @@ import sys
|
||||
import unicodedata
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
imap = map
|
||||
os.getcwdu = os.getcwd
|
||||
else:
|
||||
from itertools import imap
|
||||
|
||||
|
||||
def create_dir(path):
|
||||
@ -37,12 +34,7 @@ def encode_local(string):
|
||||
|
||||
def first(xs):
    """Return the first item of the iterable `xs`, or None if it is empty."""
    # The builtin next() with a default works on Python 2.6+ and 3, so no
    # per-version .next()/.__next__() call or StopIteration handling is needed.
    return next(iter(xs), None)
|
||||
|
||||
|
||||
def get_tab_entry_info(entry, separator):
|
||||
@ -51,23 +43,16 @@ def get_tab_entry_info(entry, separator):
|
||||
|
||||
[needle]__[index]__[path]
|
||||
"""
|
||||
needle, index, path = None, None, None
|
||||
|
||||
match_needle = re.search(r'(.*?)' + separator, entry)
|
||||
match_index = re.search(separator + r'([0-9]{1})', entry)
|
||||
match_path = re.search(
|
||||
separator + r'[0-9]{1}' + separator + r'(.*)',
|
||||
entry)
|
||||
|
||||
if match_needle:
|
||||
needle = match_needle.group(1)
|
||||
|
||||
if match_index:
|
||||
index = int(match_index.group(1))
|
||||
|
||||
if match_path:
|
||||
path = match_path.group(1)
|
||||
|
||||
needle = index = path = None
|
||||
parts = entry.split('__', 2)
|
||||
if len(parts) > 1:
|
||||
needle = parts[0]
|
||||
try:
|
||||
index = int(parts[1])
|
||||
except ValueError:
|
||||
index = None
|
||||
if len(parts) > 2:
|
||||
path = parts[2]
|
||||
return needle, index, path
|
||||
|
||||
|
||||
@ -114,17 +99,10 @@ def is_windows():
|
||||
|
||||
|
||||
def last(xs):
    """Return the final item of the iterable `xs`, or None if it is empty."""
    item = None
    # Exhaust the iterable; the loop variable keeps the last value seen.
    for item in xs:
        pass
    return item
|
||||
|
||||
|
||||
def move_file(src, dst):
|
||||
@ -169,21 +147,15 @@ def print_tab_menu(needle, tab_entries, separator):
|
||||
|
||||
def sanitize(directories):
    """
    Return a list with every directory passed through unico() and stripped
    of trailing os.sep characters.

    A directory that is exactly os.sep is kept as is so the filesystem root
    stays a valid path.
    """
    def clean(x):
        # edge case to allow '/' as a valid path
        return unico(x) if x == os.sep else unico(x).rstrip(os.sep)

    return [clean(d) for d in directories]
|
||||
|
||||
|
||||
def second(xs):
    """
    Return the second item of the iterable `xs`, or None if it has fewer
    than two items.
    """
    it = iter(xs)
    # Discard the first item; the default keeps an empty iterable from raising.
    next(it, None)
    return next(it, None)
|
||||
|
||||
|
||||
def surround_quotes(string):
|
||||
|
@ -0,0 +1,52 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.join(os.getcwd(), 'bin'))
|
||||
from autojump_data import (
|
||||
entriefy,
|
||||
dictify,
|
||||
parse_data,
|
||||
Entry,
|
||||
)
|
||||
|
||||
|
||||
def test_entriefy():
    """entriefy() yields one Entry per (path, weight) item of the dict."""
    assert list(entriefy({})) == []

    weights = {"path1": 10, "path2": 12}
    expected = set([Entry("path1", 10), Entry("path2", 12)])
    # Dict ordering is not relied upon, hence the set comparison.
    assert set(entriefy(weights)) == expected
|
||||
|
||||
|
||||
def test_dictify():
    """dictify() maps each entry's path to its weight."""
    assert dictify([]) == {}

    expected = {"path1": 10, "path2": 12}
    result = dictify([Entry("path1", 10), Entry("path2", 12)])
    assert result == expected
|
||||
|
||||
|
||||
class TestParseData:
    """Checks parse_data() on well-formed and malformed input lines."""

    def test_valid_data_should_be_parsed(self):
        lines = ["10.0\tpath_a", "12.3\tpath_a/path_b"]
        expected = {"path_a": 10.0, "path_a/path_b": 12.3}
        assert parse_data(lines) == expected

    def test_invalid_data_should_be_ignored(self):
        # Lines with too many or too few tab-separated fields are dropped.
        lines = ["10.0\tpath_a\tnada", "12.3", "10.0\tpath_a"]
        expected = {"path_a": 10.0}
        assert parse_data(lines) == expected
|
45
tests/autojump_path_match_test.py
Normal file
45
tests/autojump_path_match_test.py
Normal file
@ -0,0 +1,45 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.join(os.getcwd(), 'bin'))
|
||||
|
||||
from autojump_data import Entry
|
||||
import autojump_path_match as m
|
||||
|
||||
|
||||
def test_match_fuzzy():
    """Only entries whose last component resembles the last needle match."""
    first_baz = Entry("/foo/bar/baz", 11)
    moo = Entry("/foo/baz/moo", 10)
    second_baz = Entry("/moo/foo/baz", 10)

    matches = m.match_fuzzy(['foo', 'bar'], [first_baz, moo, second_baz])

    assert list(matches) == [first_baz, second_baz]
|
||||
|
||||
|
||||
def test_match_consecutive():
    """Needles must line up with the trailing path components."""
    needles = ['foo', 'baz']
    capital_baz = Entry("/moo/foo/Baz", 10)
    bazar = Entry("/foo/bazar", 10)
    xxbaz = Entry("/foo/xxbaz", 10)
    haystack = [
        Entry("/foo/bar/baz", 10),
        Entry("/foo/baz/moo", 10),
        capital_baz,
        bazar,
        xxbaz,
    ]

    # Case sensitive by default: "Baz" does not satisfy the needle "baz".
    case_sensitive = list(m.match_consecutive(needles, haystack))
    assert case_sensitive == [bazar, xxbaz]

    case_insensitive = list(
        m.match_consecutive(needles, haystack, ignore_case=True))
    assert case_insensitive == [capital_baz, bazar, xxbaz]
|
@ -0,0 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.join(os.getcwd(), 'bin'))
|
Loading…
Reference in New Issue
Block a user