1
0
mirror of https://github.com/wting/autojump synced 2024-10-27 20:34:07 +00:00
wting_autojump/bin/autojump_path_match.py
2016-04-19 12:49:00 +08:00

164 lines
4.8 KiB
Python

import os
import re
import sys
from itertools import chain
from operator import attrgetter
from difflib import SequenceMatcher
from autojump_utils import (
last,
has_uppercase,
)
if sys.version_info[0] == 3:
ifilter = filter
imap = map
os.getcwdu = os.getcwd
else:
from itertools import ifilter
from itertools import imap
FUZZY_MATCH_THRESHOLD = 0.6
def find_matches(entries, needles, check_entries=True):
"""Return an iterator to matching entries."""
# TODO(wting|2014-02-24): replace assertion with unit test
assert isinstance(needles, list), "Needles must be a list."
ignore_case = detect_smartcase(needles)
try:
pwd = os.getcwdu()
except OSError:
pwd = None
# using closure to prevent constantly hitting hdd
def is_cwd(entry):
return os.path.realpath(entry.path) == pwd
if check_entries:
path_exists = lambda entry: os.path.exists(entry.path)
else:
path_exists = lambda _: True
data = sorted(
entries,
key=attrgetter('weight'),
reverse=True)
return ifilter(
lambda entry: not is_cwd(entry) and path_exists(entry),
chain(
match_consecutive(needles, data, ignore_case),
match_fuzzy(needles, data, ignore_case),
match_anywhere(needles, data, ignore_case)))
def match_anywhere(needles, haystack, ignore_case=False):
"""
Matches needles anywhere in the path as long as they're in the same (but
not necessary consecutive) order.
For example:
needles = ['foo', 'baz']
regex needle = r'.*foo.*baz.*'
haystack = [
(path="/foo/bar/baz", weight=10),
(path="/baz/foo/bar", weight=10),
(path="/foo/baz", weight=10)]
result = [
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10)]
"""
regex_needle = '.*' + '.*'.join(needles).replace('\\', '\\\\') + '.*'
regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
found = lambda haystack: re.search(
regex_needle,
haystack.path,
flags=regex_flags)
return ifilter(found, haystack)
def match_consecutive(needles, haystack, ignore_case=False):
"""
Matches consecutive needles at the end of a path.
For example:
needles = ['foo', 'baz']
haystack = [
(path="/foo/bar/baz", weight=10),
(path="/foo/baz/moo", weight=10),
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10)]
regex_needle = re.compile(r'''
foo # needle #1
[^/]* # all characters except os.sep zero or more times
/ # os.sep
[^/]* # all characters except os.sep zero or more times
baz # needle #2
[^/]* # all characters except os.sep zero or more times
$ # end of string
''')
result = [
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10)]
"""
reversed_needles = list(reversed(needles))
for entry in haystack:
path_segments = entry.path.split(os.sep)
for target, needle_part in zip(
reversed(path_segments), reversed_needles
):
if ignore_case:
needle_part = needle_part.lower()
target = target.lower()
if needle_part not in target:
break
else:
yield entry
def match_fuzzy(needles, haystack, ignore_case=False):
"""
Performs an approximate match with the last needle against the end of
every path past an acceptable threshold (FUZZY_MATCH_THRESHOLD).
For example:
needles = ['foo', 'bar']
haystack = [
(path="/foo/bar/baz", weight=11),
(path="/foo/baz/moo", weight=10),
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10),
(path="/foo/bar", weight=10)]
result = [
(path="/foo/bar/baz", weight=11),
(path="/moo/foo/baz", weight=10),
(path="/foo/baz", weight=10),
(path="/foo/bar", weight=10)]
This is a weak heuristic and used as a last resort to find matches.
"""
needle = last(needles)
if ignore_case:
needle = needle.lower()
for entry in haystack:
_, tail = os.path.split(entry.path)
path = tail.lower() if ignore_case else tail
matcher = SequenceMatcher(a=needle, b=path)
if matcher.ratio() >= FUZZY_MATCH_THRESHOLD:
yield entry
def detect_smartcase(needles):
"""
If any needles contain an uppercase letter then use case sensitive
searching. Otherwise use case insensitive searching.
"""
return not any(imap(has_uppercase, needles))