1
0
mirror of https://github.com/wting/autojump synced 2024-10-27 20:34:07 +00:00
wting_autojump/bin/autojump_match.py

131 lines
4.0 KiB
Python
Raw Normal View History

2016-05-04 06:55:01 +00:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
from difflib import SequenceMatcher
from autojump_utils import is_python3
from autojump_utils import is_windows
from autojump_utils import last
2016-05-04 06:55:01 +00:00
# Python 2/3 compatibility shim: make `ifilter` and `imap` available under the
# same names on either interpreter so the matchers below can use them
# unconditionally.
if not is_python3():
    from itertools import ifilter
    from itertools import imap
else:
    # On Python 3 the built-in filter/map already return lazy iterators.
    ifilter, imap = filter, map
    # os.getcwdu does not exist on Python 3; alias it to os.getcwd so that
    # callers of os.getcwdu() keep working.
    os.getcwdu = os.getcwd
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are matched literally: every needle is passed through re.escape,
    so characters such as '.', '+', '(' or a backslash have no special regex
    meaning and cannot produce an invalid pattern.

    For example:

        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path='/foo/bar/baz', weight=10),
            (path='/baz/foo/bar', weight=10),
            (path='/foo/baz', weight=10),
        ]

        result = [
            (path='/foo/bar/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

    Arguments:
        needles: iterable of strings to look for, in order.
        haystack: iterable of entries exposing a `path` attribute.
        ignore_case: when true, matching is case-insensitive.

    Returns a lazy iterator over the matching entries, in haystack order.
    An empty `needles` matches every entry.
    """
    # re.escape also escapes backslashes, so no separate backslash doubling
    # is needed for Windows-style needles.
    escaped_needles = (re.escape(needle) for needle in needles)
    regex_needle = '.*' + '.*'.join(escaped_needles) + '.*'
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # Compile once up front instead of re-resolving the pattern per entry.
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    Each needle must fall in its own path component, the components must be
    adjacent (exactly one separator between two needles), and the final
    needle must be in the last component of the path. Needles are matched
    literally (they are passed through re.escape).

    For example:

        needles = ['foo', 'baz']
        haystack = [
            (path='/foo/bar/baz', weight=10),
            (path='/foo/baz/moo', weight=10),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

        regex_needle = re.compile(r'''
            foo     # needle #1
            [^/]*   # all characters except os.sep zero or more times
            /       # os.sep
            [^/]*   # all characters except os.sep zero or more times
            baz     # needle #2
            [^/]*   # all characters except os.sep zero or more times
            $       # end of string
            ''')

        result = [
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

    Arguments:
        needles: iterable of strings, one per consecutive path component.
        haystack: iterable of entries exposing a `path` attribute.
        ignore_case: when true, matching is case-insensitive.

    Returns a lazy iterator over the matching entries, in haystack order.
    An empty `needles` matches every entry.
    """
    # Escape the separator for regex use: a Windows backslash becomes an
    # escaped backslash, while '/' keeps matching itself.
    sep = re.escape(os.sep)
    regex_no_sep = '[^' + sep + ']*'
    regex_no_sep_end = regex_no_sep + '$'
    regex_one_sep = regex_no_sep + sep + regex_no_sep
    # The needles are already regex-safe after re.escape; the backslashes it
    # adds must NOT be doubled again, otherwise a needle such as 'my-project'
    # would require a literal backslash in the path and never match.
    escaped_needles = (re.escape(needle) for needle in needles)
    regex_needle = regex_one_sep.join(escaped_needles) + regex_no_sep_end
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # re.compile accepts flags, so compile once rather than per entry.
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))
def match_fuzzy(needles, haystack, ignore_case=False, threshold=0.6):
    """
    Approximately matches the last needle against the end of every path and
    keeps the entries whose similarity reaches an acceptable threshold.

    Similarity is difflib.SequenceMatcher.ratio() between the needle and the
    trailing part of the path as produced by os.path.split. With ignore_case
    both sides are lowercased before they are compared.

    For example:

        needles = ['foo', 'bar']
        haystack = [
            (path='/foo/bar/baz', weight=11),
            (path='/foo/baz/moo', weight=10),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
            (path='/foo/bar', weight=10),
        ]

        result = [
            (path='/foo/bar/baz', weight=11),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
            (path='/foo/bar', weight=10),
        ]

    This is a weak heuristic and used as a last resort to find matches.
    """
    def normalize(text):
        # Case-insensitivity is implemented by lowercasing both operands.
        return text.lower() if ignore_case else text

    # Only the final needle takes part in the comparison.
    wanted = normalize(last(needles))

    def similar_enough(entry):
        tail = last(os.path.split(normalize(entry.path)))
        score = SequenceMatcher(a=wanted, b=tail).ratio()
        return score >= threshold

    return ifilter(similar_enough, haystack)