2016-05-04 06:55:01 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
from difflib import SequenceMatcher
|
|
|
|
|
2016-05-04 06:57:00 +00:00
|
|
|
from autojump_utils import is_python3
|
|
|
|
from autojump_utils import last
|
2016-05-04 06:55:01 +00:00
|
|
|
|
2016-05-04 06:57:00 +00:00
|
|
|
|
2016-06-23 01:09:36 +00:00
|
|
|
# Python 2/3 compatibility: the matchers below use the itertools-style names
# ``ifilter`` and ``imap``. Under Python 3 those names are bound to the
# builtin ``filter`` and ``map``; otherwise they are imported from itertools.
if is_python3():  # pragma: no cover
    ifilter, imap = filter, map
    # Expose ``os.getcwdu`` under Python 3 by pointing it at ``os.getcwd``.
    os.getcwdu = os.getcwd
else:
    from itertools import ifilter
    from itertools import imap
|
|
|
|
|
|
|
|
|
|
|
|
def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    For example:
        needles = ['foo', 'baz']
        regex needle = r'.*foo.*baz.*'
        haystack = [
            (path='/foo/bar/baz', weight=10),
            (path='/baz/foo/bar', weight=10),
            (path='/foo/baz', weight=10),
        ]

        result = [
            (path='/foo/bar/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

    Args:
        needles: iterable of strings. Each needle is passed through
            re.escape, so it is matched literally.
        haystack: iterable of entries exposing a ``path`` attribute.
        ignore_case: when true, the match is case-insensitive.

    Returns:
        A lazy iterator over the entries of ``haystack`` whose path matches,
        in their original order.
    """
    flags = re.UNICODE
    if ignore_case:
        flags |= re.IGNORECASE

    # Compile once up front instead of handing the pattern string to
    # re.search for every entry.
    pattern = re.compile(
        '.*' + '.*'.join(re.escape(needle) for needle in needles) + '.*',
        flags,
    )
    return (entry for entry in haystack if pattern.search(entry.path))
|
|
|
|
|
|
|
|
|
|
|
|
def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path='/foo/bar/baz', weight=10),
            (path='/foo/baz/moo', weight=10),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

        # Pattern built for these needles when os.sep is '/', laid out in
        # verbose form for readability only (the real pattern contains no
        # whitespace or comments):
        regex_needle = re.compile(r'''
            foo     # needle #1
            [^/]*   # all characters except os.sep zero or more times
            /       # os.sep
            [^/]*   # all characters except os.sep zero or more times
            baz     # needle #2
            [^/]*   # all characters except os.sep zero or more times
            $       # end of string
            ''')

        result = [
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

    Args:
        needles: iterable of strings. Each needle is passed through
            re.escape, so it is matched literally.
        haystack: iterable of entries exposing a ``path`` attribute.
        ignore_case: when true, the match is case-insensitive.

    Returns:
        A lazy iterator over the entries of ``haystack`` whose path matches,
        in their original order.
    """
    # re.escape covers the Windows backslash separator without a special
    # case; an escaped '/' is equally valid inside the character class.
    sep = re.escape(os.sep)
    no_sep = '[^' + sep + ']*'
    # Exactly one separator between two consecutive needles.
    one_sep = no_sep + sep + no_sep

    flags = re.UNICODE
    if ignore_case:
        flags |= re.IGNORECASE

    # Compile once up front instead of handing the pattern string to
    # re.search for every entry.
    pattern = re.compile(
        one_sep.join(re.escape(needle) for needle in needles) + no_sep + '$',
        flags,
    )
    return (entry for entry in haystack if pattern.search(entry.path))
|
|
|
|
|
|
|
|
|
2016-05-04 06:57:00 +00:00
|
|
|
def match_fuzzy(needles, haystack, ignore_case=False, threshold=0.6):
    """
    Approximately matches the last needle against the end of every path,
    keeping the entries whose similarity reaches the given threshold.

    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path='/foo/bar/baz', weight=11),
            (path='/foo/baz/moo', weight=10),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
            (path='/foo/bar', weight=10),
        ]

        result = [
            (path='/foo/bar/baz', weight=11),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
            (path='/foo/bar', weight=10),
        ]

    Similarity is difflib's SequenceMatcher ratio between the needle and the
    tail that os.path.split yields for the path. With ignore_case set, both
    the needle and the path are lowercased before comparing.

    This is a weak heuristic and used as a last resort to find matches.
    """
    needle = last(needles)
    if ignore_case:
        needle = needle.lower()

    def similarity(entry):
        # Lowercase the whole path before splitting when ignoring case.
        path = entry.path.lower() if ignore_case else entry.path
        tail = last(os.path.split(path))
        return SequenceMatcher(a=needle, b=tail).ratio()

    def is_close_enough(entry):
        return similarity(entry) >= threshold

    return ifilter(is_close_enough, haystack)
|