wting_autojump/bin/autojump_match.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
from difflib import SequenceMatcher

from autojump_utils import is_python3
from autojump_utils import last


# Python 2/3 compatibility shim: make `ifilter` and `imap` available under the
# same names regardless of which branch runs.
if is_python3():  # pragma: no cover
    # Bind the built-in filter/map to the itertools-style names used below.
    ifilter = filter
    imap = map
    # Adds a `getcwdu` attribute to the shared `os` module (not just this
    # file). Nothing in the code visible here calls it -- presumably other
    # modules rely on it; verify before removing.
    os.getcwdu = os.getcwd
else:
    from itertools import ifilter
    from itertools import imap


def match_anywhere(needles, haystack, ignore_case=False):
    """
    Matches needles anywhere in the path as long as they're in the same (but
    not necessarily consecutive) order.

    Needles are matched literally: regex metacharacters in a needle are
    escaped. An empty sequence of needles matches every entry. Because '.'
    does not match a newline, all needles must be found on the same line of
    the path.

    For example:
        needles = ['foo', 'baz']
        regex needle = r'foo.*baz'
        haystack = [
            (path='/foo/bar/baz', weight=10),
            (path='/baz/foo/bar', weight=10),
            (path='/foo/baz', weight=10),
        ]

        result = [
            (path='/foo/bar/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

    Args:
        needles: iterable of strings to look for, in order.
        haystack: iterable of entries exposing a `path` string attribute.
        ignore_case: when true, the comparison is case-insensitive.

    Returns:
        A lazy iterator over the matching entries, in haystack order.
    """
    # re.search() already scans for a match at any offset, so wrapping the
    # pattern in a leading/trailing '.*' adds nothing but backtracking work.
    regex_needle = '.*'.join(re.escape(needle) for needle in needles)
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # Compile once up front instead of handing the pattern string to
    # re.search() for every entry.
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))


def match_consecutive(needles, haystack, ignore_case=False):
    """
    Matches consecutive needles at the end of a path.

    Successive needles must be separated by exactly one os.sep (plus any
    non-separator characters), and no os.sep may follow the last needle.
    Needles are matched literally: regex metacharacters are escaped.

    For example:
        needles = ['foo', 'baz']
        haystack = [
            (path='/foo/bar/baz', weight=10),
            (path='/foo/baz/moo', weight=10),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

        # The effective pattern, written out in verbose form purely for
        # illustration (the real one is assembled from strings below):
        regex_needle = re.compile(r'''
            foo     # needle #1
            [^/]*   # all characters except os.sep zero or more times
            /       # os.sep
            [^/]*   # all characters except os.sep zero or more times
            baz     # needle #2
            [^/]*   # all characters except os.sep zero or more times
            $       # end of string
            ''')

        result = [
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
        ]

    Args:
        needles: iterable of strings to look for, in order.
        haystack: iterable of entries exposing a `path` string attribute.
        ignore_case: when true, the comparison is case-insensitive.

    Returns:
        A lazy iterator over the matching entries, in haystack order.
    """
    # re.escape() yields a separator that is safe both inside and outside a
    # character class, including the backslash used on Windows.
    sep = re.escape(os.sep)
    regex_no_sep = '[^' + sep + ']*'
    regex_one_sep = regex_no_sep + sep + regex_no_sep
    regex_needle = (
        regex_one_sep.join(re.escape(needle) for needle in needles) +
        regex_no_sep + '$'
    )
    regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
    # Compile once up front instead of handing the pattern string to
    # re.search() for every entry.
    pattern = re.compile(regex_needle, regex_flags)
    return (entry for entry in haystack if pattern.search(entry.path))


def match_fuzzy(needles, haystack, ignore_case=False, threshold=0.6):
    """
    Approximate matching, used as a last resort when nothing else matches.

    Only the last needle takes part. It is compared, via
    difflib.SequenceMatcher, with the tail that os.path.split() yields for
    each entry's path; an entry is kept when the similarity ratio is at least
    `threshold`. With ignore_case set, both the needle and the path are
    lowercased before comparing.

    For example:
        needles = ['foo', 'bar']
        haystack = [
            (path='/foo/bar/baz', weight=11),
            (path='/foo/baz/moo', weight=10),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
            (path='/foo/bar', weight=10),
        ]

        result = [
            (path='/foo/bar/baz', weight=11),
            (path='/moo/foo/baz', weight=10),
            (path='/foo/baz', weight=10),
            (path='/foo/bar', weight=10),
        ]

    This is a weak heuristic.
    """
    # The needle is resolved (and lowercased if requested) once, up front.
    target = last(needles)
    if ignore_case:
        target = target.lower()

    def similarity(entry):
        # Lowercase the whole path first, then take its final split element.
        path = entry.path.lower() if ignore_case else entry.path
        tail = last(os.path.split(path))
        return SequenceMatcher(a=target, b=tail).ratio()

    def is_close_enough(entry):
        return similarity(entry) >= threshold

    return ifilter(is_close_enough, haystack)
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`
			`import os`
			`import re`
			`from difflib import SequenceMatcher`

Fix match imports and simplify fuzzy threshold. 2016-05-04 06:57:00 +00:00			`from autojump_utils import is_python3`
			`from autojump_utils import last`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00
Fix match imports and simplify fuzzy threshold. 2016-05-04 06:57:00 +00:00
Add match consecutive tests. 2016-06-23 01:09:36 +00:00			`if is_python3(): # pragma: no cover`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`ifilter = filter`
			`imap = map`
			`os.getcwdu = os.getcwd`
			`else:`
			`from itertools import ifilter`
			`from itertools import imap`


			`def match_anywhere(needles, haystack, ignore_case=False):`
			`"""`
			`Matches needles anywhere in the path as long as they're in the same (but`
			`not necessary consecutive) order.`

			`For example:`
			`needles = ['foo', 'baz']`
			`regex needle = r'.foo.baz.*'`
			`haystack = [`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`(path='/foo/bar/baz', weight=10),`
			`(path='/baz/foo/bar', weight=10),`
			`(path='/foo/baz', weight=10),`
			`]`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00
			`result = [`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`(path='/moo/foo/baz', weight=10),`
			`(path='/foo/baz', weight=10),`
			`]`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`"""`
Add match_anywhere tests. 2016-05-04 07:50:47 +00:00			`regex_needle = '.' + '.'.join(imap(re.escape, needles)) + '.*'`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`regex_flags = re.IGNORECASE \| re.UNICODE if ignore_case else re.UNICODE`
			`found = lambda haystack: re.search(`
			`regex_needle,`
			`haystack.path,`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`flags=regex_flags,`
			`)`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`return ifilter(found, haystack)`


			`def match_consecutive(needles, haystack, ignore_case=False):`
			`"""`
			`Matches consecutive needles at the end of a path.`

			`For example:`
			`needles = ['foo', 'baz']`
			`haystack = [`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`(path='/foo/bar/baz', weight=10),`
			`(path='/foo/baz/moo', weight=10),`
			`(path='/moo/foo/baz', weight=10),`
			`(path='/foo/baz', weight=10),`
			`]`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00
Add match consecutive tests. 2016-06-23 01:09:36 +00:00			`# We can't actually use re.compile because of re.UNICODE`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`regex_needle = re.compile(r'''`
			`foo # needle #1`
			`[^/]* # all characters except os.sep zero or more times`
			`/ # os.sep`
			`[^/]* # all characters except os.sep zero or more times`
			`baz # needle #2`
			`[^/]* # all characters except os.sep zero or more times`
			`$ # end of string`
			`''')`

			`result = [`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`(path='/moo/foo/baz', weight=10),`
			`(path='/foo/baz', weight=10),`
			`]`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`"""`
Impliment fix in #436 2019-02-26 23:55:20 +00:00			`sep = '\\\\' if os.sep == '\\' else os.sep`
			`regex_no_sep = '[^' + sep + ']*'`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`regex_no_sep_end = regex_no_sep + '$'`
Impliment fix in #436 2019-02-26 23:55:20 +00:00			`regex_one_sep = regex_no_sep + sep + regex_no_sep`
Add match consecutive tests. 2016-06-23 01:09:36 +00:00			`regex_needle = regex_one_sep.join(imap(re.escape, needles)) + regex_no_sep_end`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`regex_flags = re.IGNORECASE \| re.UNICODE if ignore_case else re.UNICODE`
			`found = lambda entry: re.search(`
			`regex_needle,`
			`entry.path,`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`flags=regex_flags,`
			`)`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`return ifilter(found, haystack)`


Fix match imports and simplify fuzzy threshold. 2016-05-04 06:57:00 +00:00			`def match_fuzzy(needles, haystack, ignore_case=False, threshold=0.6):`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`"""`
			`Performs an approximate match with the last needle against the end of`
Fix match imports and simplify fuzzy threshold. 2016-05-04 06:57:00 +00:00			`every path past an acceptable threshold.`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00
			`For example:`
			`needles = ['foo', 'bar']`
			`haystack = [`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`(path='/foo/bar/baz', weight=11),`
			`(path='/foo/baz/moo', weight=10),`
			`(path='/moo/foo/baz', weight=10),`
			`(path='/foo/baz', weight=10),`
			`(path='/foo/bar', weight=10),`
			`]`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00
			`result = [`
Misc cleanup. 2016-05-04 07:50:34 +00:00			`(path='/foo/bar/baz', weight=11),`
			`(path='/moo/foo/baz', weight=10),`
			`(path='/foo/baz', weight=10),`
			`(path='/foo/bar', weight=10),`
			`]`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00
			`This is a weak heuristic and used as a last resort to find matches.`
			`"""`
			`end_dir = lambda path: last(os.path.split(path))`
			`if ignore_case:`
			`needle = last(needles).lower()`
			`match_percent = lambda entry: SequenceMatcher(`
			`a=needle,`
(Running pre-commit) Use Yelp-style indentation. 2017-07-20 06:17:11 +00:00			`b=end_dir(entry.path.lower()),`
			`).ratio()`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`else:`
			`needle = last(needles)`
			`match_percent = lambda entry: SequenceMatcher(`
			`a=needle,`
(Running pre-commit) Use Yelp-style indentation. 2017-07-20 06:17:11 +00:00			`b=end_dir(entry.path),`
			`).ratio()`
Fix match imports and simplify fuzzy threshold. 2016-05-04 06:57:00 +00:00			`meets_threshold = lambda entry: match_percent(entry) >= threshold`
Simplify flake8 noqa. 2016-05-04 06:55:01 +00:00			`return ifilter(meets_threshold, haystack)`