#!/usr/bin/env python
"""Copyright Joel Schaerer 2008-2010
This file is part of autojump

autojump is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

autojump is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with autojump.  If not, see <http://www.gnu.org/licenses/>.

Autojump is a small tool that maintains a database of your most
used directories, and finds the best match to help you jump to
frequently used places."""

from __future__ import division, print_function

try: # fix to get optimised pickle in python < 3
    import cPickle as pickle
except ImportError:
    import pickle

import getopt
from sys import argv, stderr, version_info, exit
from tempfile import NamedTemporaryFile
from operator import itemgetter
import os
# Total weight above which forget() scales all stored weights down.
MAX_KEYWEIGHT = 1000
# Number of entries above which clean_dict() starts deleting paths.
MAX_STORED_PATHS = 600
# Marker used inside completion entries (pattern, choice number, path).
COMPLETION_SEPARATOR = '__'
# Directory holding the database: $AUTOJUMP_DATA_DIR when set,
# otherwise the user's home directory.
CONFIG_DIR = os.environ.get("AUTOJUMP_DATA_DIR", os.path.expanduser("~"))

def uniqadd(collection, key):
    """Append key to the list unless an equal element is already in it.

    The list is modified in place; nothing is returned."""
    if key in collection:
        return
    collection.append(key)

def dicadd(dic, key, increment=1):
    """Add increment to dic[key] in place.

    A key that is not present yet is treated as if it held 0.0."""
    previous = dic.get(key, 0.)
    dic[key] = previous + increment

def save(path_dict, dic_file):
    """Save the database in an atomic way, and preserve a backup file.

    The data is pickled into a temporary file created in the same
    directory as dic_file and then renamed over dic_file, so the
    database file is replaced in a single step. Afterwards dic_file is
    copied to dic_file + ".bak" unless a backup younger than one day
    (86400 seconds) already exists.

    path_dict -- the object to pickle (the path -> weight mapping)
    dic_file  -- path of the database file to create or replace

    If dic_file exists but belongs to another user, nothing is written
    and the function returns quietly. Errors raised while writing the
    database propagate to the caller after the temporary file has been
    removed; errors while creating the backup are only reported on
    stderr."""
    import shutil
    import time

    # If the dic_file exists, check that it belongs to us.
    # Otherwise, fail quietly.
    if os.path.exists(dic_file) and os.getuid() != os.stat(dic_file)[4]:
        return

    # The temporary file must live next to its target: os.rename()
    # cannot move a file from one filesystem to another.
    target_dir = os.path.dirname(os.path.abspath(dic_file))
    temp = NamedTemporaryFile(dir=target_dir, delete=False)
    renamed = False
    try:
        with temp:
            pickle.dump(path_dict, temp, -1)
            temp.flush()
            os.fsync(temp.fileno())
        #cf. http://thunk.org/tytso/blog/2009/03/15/dont-fear-the-fsync/
        os.rename(temp.name, dic_file)
        renamed = True
    finally:
        # Do not leave a stray temporary file behind when saving failed.
        if not renamed and os.path.exists(temp.name):
            os.remove(temp.name)

    backup = dic_file + ".bak"
    try:
        if (not os.path.exists(backup) or
                time.time() - os.path.getmtime(backup) > 86400):
            shutil.copy(dic_file, backup)
    # shutil.copy reports failures as IOError on Python 2
    except (IOError, OSError) as ex:
        print("Error while creating backup autojump file. (%s)" %
                ex, file=stderr)

def forget(path_dict, dic_file):
    """Scale the stored weights down once their sum grows too large.

    When the sum of all weights exceeds MAX_KEYWEIGHT, every weight is
    multiplied by the same factor (0.9 * MAX_KEYWEIGHT / sum) and the
    database is saved to dic_file. Otherwise nothing happens.
    Only call this from the actual jump, since it can take time."""
    total_weight = sum(path_dict.values())
    if not total_weight > MAX_KEYWEIGHT:
        return
    shrink = 0.9 * MAX_KEYWEIGHT / total_weight
    for directory in list(path_dict):
        path_dict[directory] *= shrink
    save(path_dict, dic_file)

def clean_dict(sorted_dirs, path_dict):
    """Keep path_dict from growing beyond MAX_STORED_PATHS entries.

    sorted_dirs is a list of (path, weight) pairs; when it is longer
    than MAX_STORED_PATHS, the paths found at its tail are deleted
    from path_dict.
    Returns True if keys were deleted, False otherwise."""
    if len(sorted_dirs) <= MAX_STORED_PATHS:
        return False
    # Drop 25 entries more than strictly needed, so that the pruning
    # does not have to run again on every single call.
    first_dropped = MAX_STORED_PATHS - 25
    for stale_path, _weight in sorted_dirs[first_dropped:]:
        del path_dict[stale_path]
    return True

def approximatch(pat, text, max_errors):
    """Calculate the Damerau-Levenshtein distance between :pat and :text,
    minimized over all possible positions of :pat within :text. As an
    optimization, this distance is only accurate if it is <= :max_errors.
    Return values greater than :max_errors indicate that the distance is _at
    least_ that much. Runs in O(:max_errors * len(:text)) time.

    pat        -- the pattern to look for
    text       -- the string searched for an approximate occurrence of pat
    max_errors -- error threshold used by the cut-off heuristic

    Returns the smallest value found in the last pattern column of the
    table, or len(pat) if that column is never improved upon."""
    # cols[i][j] is the table cell for the first i characters of text and
    # the first j characters of pat. Row 0 is [0, 1, ..., len(pat)].
    cols = [list(range(0, len(pat)+1))]
    errors = len(pat)
    # Every other row starts out filled with the initial value len(pat).
    for i in range(0, len(text)): cols.append([errors] * (len(pat) + 1))
    # Highest pattern column that is still worth computing (cut-off bound).
    last_active = min(max_errors, len(pat))
    # Maps a character to the 1-based index of its latest occurrence in the
    # part of text processed so far.
    last_seen_in_text = {}
    for i, char1 in enumerate(text):
        # Column 0 costs nothing in every row: a match may start anywhere
        # in text.
        cols[i+1][0] = 0
        # 1-based index of the latest pattern position in this row whose
        # character equals char1 (0 while there is none).
        last_seen_in_pat = 0
        for j, char2 in enumerate(pat):
            # i1: where the current pattern character was last seen in text;
            # j1: where the current text character was last seen in pat.
            # Both are 1-based, 0 meaning "not seen".
            i1 = last_seen_in_text[char2] if char2 in last_seen_in_text else 0
            j1 = last_seen_in_pat
            if char1 == char2:
                cols[i+1][j+1] = cols[i][j]
                last_seen_in_pat = j + 1
            else:
                # One more error than the cheapest of the three neighbours.
                cols[i+1][j+1] = 1 + min(cols[i+1][j], cols[i][j+1], cols[i][j])
            if i1 and j1:
                # NOTE(review): this looks like the transposition case of
                # Damerau-Levenshtein, built on the cell just before the
                # two earlier occurrences -- confirm against the reference.
                cols[i+1][j+1] = min(cols[i+1][j+1], 1 + (i - i1) + (j - j1) + cols[i1-1][j1-1])

            #Ukkonen's cut-off heuristic. See 'Theoretical and Empirical
            #Comparisons of Approximate String Matching Algorithms' by Chang
            #and Lampe for details.
            if j + 1 == len(pat):
                # Last pattern column reached: remember the best value so far.
                errors = min(errors, cols[i+1][j+1])
            elif j + 1 == last_active + 1:
                # One column past the active bound: skip the rest of the row.
                break

        last_seen_in_text[char1] = i + 1

        # Move the bound one column further (at most to len(pat)), then pull
        # it back while the cell it points at is above max_errors.
        if last_active < len(pat): last_active += 1
        while cols[i+1][last_active] > max_errors: last_active -= 1

    return errors

def find_matches(dirs, patterns, result_list, ignore_case, approx, max_matches):
    """Find up to max_matches existing paths that match all the patterns,
    and add them to result_list.

    dirs         -- iterable of (path, weight) pairs, visited in order
    patterns     -- list of pattern strings; all of them must match
    result_list  -- list that receives the matching paths (modified in place)
    ignore_case  -- compare lower-cased patterns and paths
    approx       -- accept approximate matches with one or two errors in
                    total instead of requiring substring matches
    max_matches  -- stop collecting once this many paths were found

    Paths that do not exist on disk are never added."""
    final_index = len(patterns) - 1

    def pattern_text_pairs(path):
        # Each pattern is compared with the whole path, except the last
        # one: it only sees the trailing path components, as many as the
        # pattern itself spans.
        for position, pattern in enumerate(patterns):
            if position == final_index:
                depth = 1 + pattern.count('/')
                text = "/".join(path.split('/')[-depth:])
            else:
                text = path
            if ignore_case:
                yield (pattern.lower(), text.lower())
            else:
                yield (pattern, text)

    if not approx:
        for path, _weight in dirs:
            if len(result_list) >= max_matches:
                break
            every_pattern_found = all(
                pattern in text
                for pattern, text in pattern_text_pairs(path))
            #Verify that the path exists
            #(useful in the case of external drives, for example)
            if every_pattern_found and os.path.exists(path):
                uniqadd(result_list, path)
        return

    single_error = []
    double_error = []
    for path, _weight in dirs:
        if len(single_error) >= max_matches:
            break
        error_sum = 0
        for pattern, text in pattern_text_pairs(path):
            distance = approximatch(pattern, text, 2)
            #With at least as many errors as characters a match is
            #always possible, so such a result is useless.
            if distance >= len(pattern) or distance >= len(text):
                break
            error_sum += distance
        else:
            #Only reached when no pattern was rejected above.
            if error_sum > 2 or not os.path.exists(path):
                continue
            if error_sum == 1:
                uniqadd(single_error, path)
            elif error_sum == 2:
                uniqadd(double_error, path)
    #One-error paths come first, two-error paths fill the remaining room.
    room_left = max_matches - len(single_error)
    result_list.extend(single_error)
    result_list.extend(double_error[:room_left])

def open_dic(dic_file, error_recovery=False):
    """Try hard to open the database file, recovering
       from backup if needed.

    dic_file       -- path of the pickled database
    error_recovery -- True when this call is already the retry that
                      follows a restore from the backup; no second
                      restore is attempted then

    Returns the unpickled database. When dic_file cannot be read or
    unpickled, dic_file + ".bak" (if it exists) is copied over dic_file
    and the load is retried once. If that fails too, or there is no
    backup, an empty dict is returned."""
    try:
        # The with-statement closes the file even when unpickling fails.
        with open(dic_file, 'rb') as aj_file:
            # NOTE: pickle can execute arbitrary code while loading; the
            # database must only ever be a file written by autojump itself.
            if version_info[0] > 2:
                #encoding is only specified for python2.x compatibility
                return pickle.load(aj_file, encoding="utf-8")
            return pickle.load(aj_file)
    # Damaged pickle data can raise much more than UnpicklingError
    # (ValueError, AttributeError, ImportError, IndexError, ...), so every
    # ordinary exception is treated as "database unusable".
    except Exception:
        backup = dic_file + ".bak"
        if error_recovery or not os.path.exists(backup):
            return {} #if everything fails, return an empty file
        print('Problem with autojump database, '
              'trying to recover from backup...', file=stderr)
        import shutil
        try:
            shutil.copy(backup, dic_file)
        except (IOError, OSError):
            # The backup could not be restored: start from scratch.
            return {}
        return open_dic(dic_file, True)

def shell_utility():
    """Run this when autojump is called as a shell utility.

    The command line is read from sys.argv:

      -a DIR        add DIR to the database (the home directory is skipped)
      --stat        print the (at most 100) heaviest entries and the totals
      --completion  print completion candidates instead of a jump target
      --bash        with --completion, wrap a printed path in double quotes
      PATTERN...    print the best matching directory on stdout

    Returns True when the request was handled, and False when -a was
    given without a directory or when no directory matches the patterns.
    An unknown option terminates the process with exit status 1."""
    try:
        optlist, args = getopt.getopt(argv[1:], 'a',
                ['stat', 'import', 'completion', 'bash'])
    except getopt.GetoptError as ex:
        # stdout carries the resulting path, so diagnostics go to stderr
        # like the other messages of this file.
        print("Unknown command line argument: %s" % ex, file=stderr)
        exit(1)

    if CONFIG_DIR == os.path.expanduser("~"):
        dic_file = CONFIG_DIR+"/.autojump_py"
    else:
        dic_file = CONFIG_DIR+"/autojump_py"
    path_dict = open_dic(dic_file)

    if ('-a', '') in optlist:
        if not args:
            print("Option -a requires a directory argument", file=stderr)
            return False
        # The home dir can be reached quickly by "cd"
        # and may interfere with other directories
        if args[-1] != os.path.expanduser("~"):
            dicadd(path_dict, args[-1])
            save(path_dict, dic_file)
        return True

    if ('--stat', '') in optlist:
        paths = list(path_dict.items())
        paths.sort(key=itemgetter(1))
        for path, count in paths[-100:]:
            print("%.1f:\t%s" % (count, path))
        print("Total key weight: %d. Number of stored paths: %d" %
                (sum(path_dict.values()), len(paths)))
        return True

    import re
    completion = ('--completion', '') in optlist
    #userchoice is i if the pattern is __pattern__i, otherwise -1
    userchoice = -1
    results = []
    if not completion:
        forget(path_dict, dic_file) #gradually forget about old directories
    if not args: patterns = [""]
    else: patterns = args

    # If the last pattern contains a full path, jump there
    # The regexp is because we need to support stuff like
    # "j wo jo__3__/home/joel/workspace/joel" for zsh
    last_pattern_path = re.sub("(.*)"+COMPLETION_SEPARATOR, "", patterns[-1])
    if (len(last_pattern_path)>0 and
          last_pattern_path[0] == "/" and
          os.path.exists(last_pattern_path)):
        if not completion: print(last_pattern_path)
        return True

    #check for ongoing completion, and act accordingly
    endmatch = re.search(COMPLETION_SEPARATOR+"([0-9]+)", patterns[-1])
    if endmatch:  #user has selected a completion
        userchoice = int(endmatch.group(1))
        patterns[-1] = re.sub(COMPLETION_SEPARATOR+"[0-9]+.*",
                "", patterns[-1])
    else: #user hasn't selected a completion, display the same choices again
        endmatch = re.match("(.*)"+COMPLETION_SEPARATOR, patterns[-1])
        if endmatch: patterns[-1] = endmatch.group(1)

    try:
        cwd = os.path.realpath(os.curdir)
    #Sometimes the current path doesn't exist anymore.
    #In that case, jump if possible.
    except OSError:
        cwd = None
    # Candidates: every stored path except the current directory,
    # heaviest first.
    dirs = [(path, count) for path, count in path_dict.items()
            if path != cwd]
    dirs.sort(key=itemgetter(1), reverse=True)
    if completion or userchoice != -1:
        max_matches = 9
    else:
        max_matches = 1
    find_matches(dirs, patterns, results, False, False, max_matches)
    # If not found, try ignoring case.
    # On completion always show all results
    if completion or not results:
        find_matches(dirs, patterns, results,
                ignore_case=True, approx=False, max_matches=max_matches)

    # Last resort: approximate matching
    if not results:
        find_matches(dirs, patterns, results,
                ignore_case=True, approx=True, max_matches=max_matches)

    # Keep the database to a reasonable size
    if not completion and clean_dict(dirs, path_dict):
        save(path_dict, dic_file)

    if completion and ('--bash', '') in optlist: quotes = '"'
    else: quotes = ""

    if userchoice != -1:
        # Choices are numbered from 1 (see n+1 below). The lower bound
        # keeps choice 0 from indexing results[-1], which would crash on
        # an empty result list.
        if 1 <= userchoice <= len(results):
            print(quotes+results[userchoice-1]+quotes)
    elif len(results) > 1 and completion:
        print("\n".join(("%s%s%d%s%s" % (patterns[-1],
            COMPLETION_SEPARATOR, n+1, COMPLETION_SEPARATOR, r)
            for n, r in enumerate(results[:8]))))
    elif results: print(quotes+results[0]+quotes)
    else:
        return False
    return True


if __name__ == "__main__":
    # A falsy result from shell_utility() becomes exit status 1.
    if not shell_utility():
        exit(1)