# pmacs3/mode_python.py

import commands, os.path, sets, string
import color, completer, default, mode2, lex2, method, regex, tab2
import ctag_python
from point2 import Point
from lex2 import Grammar, PatternRule, RegionRule

class StringGrammar(Grammar):
    '''Sub-grammar used for the contents of python string literals.'''
    # The three-digit octal escape is listed ahead of the generic
    # backslash escape (presumably rules are tried in order -- confirm
    # against lex2).
    rules = [
        PatternRule(pattern=r'\\[0-7]{3}', name='octal'),
        PatternRule(pattern=r'\\.', name='escaped'),
    ]

class PythonGrammar(Grammar):
    '''Top-level lexing rules for python source.'''
    rules = [
        # names being defined: the identifier directly after 'def '/'class '
        PatternRule(name=r'functionname', pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*'),
        PatternRule(name=r'classname', pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*'),
        PatternRule(name=r'reserved', pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])'),
        PatternRule(name=r'keyword', pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])'),
        # builtins are not matched when used as attributes (no preceding '.')
        PatternRule(name=r"builtin", pattern=r'(?<!\.)(?:zip|xrange|vars|unicode|unichr|type|tuple|super|sum|str|staticmethod|sorted|slice|setattr|set|round|repr|reduce|raw_input|range|property|pow|ord|open|oct|object|max|min|map|long|locals|list|len|iter|issubclass|isinstance|int|input|id|hex|hash|hasattr|globals|getattr|frozenset|float|filter|file|execfile|eval|enumerate|divmod|dir|dict|delattr|complex|compile|coerce|cmp|classmethod|chr|callable|bool)(?![a-zA-Z0-9_])'),
        # an identifier directly after a '.' and followed by '(' (the
        # lookbehind used to require ". " with a space, so "obj.meth(" was
        # never recognized as a method call)
        PatternRule(name=r'methodcall', pattern=r'(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()'),
        PatternRule(name=r'functioncall', pattern=r'[a-zA-Z_][a-zA-Z0-9_]*(?= *\()'),
        # identifiers, from most to least specific
        PatternRule(name=r'system_identifier', pattern=r'__[a-zA-Z0-9_]+__'),
        PatternRule(name=r'private_identifier', pattern=r'__[a-zA-Z0-9_]*'),
        PatternRule(name=r'hidden_identifier', pattern=r'_[a-zA-Z0-9_]*'),
        PatternRule(name=r'identifier', pattern=r'[a-zA-Z_][a-zA-Z0-9_]*'),
        PatternRule(name=r'delimiter', pattern=r'\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*='),
        PatternRule(name=r"operator", pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%"),
        # numeric literals; the lookarounds keep a number from matching inside
        # a longer word or another number
        PatternRule(name=r"integer", pattern=r"(?<![\.0-9a-zA-Z_])(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?(?![\.0-9a-zA-Z_])"),
        # the exponent form accepts any number of leading digits ("10e5"),
        # not just a single one
        PatternRule(name=r"float", pattern=r"(?<![\.0-9a-zA-Z_])(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]+|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)(?![\.0-9a-zA-Z_])"),
        # the j/J suffix is required for every form ("10j", "1.5j", "1e5j");
        # a bare run of digits is not an imaginary literal
        PatternRule(name=r"imaginary", pattern=r"(?<![\.0-9a-zA-Z_])(?:[0-9]+|[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]+|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ](?![\.0-9a-zA-Z_])"),
        # strings: the triple-quoted forms are listed before the
        # single-quoted ones
        RegionRule(name=r'string', start=r'"""', grammar=StringGrammar(), end=r'"""'),
        RegionRule(name=r'string', start=r"'''", grammar=StringGrammar(), end=r"'''"),
        RegionRule(name=r'string', start=r'"', grammar=StringGrammar(), end=r'"'),
        RegionRule(name=r'string', start=r"'", grammar=StringGrammar(), end=r"'"),
        PatternRule(name=r'comment', pattern=r'#.*$'),
        # a backslash at the very end of a line
        PatternRule(name=r'continuation', pattern=r'\\$'),
    ]

class PythonTabber(tab2.StackTabber):
    '''Compute indentation levels for python buffers using a marker stack.'''
    # keywords that pop one marker off the stack wherever they are seen
    endlevel_names   = ('pass', 'return', 'yield', 'raise', 'break', 'continue')
    # keywords that push a new, deeper marker when leftmost on their line
    startlevel_names = ('if', 'try', 'class', 'def', 'for', 'while')
    def __init__(self, m):
        tab2.StackTabber.__init__(self, m)
        self.base_level = 0

    def is_base(self, y):
        '''Return True if the calculation can safely start at line y.'''
        if y == 0:
            # we always know that line 0 is indented at the 0 level
            return True
        tokens = self.get_tokens(y)
        if not tokens:
            # if a line has no tokens, we don't know much about its indentation
            return False
        # if a line has no leading whitespace and begins with something like
        # 'while', 'class', 'def', 'if', etc. then we can start at it;
        # otherwise we can't be sure that its level is correct.  Such tokens
        # are named 'keyword' and carry the actual word in .string (the same
        # test _handle_other_token uses), so both have to be checked.
        first = tokens[0]
        return first.fqname() == 'keyword' and \
               first.string in self.startlevel_names

    def get_level(self, y):
        '''Recalculate and return the indentation level recorded for line y.'''
        self._calc_level(y)
        return self.lines.get(y)

    def _calc_level(self, y):
        '''Fill in self.lines/self.record from the nearest base line up to y.'''
        # ok, so first remember where we are going, and find our starting point
        target = y
        while not self.is_base(y) and y > 0:
            y -= 1

        # ok, so clear out our stack and then loop over each line
        self.popped = False
        self.markers = []
        while y <= target:
            self.continued   = False
            self.last_popped = self.popped
            self.popped      = False
            tokens           = self.get_tokens(y)
            currlvl          = self.get_curr_level()
            # if we were continuing, let's pop that previous continuation token
            # and note that we're continuing
            if self.markers and self.markers[-1].name == 'cont':
                self.continued = True
                self._pop()
            # if we haven't reached the target-line yet, we can detect how many
            # levels of unindention, if any, the user chose on previous lines
            if y < target and tokens:
                if self.token_is_whitespace(y, 0):
                    l = len(tokens[0].string)
                else:
                    l = 0
                while currlvl > l:
                    self._pop()
                    currlvl = self.get_curr_level()
                    self.popped = True
            # ok, having done all that, we can now process each token on the line
            for i in range(len(tokens)):
                currlvl = self._handle_token(currlvl, y, i)
            # so let's store the level for this line, as well as some debugging
            self.lines[y]  = currlvl
            self.record[y] = tuple(self.markers)
            y += 1

    def _handle_close_token(self, currlvl, y, i):
        '''Handle a closing token, tolerating failures in the base handler.'''
        try:
            return tab2.StackTabber._handle_close_token(self, currlvl, y, i)
        except Exception:
            # best effort: if the base class cannot handle this closing token
            # keep the current level rather than abort the calculation (but
            # let KeyboardInterrupt/SystemExit through)
            return currlvl

    def _handle_other_token(self, currlvl, y, i):
        '''Update the marker stack for token i of line y; return the level.'''
        token  = self.get_token(y, i)
        fqname = token.fqname()
        if fqname == 'continuation':
            # we need to push the indentation level over, unless last line was
            # also a continued line
            if self.continued:
                self._opt_append('cont', currlvl)
            else:
                self._opt_append('cont', currlvl + 4)
        elif fqname == 'string.start':
            # while inside of a string, there is no indentation level
            self._opt_append('string', None)
        elif fqname == 'string.end':
            # since we're done with the string, resume our indentation level
            self._opt_pop('string')
        elif fqname == 'delimiter':
            # we only really care about a colon as part of a one-line statement,
            # i.e.   "while ok: foo()" or "if True: print 3"
            if token.string == ':':
                if self.markers and self.markers[-1].name in ('[', '{'):
                    # a slice or a dictionary literal, not a block
                    pass
                elif self.is_rightmost_token(y, i):
                    # the block's body starts on the following line
                    pass
                else:
                    self._pop()
        elif fqname == 'keyword':
            if token.string in self.endlevel_names:
                # we know we'll unindent at least once
                self._pop()
                self.popped = True
            elif token.string in self.startlevel_names and self.is_leftmost_token(y, i):
                # we know we will indent exactly once
                self._append(token.string, currlvl + 4)
            elif token.string in ('elif', 'else') and self.is_leftmost_token(y, i):
                # we know we'll unindent at least to the first if/elif
                if not self.popped and not self.last_popped:
                    self._pop_until('if', 'elif')
                    currlvl = self.get_curr_level()
                self._append(token.string, currlvl + 4)
            elif token.string == 'except' and self.is_leftmost_token(y, i):
                # we know we'll unindent at least to the first try
                if not self.popped and not self.last_popped:
                    self._pop_until('try')
                    currlvl = self.get_curr_level()
                self._append(token.string, currlvl + 4)
            elif token.string == 'finally' and self.is_leftmost_token(y, i):
                # we know we'll unindent at least to the first try/except
                if not self.popped and not self.last_popped:
                    self._pop_until('try', 'except')
                    currlvl = self.get_curr_level()
                self._append(token.string, currlvl + 4)
        return currlvl

class Python(mode2.Fundamental):
    '''Editing mode for python source files.'''
    tabbercls  = PythonTabber
    grammar    = PythonGrammar()
    # opening and closing tags are both lexed as 'delimiter' tokens
    opentoken  = 'delimiter'
    closetoken = 'delimiter'
    opentags   = {
        '(': ')',
        '[': ']',
        '{': '}',
    }
    closetags  = {
        ')': '(',
        ']': '[',
        '}': '{',
    }

    def __init__(self, w):
        mode2.Fundamental.__init__(self, w)

        # tag matching: each closing character is bound to its own action
        for action_name, closer in (('close-paren', ')'),
                                    ('close-brace', '}'),
                                    ('close-bracket', ']')):
            self.add_bindings(action_name, (closer,))

        # python-specific methods, each with its key sequence
        for action_cls, sequence in ((PythonCheckSyntax, 'C-c s'),
                                     (PythonDictCleanup, 'C-c h'),
                                     (PythonUpdateTags, 'C-c t'),
                                     (PythonTagComplete, 'C-c k')):
            self.add_action_and_bindings(action_cls(), (sequence,))

        # highlighting: (token name, foreground); the background is always
        # 'default'
        palette = (
            ('keyword',           'cyan'),
            ('reserved',          'magenta'),
            ('builtin',           'cyan'),
            ('functionname',      'blue'),
            ('classname',         'green'),
            ('string.start',      'green'),
            ('string.null',       'green'),
            ('string.octal',      'magenta'),
            ('string.escaped',    'magenta'),
            ('string.format',     'yellow'),
            ('string.end',        'green'),
            ('integer',           'default'),
            ('float',             'default'),
            ('imaginary',         'default'),
            ('comment',           'red'),
            ('continuation',      'red'),
            ('system_identifier', 'cyan'),
        )
        colors = {}
        for token_name, foreground in palette:
            colors[token_name] = color.build(foreground, 'default')
        self.colors = colors

        # default module search path used by the syntax check
        self.pythonlib = "."

    def name(self):
        return "Python"

class PythonSetLib(method.Method):
    '''Set the path(s) to find python modules'''
    def _args(self):
        # a single string argument, "lib", defaulting to the current directory
        return [method.Argument("lib", type=str, prompt="Python Path: ",
                                default=default.build_constant("."))]
    def _execute(self, w, **vargs):
        # stored on the mode; PythonCheckSyntax passes it as PYTHONPATH
        w.mode.pythonlib = vargs['lib']

class PythonCheckSyntax(method.Method):
    '''Check the syntax of the current python file'''
    def _execute(self, w, **vargs):
        # imported here because only this method needs shell quoting
        from pipes import quote

        # NOTE: the check works by importing the buffer's module in a child
        # interpreter, so the module's top-level code is executed.
        mod = os.path.splitext(os.path.basename(w.buffer.path))[0]
        # both the library path and the file-derived module name are quoted:
        # the command runs through a shell, so spaces or quote characters in
        # either one would otherwise break (or hijack) the command line
        cmd = "PYTHONPATH=%s python -c %s" % (quote(w.mode.pythonlib),
                                              quote('import %s' % mod))
        (status, output) = commands.getstatusoutput(cmd)
        if status == 0:
            w.application.set_error("Syntax OK")
            w.application.data_buffer("python-syntax", output, switch_to=False)
        else:
            # on failure show the output together with the raw exit status
            output = output + "\ncommand exit status: %d" % (status)
            w.application.data_buffer("python-syntax", output, switch_to=True)

class PythonUpdateTags(method.Method):
    '''Rebuild the CTag data associated with a python buffer'''
    def _args(self):
        # one path argument, "lib", defaulting to the current directory
        base = method.Argument("lib", prompt="Module Base: ", datatype='path',
                               default=default.build_constant("."))
        return [base]
    def _execute(self, w, **vargs):
        # install a fresh tagger on the mode, then scan the requested path
        tagger = ctag_python.PythonCTagger()
        w.mode.ctagger = tagger
        tagger.process_paths([vargs['lib']])
        w.application.set_error('Tag data updated')

class PythonTagComplete(method.Method):
    '''Complete a symbol using tag data'''
    def _execute(self, w, **vargs):
        # In this file the tagger is only ever created by python-update-tags,
        # so it may not exist yet; treat a missing tagger like an empty one
        # and build the tag data instead of failing with an AttributeError.
        ctagger = getattr(w.mode, 'ctagger', None)
        if ctagger is None or not ctagger.packages:
            w.application.methods['python-update-tags'].execute(w)
            return

        cursor = w.logical_cursor()
        b      = w.buffer
        line   = b.lines[cursor.y]
        end    = cursor.x

        # the word to complete must end directly before the cursor
        # (the 'walrus'/'goldfinch' messages are the original placeholder
        # diagnostics and are kept as they were)
        if end == 0:
            w.application.set_error('walrus 1')
            return

        word_chars = string.letters + string.digits + '_'
        c = line[end - 1]
        if c == '(':
            w.application.set_error('goldfinch 1')
            return
        elif c not in word_chars:
            w.application.set_error('walrus 2')
            return

        # walk left to the start of the word; since line[end - 1] is a word
        # character, the word always has at least one character
        start = end
        while start > 0 and line[start - 1] in word_chars:
            start -= 1
        word = line[start:end]

        # collect every package name and tag symbol that starts with the
        # word: packages first, then entries, without duplicates
        candidates = []
        seen = sets.Set()
        for p in ctagger.packages.iterkeys():
            if p.startswith(word) and p not in seen:
                candidates.append(p)
                seen.add(p)
        for e in ctagger.entries.itervalues():
            if e.symbol.startswith(word) and e.symbol not in seen:
                candidates.append(e.symbol)
                seen.add(e.symbol)

        if not candidates:
            w.application.set_error('No match: %r' % word)
            return
        elif len(candidates) == 1:
            newword = candidates[0]
            if word == newword:
                w.application.set_error('Already completed!')
                return
            else:
                w.application.set_error('Unique match!')
        else:
            # several matches: complete as far as they all agree
            newword = completer.find_common_string(candidates)
            w.application.set_error('Ambiguous match: %r' % (candidates))

        # replace the typed word with the completion
        b.delete_string(Point(start, cursor.y), Point(end, cursor.y))
        b.insert_string(Point(start, cursor.y), newword)

class PythonDictCleanup(method.Method):
    '''Align assignment blocks and literal dictionaries'''
    def _execute(self, w, **vargs):
        # Re-align the block of matching lines around the cursor.
        # Raises Exception when the cursor's line matches neither regex.
        cursor = w.logical_cursor()
        b = w.buffer

        # pick the regex for this block from the cursor's line; if more than
        # one matches, the last one in the list wins
        line = b.lines[cursor.y]
        myregex = None
        for r in (regex.python_dict_cleanup, regex.python_assign_cleanup):
            if r.match(line):
                myregex = r
        if myregex is None:
            raise Exception("Not a python dict line")

        # this is where we store the regex groups found for each line
        groups_by_line = {cursor.y: myregex.match(line).groups()}

        # find the beginning of the block: extend upwards while lines match
        start = cursor.y
        while start > 0:
            m = myregex.match(b.lines[start - 1])
            if not m:
                break
            start -= 1
            groups_by_line[start] = m.groups()

        # find the end of the block: extend downwards while lines match
        last = len(b.lines) - 1
        end = cursor.y
        while end < last:
            m = myregex.match(b.lines[end + 1])
            if not m:
                break
            end += 1
            groups_by_line[end] = m.groups()

        # assume that the least indented line is correct (group 0 is the
        # indentation), and base the padding on the longest key (group 1)
        all_groups = groups_by_line.values()
        indent_pad = ' ' * min([len(g[0]) for g in all_groups])
        key_w = max([len(g[1]) for g in all_groups])

        # format each line; start..end is contiguous, one entry per line
        formatted = []
        for i in range(start, end + 1):
            groups = groups_by_line[i]
            key, sep, value = groups[1], groups[3], groups[5]
            key_pad = ' ' * (key_w - len(key))
            if sep == '=':
                # assignments: pad before the separator so the '=' line up
                formatted.append(indent_pad + key + key_pad + ' ' + sep + ' ' + value)
            else:
                # other separators stay next to the key; the values line up
                formatted.append(indent_pad + key + sep + ' ' + key_pad + value)
        data = '\n'.join(formatted)

        # remove the old text and add the new
        start_p = Point(0, start)
        if end < last:
            # replace whole lines, up to the start of the line after the block
            end_p = Point(0, end + 1)
            data += '\n'
        else:
            # the block reaches the last line of the buffer, so there is no
            # following line to anchor on: stop at the end of the last line
            # and don't add a trailing newline
            end_p = Point(len(b.lines[end]), end)
        w.kill(start_p, end_p)
        w.insert_string(start_p, data)