# pmacs3/highlight2.py

import sys
import lex2
color_list = []
color_list.extend(['\033[3%dm' % x for x in range(0, 8)])
color_list.extend(['\033[3%d;1m' % x for x in range(0, 8)])
color_list.extend(['\033[0m'])
color_names = [
    'black', 'dred', 'dgreen', 'brown', 'dblue', 'dpurple', 'dcyan', 'lgrey',
    'dgrey', 'lred', 'lgreen', 'yellow', 'lblue', 'lpurple', 'lcyan', 'white',
    'unset',
]
color_dict = {}
for i in range(0, len(color_list)):
    color_dict[color_names[i]] = color_list[i]
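# for illustration: the tables above pair each name with its ANSI escape, so
# color_dict['dred'] is '\033[31m', color_dict['lred'] is '\033[31;1m', and
# color_dict['unset'] is '\033[0m' (which resets the terminal attributes).
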
class Highlighter:
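    """Maintain syntax-highlighting tokens for a buffer of lines.

    Tokens produced by a lex2 lexer are stored per-line in self.tokens;
    the update_*/relex_* methods keep that structure in sync as text is
    added to or deleted from the buffer, relexing only what is needed.
    """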
    def __init__(self, lexer):
        self.lexer = lexer
        self.tokens = []
        self.line_contexts = {}

    def dump(self, fmt='(%3s, %2s) | %s'):
        print fmt % ('y', 'x', 'string')
        for i in range(0, len(self.tokens)):
            group = self.tokens[i]
            print 'LINE %d' % i
            for token in group:
                print fmt % (token.y, token.x, token.string)

    def display(self, token_colors={}, debug=False):
        for group in self.tokens:
            for token in group:
                color_name = None
                name_parts = token.name.split('.')
                for i in range(0, len(name_parts)):
                    if '.'.join(name_parts[i:]) in token_colors:
                        color_name = token_colors['.'.join(name_parts[i:])]
                        break
                if color_name is not None:
                    sys.stdout.write(color_dict[color_name])
                elif debug:
                    raise Exception, "no highlighting for %r" % token.name
                else:
                    color_name = 'white'
                    sys.stdout.write(color_dict[color_name])
                sys.stdout.write(token.string)
            sys.stdout.write('\n')

    def highlight(self, lines):
        self.tokens = [[] for l in lines]
        self.lexer.lex(lines, y=0, x=0)
        for token in self.lexer:
            self.tokens[token.y].append(token)
        self.line_contexts = dict(self.lexer.line_contexts)

    # relexing
    # ======================
    def relex(self, lines, y1, x1, y2, x2):
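        """Incrementally relex the buffer after a change.

        (y1, x1)..(y2, x2) is the region that changed. Lexing resumes from
        the saved context for line y1 and the new tokens are merged into
        self.tokens; once past the changed region, the merge stops as soon
        as a new token matches the old one at the same position.
        """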
        # start the relexing process
        #self.lexer.lex(lines, y1, 0)
        rulecontexts = self.line_contexts[y1]
        self.lexer.resume(lines, y1, 0, rulecontexts)
        # these keep track of the current y coordinate, the current token index
        # on line[y], and the current "new token", respectively.
        y = y1
        i = 0
        getnext = True
        new_token = None
        while True:
            # if we have overstepped our bounds, then exit!
            if y >= len(lines):
                break
            # if we need another new_token, then try to get it.
            if getnext:
                try:
                    new_token = self.lexer.next()
                    getnext = False
                except StopIteration:
                    # ok, so this means that ALL the rest of the tokens didn't
                    # show up, because we're done. so delete them and exit
                    for j in range(y, len(lines)):
                        del self.tokens[j][i:]
                        i = 0
                    break
            # if our next token is on a future line, we need to just get rid of
            # all our old tokens until we get there
            while new_token.y > y:
                del self.tokens[y][i:]
                i = 0
                y += 1
            # ok, so see if we have current tokens on this line; if so get it
            if i < len(self.tokens[y]):
                old_token = self.tokens[y][i]
                assert old_token.y == y, "%d == %d" % (old_token.y, y)
            else:
                old_token = None
            if old_token is None:
                # since we don't have a previous token at this location, just
                # insert the new one
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
            elif '.' not in old_token.name and old_token == new_token:
                # if they match, then leave the old one alone
                i += 1
                getnext = True
                if new_token.y >= y2 and new_token.end_x() >= x2:
                    # in this case, we can (probably) assume that the rest of
                    # the lines will lex the same way
                    break
            elif old_token.x < new_token.end_x():
                # ok, so we haven't gotten to this new token yet. obviously
                # this token never showed up in the new lexing, so delete it.
                del self.tokens[y][i]
            elif old_token.x >= new_token.end_x():
                # ok, this token is further out, so just insert the new token
                # ahead of it, move our counter out and continue
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
            else:
                # this should never happen
                raise Exception, "this isn't happening"

    # deletion
    # ======================
    def update_del(self, lines, y1, x1, y2, x2):
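        """Adjust self.tokens after the text from (y1, x1) to (y2, x2) is deleted.

        Tokens before the deleted region are kept as-is, tokens inside it are
        dropped or truncated, and tokens after it are shifted up and/or left.
        No relexing happens here; relex_del() does that afterwards.
        """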
        assert y1 >= 0
        assert y1 <= y2
        assert y2 < len(lines)
        xdelta = x2 - x1
        ydelta = y2 - y1
        # construct a new token data structure; it will have one list for
        # every y index in lines. also, fill in tokens before the change
        newtokens = [[] for x in lines]
        for y in range(0, y1):
            newtokens[y] = self.tokens[y]
        # so for each line we currently have, we need to process every token,
        # transferring them from the old structure to the new, and modifying
        # them appropriately to take the change into account.
        for y in range(y1, len(self.tokens)):
            while self.tokens[y]:
                # so remove the token from the old structure, and figure out
                # where it stands in relation to the deletion
                token = self.tokens[y].pop(0)
                tx1 = token.x
                tx2 = token.x + len(token.string)
                # the notation below (e.g. "*|*| ") shows where the token's
                # text falls relative to the deleted region, i.e.:
                # before|inside|after the deletion
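                # for example, with a single-line deletion from x1=3 to x2=7:
                # a token at columns 1-4 ("*|*| ") keeps only its first two
                # characters, a token at columns 4-5 (" |*| ") is dropped, and
                # a token at columns 5-8 (" |*|*") keeps its text from column
                # 7 onward, shifted left to start at x1=3.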
                if (y, tx2) <= (y1, x1):
                    # *| |
                    newtokens[y].append(token)
                elif (y, tx1) >= (y2, x2):
                    # | |*
                    token.y -= ydelta
                    if y == y2:
                        token.x -= xdelta
                    newtokens[token.y].append(token)
                elif (y, tx1) < (y1, x1):
                    if (y, tx2) <= (y2, x2):
                        # *|*|
                        token.string = token.string[:x1 - tx1]
                    else:
                        # *|*|*
                        token.string = token.string[:x1 - tx1] + token.string[x2 - tx1:]
                    newtokens[y].append(token)
                elif (y, tx1) < (y2, x2):
                    if (y, tx2) <= (y2, x2):
                        # |*|
                        pass
                    else:
                        # |*|*
                        token.x = x1
                        token.y -= ydelta
                        token.string = token.string[x2 - tx1:]
                        newtokens[token.y].append(token)
                else:
                    raise Exception, "this should never happen: %r" % token
        # ok, now that we have built a correct new structure, store a reference
        # to it instead.
        self.tokens = newtokens

    def relex_del(self, lines, y1, x1, y2, x2):
        # first let's update our existing tokens to fix their offsets, etc.
        self.update_del(lines, y1, x1, y2, x2)
        # then let's do some relexing
        self.relex(lines, y1, x1, y2, x2)

    # addition
    # ======================
    def update_add(self, lines, y1, x1, newlines):
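        """Adjust self.tokens after `newlines` is inserted at (y1, x1).

        Tokens before the insertion point are kept, tokens after it are
        shifted down and/or right, and the inserted text gets placeholder
        'new' tokens. No relexing happens here; relex_add() does that.
        """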
        assert y1 >= 0
        assert len(newlines) > 0
        y2 = y1 + len(newlines) - 1
        if y2 == y1:
            x2 = x1 + len(newlines[0])
        else:
            x2 = len(newlines[-1])
        xdelta = x2 - x1
        ydelta = y2 - y1
        # construct a new token data structure, with the right number of lines
        newtokens = []
        for i in range(0, len(self.tokens) + ydelta):
            newtokens.append([])
        # copy the tokens that show up before the changed line
        for y in range(0, y1):
            newtokens[y] = self.tokens[y]
        # process the tokens that show up on the changed line
        post_change_list = []
        for t in self.tokens[y1]:
            tx1 = t.x
            tx2 = t.x + len(t.string)
            ty = t.y
            ts = t.string
            if tx2 <= x1:
                # '*| ' before the insertion
                newtokens[y1].append(t)
            elif tx1 >= x1:
                # ' |*' after the insertion
                t.x += xdelta
                t.y = y2
                post_change_list.append(t)
            else:
                # '*|*' around the insertion
                t1 = t.copy()
                t1.string = t.string[:x1 - tx1]
                newtokens[y1].append(t1)
                t2 = t.copy()
                t2.string = t.string[x1 - tx1:]
                t2.x = x2
                t2.y = y2
                post_change_list.append(t2)
        # add in the new data
        newtokens[y1].append(lex2.Token('new', '', y1, x1, newlines[0]))
        for i in range(1, len(newlines)):
            yi = y1 + i
            newtokens[yi].append(lex2.Token('new', '', yi, 0, newlines[i]))
        # add the post-change tokens back
        for t in post_change_list:
            newtokens[y2].append(t)
        # for each subsequent line, fix its tokens' y coordinates
        for y in range(y1 + 1, len(self.tokens)):
            for t in self.tokens[y]:
                t.y += ydelta
                newtokens[t.y].append(t)
        # ok, now that we have built a correct new structure, store a reference
        # to it instead.
        self.tokens = newtokens

    def relex_add(self, lines, y1, x1, newlines):
        # first let's update our existing tokens to fix their offsets, etc.
        self.update_add(lines, y1, x1, newlines)
        # create some extra info that we need
        y2 = y1 + len(newlines) - 1
        if y2 == y1:
            x2 = x1 + len(newlines[0])
        else:
            x2 = len(newlines[-1])
        # now let's start the relexing process
        self.relex(lines, y1, x1, y2, x2)
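
# ---------------------------------------------------------------------------
# minimal usage sketch (illustration only). a real lexer comes from lex2 and
# is configured elsewhere in pmacs; StubLexer and StubToken below are
# hypothetical stand-ins that emit one 'text' token per line, just to show
# how Highlighter.highlight() and display() fit together. note that the
# module-level "import lex2" above still has to succeed to run this file.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    class StubToken:
        def __init__(self, name, y, x, s):
            self.name, self.y, self.x, self.string = name, y, x, s
        def end_x(self):
            return self.x + len(self.string)
        def copy(self):
            return StubToken(self.name, self.y, self.x, self.string)

    class StubLexer:
        def __init__(self):
            self.line_contexts = {}
            self.pending = []
        def lex(self, lines, y=0, x=0):
            # one token per line, with an empty per-line context
            self.pending = [StubToken('text', i, 0, l) for i, l in enumerate(lines)]
            self.line_contexts = dict([(i, None) for i in range(len(lines))])
        def resume(self, lines, y, x, rulecontexts):
            # relex everything, then keep only tokens at or after line y
            self.lex(lines)
            self.pending = [t for t in self.pending if t.y >= y]
        def __iter__(self):
            return self
        def next(self):
            if not self.pending:
                raise StopIteration
            return self.pending.pop(0)

    lines = ['def foo():', '    return 1']
    h = Highlighter(StubLexer())
    h.highlight(lines)
    h.display(token_colors={'text': 'lgreen'})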