pmacs3/highlight.py

import re, sys
from lex import Token

# Terminal escape sequences, in the same order as color_names below: the
# eight '\033[3Nm' foreground codes, the same eight with the ';1' attribute
# added, and finally the '\033[0m' sequence.
color_list = (
    ['\033[3%dm' % code for code in range(0, 8)]
    + ['\033[3%d;1m' % code for code in range(0, 8)]
    + ['\033[0m']
)

# Symbolic names for the entries of color_list; the two lists are parallel,
# so position N here names the escape sequence at position N there.
color_names = [
    'black', 'dred', 'dgreen', 'brown', 'dblue', 'dpurple', 'dcyan', 'lgrey',
    'dgrey', 'lred', 'lgreen', 'yellow', 'lblue', 'lpurple', 'lcyan', 'white',
    'unset',
]

# name -> escape sequence lookup used when writing colored output.
color_dict = dict(zip(color_names, color_list))

def token_match(self, token, name, data=None):
    """Return True if token is named `name` and, optionally, has text `data`.

    self  -- unused; kept only so the existing call signature is unchanged.
    token -- object providing fqname() and a `string` attribute.
    name  -- fully-qualified token name to compare against token.fqname().
    data  -- if not None, token.string must also equal this value.
    """
    # The name must always match. The previous one-line expression was
    # "name-ok and data is None or string == data", which Python parses as
    # "(name-ok and data is None) or (string == data)", so a token with the
    # wrong name still matched whenever its text equalled `data`.
    if token.fqname() != name:
        return False
    return data is None or token.string == data
def token_match2(self, token, name, regex):
    """Check a token's name and match its text against a compiled regex.

    Returns False when token.fqname() differs from `name`; otherwise returns
    whatever regex.match(token.string) returns (a match object or None), so
    the result is meant to be used for its truth value.

    self is unused; it is part of the existing signature.
    """
    if token.fqname() != name:
        return False
    return regex.match(token.string)
def token_vmatch(self, token, *pairs):
    """Return True if token matches any (name, data) pair, else False.

    Each pair is checked with token_match(); a `data` of None means only the
    name has to match. With no pairs the result is False.

    self is unused here and is simply forwarded to token_match().
    """
    for (name, data) in pairs:
        # token_match() takes a leading `self` slot as well; it has to be
        # passed explicitly, otherwise every argument shifts one position
        # and the name string ends up being treated as the token.
        if token_match(self, token, name, data):
            return True
    return False
def token_vmatch2(self, token, *pairs):
    """Return True if token matches any (name, regex) pair, else False.

    Each pair is checked with token_match2(), i.e. the token's name must
    equal `name` and regex.match() must succeed on the token's text. With no
    pairs the result is False.

    self is unused here and is simply forwarded to token_match2().
    """
    for (name, regex) in pairs:
        # Use the regex-aware matcher (token_match compares strings with ==)
        # and pass the leading `self` slot so the arguments line up.
        if token_match2(self, token, name, regex):
            return True
    return False

class Highlighter(object):
    """Keep a per-line list of lexer tokens and update it as text changes.

    self.tokens holds one list per line; each list contains the tokens the
    lexer yielded for that line, in order. The token objects are used through
    these members: y, x, string, name, parent, end_x() and copy().
    """

    def __init__(self, lexer):
        """Store the lexer (used via lex() and resume()); start with no tokens."""
        self.lexer = lexer
        self.tokens = []

    def dump(self, fmt='(%3s, %2s) | %s'):
        """Print (y, x, string) for every stored token, grouped by line.

        fmt -- %-style format string that receives three values.
        """
        # print(...) with a single argument behaves the same as the print
        # statement, and also parses on interpreters without that statement.
        print(fmt % ('y', 'x', 'string'))
        for lineno, group in enumerate(self.tokens):
            print('LINE %d' % lineno)
            for token in group:
                print(fmt % (token.y, token.x, token.string))

    def display(self, token_colors={}, debug=False):
        """Write every token's text to stdout, preceded by its color sequence.

        token_colors -- maps token names to keys of the module-level
                        color_dict. It is only read, never modified.
        debug        -- if true, raise an Exception for a token that has no
                        entry in token_colors instead of using 'white'.

        A token named 'a.b.c' is looked up as 'a.b.c', then 'b.c', then 'c';
        the first key found wins. One newline is written after each line.
        """
        for group in self.tokens:
            for token in group:
                color_name = None
                name_parts = token.name.split('.')
                for start in range(0, len(name_parts)):
                    key = '.'.join(name_parts[start:])
                    if key in token_colors:
                        color_name = token_colors[key]
                        break
                if color_name is None:
                    if debug:
                        raise Exception("no highlighting for %r" % token.name)
                    color_name = 'white'
                sys.stdout.write(color_dict[color_name])
                sys.stdout.write(token.string)
            sys.stdout.write('\n')

    def delete_token(self, y, i):
        """Remove token i of line y together with all of its descendants.

        After the token is popped, the tokens that follow it (continuing onto
        later lines) are popped as well for as long as their parent is one of
        the tokens removed so far. The method returns at the first following
        token that does not descend from the removed token, or when the lines
        run out.
        """
        assert y < len(self.tokens), "%d < %d" % (y, len(self.tokens))
        # the message must index by line (y); indexing by i could itself fail
        # and hide the assertion behind an IndexError
        assert i < len(self.tokens[y]), "%d < %d" % (i, len(self.tokens[y]))

        # stack of removed tokens that may still have children ahead of us
        deleted = [self.tokens[y].pop(i)]
        while y < len(self.tokens):
            while i < len(self.tokens[y]):
                # unwind the stack until its top is the parent of the token
                # we are looking at (or until nothing is left)
                while deleted and self.tokens[y][i].parent is not deleted[-1]:
                    del deleted[-1]
                if not deleted:
                    # this token is unrelated to anything we removed: done
                    return
                # here the token's parent is deleted[-1], so it goes too; the
                # list shrinks, which is why i is not advanced
                deleted.append(self.tokens[y].pop(i))
            i = 0
            y += 1

    def highlight(self, lines):
        """Lex `lines` from the start and rebuild self.tokens from scratch."""
        self.tokens = [[] for _ in lines]
        for token in self.lexer.lex(lines, y=0, x=0):
            self.tokens[token.y].append(token)

    # relexing
    # ======================
    def _insertion_end(self, y1, x1, newlines):
        """Return (y2, x2), the position just after `newlines` added at (y1, x1)."""
        y2 = y1 + len(newlines) - 1
        if y2 == y1:
            x2 = x1 + len(newlines[0])
        else:
            x2 = len(newlines[-1])
        return y2, x2

    def relex(self, lines, y1, x1, y2, x2, token=None):
        """Re-run the lexer from line y1 and merge its output into self.tokens.

        lines  -- the current text, one entry per line.
        y1     -- line at which lexing (re)starts, always from column 0.
        y2     -- last line of the change; once a freshly lexed token beyond
                  this line equals the stored one, merging stops early.
        token  -- if true, lexing resumes from it via lexer.resume();
                  otherwise lexer.lex() is used.
        x1, x2 -- accepted for symmetry with the callers; not used here.

        Stored tokens equal to the freshly lexed ones are kept, stale ones
        are deleted and missing ones are inserted.
        """
        if token:
            gen = self.lexer.resume(lines, y1, 0, token)
        else:
            gen = self.lexer.lex(lines, y1, 0)

        # these keep track of the current y coordinate, the current token index
        # on line[y], and the current "new token", respectively.
        y = y1
        i = 0
        getnext = True
        new_token = None

        while True:
            # if we have overstepped our bounds, then exit!
            if y >= len(lines):
                break

            # if we need another new_token, then try to get it.
            if getnext:
                try:
                    new_token = gen.next()
                    getnext = False
                except StopIteration:
                    # ok, so this means that ALL the rest of the tokens didn't
                    # show up, because we're done. so delete them and exit
                    for j in range(y, len(lines)):
                        del self.tokens[j][i:]
                        i = 0
                    break

            # if our next token is on a future line, we need to just get rid of
            # all our old tokens until we get there
            while new_token.y > y:
                del self.tokens[y][i:]
                i = 0
                y += 1

            # ok, so see if we have current tokens on this line; if so get it
            if i < len(self.tokens[y]):
                old_token = self.tokens[y][i]
                assert old_token.y == y, "%d == %d" % (old_token.y, y)
            else:
                old_token = None

            if old_token is None:
                # since we don't have a previous token at this location, just
                # insert the new one
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
            elif old_token == new_token:
                # if they match, then leave the old one alone
                i += 1
                getnext = True
                if new_token.y > y2:
                    # in this case, we can be sure that the rest of the lines
                    # will lex the same way
                    break
            elif old_token.x < new_token.end_x():
                # ok, so we haven't gotten to this new token yet. obviously
                # this token never showed up in the new lexing, so delete it.
                del self.tokens[y][i]
            elif old_token.x >= new_token.end_x():
                # ok, this token is further out, so just insert the new token
                # ahead of it, move our counter out and continue
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
            else:
                # this should never happen
                raise Exception("this isn't happening")

    # deletion
    # ======================
    def update_del(self, lines, y1, x1, y2, x2):
        """Adjust self.tokens for the removal of the text (y1, x1)..(y2, x2).

        Tokens that start inside the removed range are deleted (together with
        their descendants, see delete_token). The remaining tokens are moved
        so their coordinates describe the text after the deletion, and
        self.tokens shrinks by y2 - y1 lines. `lines` is not used here.

        Returns the token that was first on line y1 before the update, or
        None if that line had no tokens.
        """
        assert y1 >= 0
        assert y1 <= y2

        # first let's delete any token who falls in the range of the change (or,
        # in the case of child tokens, whose parent is being deleted).
        y = y1
        i = 0
        if self.tokens[y1]:
            ctoken = self.tokens[y1][0]
        else:
            ctoken = None
        while y < len(self.tokens):
            if i >= len(self.tokens[y]):
                # nothing (more) to look at on this line. only now do we move
                # on; stepping to the next line after every single token
                # would leave all but the first token of a line unexamined.
                y += 1
                i = 0
                continue
            # figure out if this token is in our range. notice that
            # delete_token() will take care of the need to recursively
            # delete children for us
            token = self.tokens[y][i]
            if token.y > y2 or y == y2 and token.x >= x2:
                # first token past the end of the range: we are done
                break
            elif token.y < y1 or token.y == y1 and token.x < x1:
                # starts before the range: keep it
                i += 1
            else:
                # in range; the list shrinks, so i already names the next one
                self.delete_token(y, i)

        # ok, so now we need to "adjust" the (x,y) coordinates of all the tokens
        # after the change. first we will copy over the pre-deletion tokens.
        newtokens = [[] for _ in range(0, len(self.tokens) - y2 + y1)]

        for y in range(0, y1):
            newtokens[y].extend(self.tokens[y])

        # then the tokens which occurred on the same line as the end of the
        # deletion.
        if y2 == y1:
            # single-line deletion: whatever starts at or after the removed
            # span slides left by its width, exactly as in the multi-line
            # case below
            for token in self.tokens[y1]:
                if token.x >= x2:
                    token.x = token.x - x2 + x1
                newtokens[y1].append(token)
        else:
            newtokens[y1].extend(self.tokens[y1])
            for token in self.tokens[y2]:
                token.x = token.x - x2 + x1
                token.y = y1
                newtokens[y1].append(token)

        # finally, we will copy over the tokens from subsequent lines
        for y in range(y2 + 1, len(self.tokens)):
            for token in self.tokens[y]:
                token.y = token.y - y2 + y1
                newtokens[y - y2 + y1].append(token)

        # now save our new tokens
        self.tokens = newtokens
        return ctoken

    def relex_del(self, lines, y1, x1, y2, x2):
        """Handle a deletion: fix token offsets, then relex from line y1."""
        # first let's update our existing tokens to fix their offsets, etc.
        ctoken = self.update_del(lines, y1, x1, y2, x2)

        # then let's do some relexing
        self.relex(lines, y1, x1, y2, x2, ctoken)

    # addition
    # ======================
    def update_add(self, lines, y1, x1, newlines):
        """Adjust self.tokens for the insertion of `newlines` at (y1, x1).

        newlines -- non-empty list of strings; the first entry is added on
                    line y1 and every further entry starts a new line.

        Tokens before the insertion point stay put, a token that spans the
        point is split in two copies, and tokens after it are moved to the
        end of the inserted text. Placeholder tokens named 'new' are created
        for the inserted text itself. self.tokens grows by len(newlines) - 1
        lines. `lines` is not used here.

        Returns the token that was first on line y1 before the update, or
        None if that line had no tokens.
        """
        assert y1 >= 0
        assert len(newlines) > 0

        y2, x2 = self._insertion_end(y1, x1, newlines)
        xdelta = x2 - x1
        ydelta = y2 - y1
        if self.tokens[y1]:
            ctoken = self.tokens[y1][0]
        else:
            ctoken = None

        # construct a new token data structure, with the right number of lines
        newtokens = [[] for _ in range(0, len(self.tokens) + ydelta)]

        # copy the tokens that show up before the changed line
        for y in range(0, y1):
            newtokens[y] = self.tokens[y]

        # process the tokens that show up on the changed line
        post_change_list = []
        for t in self.tokens[y1]:
            tx1 = t.x
            tx2 = t.x + len(t.string)
            if tx2 <= x1:
                # '*| ' before the insertion
                newtokens[y1].append(t)
            elif tx1 >= x1:
                # ' |*' after the insertion
                t.x += xdelta
                t.y = y2
                post_change_list.append(t)
            else:
                # '*|*' around the insertion: keep the left part in place and
                # move the right part behind the inserted text
                t1 = t.copy()
                t1.string = t.string[:x1 - tx1]
                newtokens[y1].append(t1)

                t2 = t.copy()
                t2.string = t.string[x1 - tx1:]
                t2.x = x2
                t2.y = y2
                post_change_list.append(t2)

        # add in the new data
        newtokens[y1].append(Token('new', '', y1, x1, newlines[0]))
        for i in range(1, len(newlines)):
            yi = y1 + i
            newtokens[yi].append(Token('new', '', yi, 0, newlines[i]))

        # add the post-change tokens back
        for t in post_change_list:
            newtokens[y2].append(t)

        # for each subsequent line, fix its tokens' y coordinates
        for y in range(y1 + 1, len(self.tokens)):
            for t in self.tokens[y]:
                t.y += ydelta
                newtokens[t.y].append(t)

        # ok, now that we have built a correct new structure, store a reference
        # to it instead.
        self.tokens = newtokens
        return ctoken

    def relex_add(self, lines, y1, x1, newlines):
        """Handle an insertion: fix token offsets, then relex from line y1."""
        # first let's update our existing tokens to fix their offsets, etc.
        ctoken = self.update_add(lines, y1, x1, newlines)

        # create some extra info that we need
        y2, x2 = self._insertion_end(y1, x1, newlines)

        # now let's start the relexing process
        self.relex(lines, y1, x1, y2, x2, ctoken)
--HG-- branch : pmacs2 2007-10-21 20:55:43 -04:00			`import re, sys`
			`from lex import Token`

			`color_list = []`
			`color_list.extend(['\033[3%dm' % x for x in range(0, 8)])`
			`color_list.extend(['\033[3%d;1m' % x for x in range(0, 8)])`
			`color_list.extend(['\033[0m'])`

			`color_names = [`
			`'black', 'dred', 'dgreen', 'brown', 'dblue', 'dpurple', 'dcyan', 'lgrey',`
			`'dgrey', 'lred', 'lgreen', 'yellow', 'lblue', 'lpurple', 'lcyan', 'white',`
			`'unset',`
			`]`

			`color_dict ={}`
			`for i in range(0, len(color_list)):`
			`color_dict[color_names[i]] = color_list[i]`

			`def token_match(self, token, name, data=None):`
			`return token.fqname() == name and data is None or token.string == data`
			`def token_match2(self, token, name, regex):`
			`return token.fqname() == name and regex.match(token.string)`
			`def token_vmatch(self, token, *pairs):`
			`for (name, data) in pairs:`
			`if token_match(token, name, data):`
			`return True`
			`return False`
			`def token_vmatch2(self, token, *pairs):`
			`for (name, regex) in pairs:`
			`if token_match(token, name, regex):`
			`return True`
			`return False`

--HG-- branch : pmacs2 2008-03-14 17:17:04 -04:00			`class Highlighter(object):`
--HG-- branch : pmacs2 2007-10-21 20:55:43 -04:00			`def __init__(self, lexer):`
color-data-buffer exists --HG-- branch : pmacs2 2008-03-28 09:44:39 -04:00			`self.lexer = lexer`
			`self.tokens = []`
--HG-- branch : pmacs2 2007-10-21 20:55:43 -04:00
			`def dump(self, fmt='(%3s, %2s) \| %s'):`
			`print fmt % ('y', 'x', 'string')`
			`for i in range(0, len(self.tokens)):`
			`group = self.tokens[i]`
			`print 'LINE %d' % i`
			`for token in group:`
			`print fmt % (token.y, token.x, token.string)`

			`def display(self, token_colors={}, debug=False):`
			`for group in self.tokens:`
			`for token in group:`
			`color_name = None`
			`name_parts = token.name.split('.')`
			`for i in range(0, len(name_parts)):`
			`if '.'.join(name_parts[i:]) in token_colors:`
			`color_name = token_colors['.'.join(name_parts[i:])]`
			`break`
			`if color_name is not None:`
			`sys.stdout.write(color_dict[color_name])`
			`pass`
			`elif debug:`
			`raise Exception, "no highlighting for %r" % token.name`
			`else:`
			`color_name = 'white'`
			`sys.stdout.write(color_dict[color_name])`
			`sys.stdout.write(token.string)`
			`sys.stdout.write('\n')`

			`def delete_token(self, y, i):`
			`assert y < len(self.tokens), "%d < %d" % (y, len(self.tokens))`
			`assert i < len(self.tokens[y]), "%d < %d" % (i, len(self.tokens[i]))`
			`deleted = []`
			`deleted.append(self.tokens[y].pop(i))`
			`while y < len(self.tokens):`
			`while i < len(self.tokens[y]):`
			`while deleted and self.tokens[y][i].parent is not deleted[-1]:`
			`del deleted[-1]`
			`if not deleted:`
			`return`
			`elif self.tokens[y][i].parent is deleted[-1]:`
			`deleted.append(self.tokens[y].pop(i))`
			`else:`
			`raise Exception, "huh?? %r %r" % (self.tokens[y][i].parent,`
			`deleted)`
			`i = 0`
			`y += 1`

			`def highlight(self, lines):`
			`self.tokens = [[] for l in lines]`
			`for token in self.lexer.lex(lines, y=0, x=0):`
			`self.tokens[token.y].append(token)`

			`# relexing`
			`# ======================`
			`def relex(self, lines, y1, x1, y2, x2, token=None):`
			`if token:`
			`gen = self.lexer.resume(lines, y1, 0, token)`
			`else:`
			`gen = self.lexer.lex(lines, y1, 0)`

			`# these keep track of the current y coordinate, the current token index`
			`# on line[y], and the current "new token", respectively.`
improvements in relexing speed --HG-- branch : pmacs2 2008-04-05 14:06:49 -04:00			`y = y1`
			`i = 0`
			`getnext = True`
			`new_token = None`
--HG-- branch : pmacs2 2007-10-21 20:55:43 -04:00
			`while True:`
			`# if we have overstepped our bounds, then exit!`
			`if y >= len(lines):`
			`break`

			`# if we need another new_token, then try to get it.`
			`if getnext:`
			`try:`
			`new_token = gen.next()`
			`getnext = False`
			`except StopIteration:`
			`# ok, so this means that ALL the rest of the tokens didn't`
			`# show up, because we're done. so delete them and exit`
			`for j in range(y, len(lines)):`
			`del self.tokens[j][i:]`
			`i = 0`
			`break`

			`# if our next token is one a future line, we need to just get rid of`
			`# all our old tokens until we get there`
improvements in relexing speed --HG-- branch : pmacs2 2008-04-05 14:06:49 -04:00			`#onfuture = False`
--HG-- branch : pmacs2 2007-10-21 20:55:43 -04:00			`while new_token.y > y:`
			`del self.tokens[y][i:]`
			`i = 0`
			`y += 1`

			`# ok, so see if we have current tokens on this line; if so get it`
			`if i < len(self.tokens[y]):`
			`old_token = self.tokens[y][i]`
			`assert old_token.y == y, "%d == %d" % (old_token.y, y)`
			`else:`
			`#raise Exception, "K %d %r" % (i, new_token)`
			`old_token = None`

			`if old_token is None:`
			`#raise Exception, "J %d %r" % (i, new_token)`
			`# since we don't have a previous token at this location, just`
			`# insert the new one`
			`self.tokens[y].insert(i, new_token)`
			`i += 1`
			`getnext = True`
			`elif old_token == new_token:`
			`# if they match, then leave the old one alone`
			`i += 1`
			`getnext = True`
			`if new_token.y > y2:`
			`# in this case, we can be sure that the rest of the lines`
			`# will lex the same way`
			`break`
			`elif old_token.x < new_token.end_x():`
			`# ok, so we haven't gotten to this new token yet. obviously`
			`# this token never showed up in the new lexing, so delete it.`
			`del self.tokens[y][i]`
			`elif old_token.x >= new_token.end_x():`
			`# ok, this token is further out, so just insert the new token`
			`# ahead of it, move our counter out and continue`
			`self.tokens[y].insert(i, new_token)`
			`i += 1`
			`getnext = True`
			`else:`
			`# this should never happen`
			`raise Exception, "this isn't happening"`

			`# deletion`
			`# ======================`
			`def update_del(self, lines, y1, x1, y2, x2):`
			`assert y1 >= 0`
			`assert y1 <= y2`

			`# first let's delete any token who falls in the range of the change (or,`
			`# in the case of child tokens, whose parent is being deleted).`
			`y = y1`
			`i = 0`
			`done = False`
			`if self.tokens[y1]:`
			`ctoken = self.tokens[y1][0]`
			`else:`
			`ctoken = None`
			`while not done:`
			`if y >= len(self.tokens):`
			`break`
			`if i < len(self.tokens[y]):`
			`# figure out if this token is in our range. notice that`
			`# delete_token() will take care of the need to recursively`
			`# delete children for us`
			`token = self.tokens[y][i]`
			`if token.y > y2 or y == y2 and token.x >= x2:`
			`done = True`
			`elif token.y < y1 or token.y == y1 and token.x < x1:`
			`i += 1`
			`else:`
			`self.delete_token(y, i)`
			`y += 1`
			`i = 0`

			`# ok, so now we need to "adjust" the (x,y) coordinates of all the tokens`
			`# after the change. first we will copy over the pre-deletion tokens.`
			`newtokens = [[] for x in range(0, len(self.tokens) - y2 + y1)]`

			`for y in range(0, y1):`
			`for token in self.tokens[y]:`
			`newtokens[y].append(token)`

			`# then the tokens which occured on the same line as the end of the`
			`# deletion.`
			`for token in self.tokens[y1]:`
			`newtokens[y1].append(token)`
			`if y2 != y1:`
			`for token in self.tokens[y2]:`
			`token.x = token.x - x2 + x1`
			`token.y = y1`
			`newtokens[y1].append(token)`

			`# finally, we will copy over the tokens from subsequent lines`
			`for y in range(y2 + 1, len(self.tokens)):`
			`for token in self.tokens[y]:`
			`token.y = token.y - y2 + y1`
			`newtokens[y - y2 + y1].append(token)`

			`# now save our new tokens`
			`self.tokens = newtokens`
			`return ctoken`

			`def relex_del(self, lines, y1, x1, y2, x2):`
			`# first let's update our existing tokens to fix their offsets, etc.`
			`ctoken = self.update_del(lines, y1, x1, y2, x2)`

			`# then let's do some relexing`
			`self.relex(lines, y1, x1, y2, x2, ctoken)`

			`# addition`
			`# ======================`
			`def update_add(self, lines, y1, x1, newlines):`
			`assert y1 >= 0`
			`assert len(newlines) > 0`

			`y2 = y1 + len(newlines) - 1`
			`if y2 == y1:`
			`x2 = x1 + len(newlines[0])`
			`else:`
			`x2 = len(newlines[-1])`

			`xdelta = x2 - x1`
			`ydelta = y2 - y1`
			`if self.tokens[y1]:`
			`ctoken = self.tokens[y1][0]`
			`else:`
			`ctoken = None`

			`# construct a new token data structure, with the right number of lines`
			`newtokens = []`
			`for i in range(0, len(self.tokens) + ydelta):`
			`newtokens.append([])`

			`# copy the tokens that show up before the changed line`
			`for y in range(0, y1):`
			`newtokens[y] = self.tokens[y]`

			`# process the tokens that show up on the changed line`
			`post_change_list = []`
			`for t in self.tokens[y1]:`
			`tx1 = t.x`
			`tx2 = t.x + len(t.string)`
			`ty = t.y`
			`ts = t.string`
			`if tx2 <= x1:`
			`# '*\| ' before the insertion`
			`newtokens[y1].append(t)`
			`elif tx1 >= x1:`
			`# ' \|*' after the insertion`
			`t.x += xdelta`
			`t.y = y2`
			`post_change_list.append(t)`
			`else:`
			`# '\|' around the insertion`
			`t1 = t.copy()`
			`t1.string = t.string[:x1 - tx1]`
			`newtokens[y1].append(t1)`

			`t2 = t.copy()`
			`t2.string = t.string[x1 - tx1:]`
			`t2.x = x2`
			`t2.y = y2`
			`post_change_list.append(t2)`

			`# add in the new data`
			`newtokens[y1].append(Token('new', '', y1, x1, newlines[0]))`
			`for i in range(1, len(newlines)):`
			`yi = y1 + i`
			`newtokens[yi].append(Token('new', '', yi, 0, newlines[i]))`

			`# add the post-change tokens back`
			`for t in post_change_list:`
			`newtokens[y2].append(t)`

			`# for each subsequent line, fix it's tokens' y coordinates`
			`for y in range(y1 + 1, len(self.tokens)):`
			`for t in self.tokens[y]:`
			`t.y += ydelta`
			`newtokens[t.y].append(t)`

			`# ok, now that we have built a correct new structure, store a reference`
			`# to it instead.`
			`self.tokens = newtokens`
			`return ctoken`

			`def relex_add(self, lines, y1, x1, newlines):`
			`# first let's update our existing tokens to fix their offsets, etc.`
			`ctoken = self.update_add(lines, y1, x1, newlines)`

			`# create some extra info that we need`
			`y2 = y1 + len(newlines) - 1`
			`if y2 == y1:`
			`x2 = x1 + len(newlines[0])`
			`else:`
			`x2 = len(newlines[-1])`

			`# now let's start the relexing process`
			`self.relex(lines, y1, x1, y2, x2, ctoken)`