pmacs3/highlight.py

import point
# to be clear:
# tokens are generated by the lexer from the buffer, and correspond to lexical
# information about a logical portion of the buffer.
# regions are derived from a combination of the lexical tokens (which correspond
# to the logical buffer) and the physical line endings (i.e. dependent on screen
# width, etc.)
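# as a rough illustration (hypothetical token values, not tied to any
# particular grammar): lexing the buffer text "x = 'a fairly long string'"
# might yield tokens along the lines of
#     Token(start=0, end=1,  name='identifier')
#     Token(start=4, end=26, name='string')
# and if the screen width forces the string token to wrap onto two physical
# lines, get_regions() below will emit two Region objects for it, one per
# physical line, both pointing back at the same token and color.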
class Highlighter:
'''class used by modes to manage syntax highlighting'''
def __init__(self, m):
self.mode = m
self.tokens = None
self.regions = None
def invalidate_tokens(self):
self.tokens = None
self.invalidate_regions()
def invalidate_regions(self):
self.regions = None
def invalidate_token_range(self, start_offset, end_offset, m, n, diff):
        # fix up all the tokens' offsets, delete any token which spans
        # the change, and discard m tokens after and n tokens before it
offset = start_offset
i = 0
last_index_before = None
first_index_after = None
while i < len(self.tokens):
t = self.tokens[i]
t.debug = False
if t.end <= start_offset:
last_index_before = i
i += 1
elif t.start >= end_offset:
if first_index_after is None:
first_index_after = i
t.start += diff
t.end += diff
i += 1
else:
if offset == start_offset:
offset = self.tokens[i].start
del self.tokens[i]
        # delete m tokens further forward
        for i in range(0, m):
            if first_index_after is None:
                break
            elif first_index_after < len(self.tokens) - 1:
                # a later token slides into this index, so it stays valid
                del self.tokens[first_index_after]
            elif first_index_after == len(self.tokens) - 1:
                # we are deleting the last remaining token after the change
                del self.tokens[first_index_after]
                first_index_after = None
# delete n tokens further back
for i in range(0, n):
if last_index_before is None:
break
elif last_index_before > 0:
del self.tokens[last_index_before]
last_index_before -= 1
elif last_index_before == 0:
del self.tokens[0]
last_index_before = None
break
return (last_index_before, first_index_after)
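    # a small worked example (hypothetical offsets): if three characters are
    # inserted at offset 10, then start_offset == end_offset == 10 and
    # diff == 3; a token spanning [2, 8) is left alone, a token spanning
    # [14, 20) is shifted to [17, 23), and a token spanning [8, 14) is
    # deleted outright because it straddles the change.  on top of that,
    # m tokens after and n tokens before the change are discarded, and the
    # returned indices tell reparse_region() roughly where to restart and
    # where it can stop re-lexing.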
def reparse_region(self, last_index_before, first_index_after):
i = last_index_before
if i is None:
i = 0
tokens_before = False
start_offset = 0
else:
tokens_before = True
start_offset = self.tokens[i].start
j = first_index_after
if j is None or j >= len(self.tokens):
j = -1
tokens_after = False
end_offset = None
else:
tokens_after = True
end_offset = self.tokens[j].end
# FIXME
# new things the strategy should do include:
# 1. not generating the huge "data" string
# 2. really generating the "roll-back" with
# data not just by rolling back the index
# of the lexer
# 3. pass in only as much data as you need
# to do the minimal check, and for the
# "after the change" checking, use append
# to strategically keep the string 1-2
# tokens ahead of where it needs to be
#data = self.mode.window.buffer.make_string()
#self.mode.lexer.lex(data, start_offset)
if self.tokens:
buf_index = max(self.tokens[i].start - 100, 0)
else:
buf_index = 0
if end_offset is None:
data = self.mode.window.buffer.make_string(start=buf_index, end=None)
else:
data = self.mode.window.buffer.make_string(start=buf_index,
end=end_offset + 100)
self.mode.lexer.lex(data, start_offset - buf_index, buf_index)
saved_t = False
while True:
if saved_t is True:
                # we want to retry t again
saved_t = False
else:
try:
t = self.mode.lexer.next()
if t is None:
continue
                except StopIteration:
# we have no more tokens, so delete whatever was left and
# then return
if i < len(self.tokens):
del self.tokens[i:]
self.mode.lexer.lex()
return
if i >= len(self.tokens):
                # we don't have any old tokens this far out, so just keep it
t.debug = True
self.tokens.append(t)
i += 1
elif t.end <= self.tokens[i].start:
# we shouldn't get here if we are before the change
assert not tokens_before
# the token is before our tokens, so we can just add it
t.debug = True
self.tokens.insert(i, t)
i += 1
elif t.start == self.tokens[i].start and \
t.end == self.tokens[i].end and \
t.name == self.tokens[i].name:
# the token is identical to ours, so we can either
# stop if we are after the change, or confirm the
# start point if we are before
if tokens_before:
tokens_before = False
i += 1
else:
self.tokens[i].debug = True
self.mode.lexer.lex()
return
else:
if i < len(self.tokens):
del self.tokens[i]
if tokens_before and i < 0:
raise Exception, "oh no!"
# we need to keep sliding our window back
i -= 1
start_offset = self.tokens[i].start
self.mode.lexer.lex(data, start_offset)
elif tokens_before:
# ok, now we aren't sliding our window back
# and can proceed normally
tokens_before = False
saved_t = True
else:
# the new token conflicts with the old one, so delete
# the old one and try again
saved_t = True
raise Exception, "we should never get here (dolphin 2)"
def _region_changed_slow(self):
self.invalidate_tokens()
self.get_regions()
return
def _region_added_dumb(self, p, xdiff, ydiff, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
start_offset = self.mode.window.buffer.get_point_offset(p)
end_offset = start_offset
assert (xdiff > 0 and ydiff >= 0) or ydiff > 0
if ydiff > 0:
p2 = point.Point(p.x + xdiff, p.y + ydiff)
elif ydiff == 0:
p2 = point.Point(p.x + xdiff, p.y)
new_offset = self.mode.window.buffer.get_point_offset(p2)
diff = new_offset - start_offset
assert diff > 0
        # move the tokens' start and end points so that the additions
        # (while not being correct) won't break the existing
        # highlighting
for t in self.tokens:
t.debug = False
if t.end <= start_offset:
pass
elif t.start >= end_offset:
t.start += diff
t.end += diff
else:
t.end += diff
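    # for example (hypothetical offsets): typing one character at offset 5,
    # inside a token spanning [3, 9), leaves tokens ending at or before
    # offset 5 alone, stretches that token to [3, 10), and slides every
    # later token forward by one.  the colors are briefly approximate, but
    # nothing has to be re-lexed.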
def _region_added_complex(self, p, xdiff, ydiff, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
start_offset = self.mode.window.buffer.get_point_offset(p)
end_offset = start_offset
assert ydiff >= 0
if ydiff > 0:
p2 = point.Point(p.x + xdiff, p.y + ydiff)
elif ydiff == 0:
p2 = point.Point(p.x + xdiff, p.y)
new_offset = self.mode.window.buffer.get_point_offset(p2)
diff = new_offset - start_offset
(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, diff)
#(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, diff)
self.reparse_region(i, j)
def region_added(self, p, xdiff, ydiff, s):
        if s == ' ' or s == '\t':
self._region_added_dumb(p, xdiff, ydiff, s)
else:
self._region_added_complex(p, xdiff, ydiff, s)
def _region_removed_dumb(self, p1, p2, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
#diff = r
diff = len(s)
start_offset = self.mode.window.buffer.get_point_offset(p1)
end_offset = start_offset + diff
        # move the tokens' start and end points so that the deletions
        # (while not being correct) won't break the existing
        # highlighting
i = 0
while i < len(self.tokens):
t = self.tokens[i]
t.debug = False
# if our token contains a trailing newline, certain
# deletions may not match unless we pretend that the end
# is one character earlier
if t.string.endswith('\n'):
t_end = t.end - 1
else:
t_end = t.end
if t_end <= start_offset:
pass
elif t.start >= start_offset and t_end <= end_offset:
del self.tokens[i]
continue
elif t_end >= start_offset and t_end <= end_offset:
t.end = start_offset
elif t.start >= start_offset and t.start <= end_offset:
t.start = end_offset
else:
t.start -= diff
t.end -= diff
if t.start == t.end:
del self.tokens[i]
continue
else:
assert t.start < t.end
i += 1
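    # for example (hypothetical offsets): deleting the two characters at
    # offsets [10, 12) drops any token lying wholly inside that range,
    # truncates a token spanning [6, 11) to [6, 10), moves the start of a
    # token spanning [11, 15) up to offset 12, and shifts tokens that lie
    # entirely after the deletion back by two.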
def _region_removed_complex(self, p1, p2, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
diff = len(s)
start_offset = self.mode.window.buffer.get_point_offset(p1)
end_offset = start_offset + diff
(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, -diff)
#(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, -diff)
self.reparse_region(i, j)
def region_removed(self, p1, p2, s):
self._region_removed_complex(p1, p2, s)
def get_tokens(self):
if self.tokens is None:
self.lex_buffer()
return self.tokens
def lex_buffer(self):
'''lexes the buffer according to the grammar'''
if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
not hasattr(self.mode, "lexer") or self.mode.lexer is None):
self.tokens = []
return
self.mode.lexer.lex(self.mode.window.buffer.make_string())
self.tokens = []
for token in self.mode.lexer:
if token is not None:
self.tokens.append(token)
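    # for instance, in a mode whose grammar resembles python, a buffer
    # containing "def f():\n    pass\n" might lex into tokens named
    # something like 'keyword', 'identifier' and 'delimiter'; exactly which
    # names appear (and which colors they map to) is up to the mode's
    # grammar and its colors dictionary.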
def get_regions(self):
def endloop(line, pindex, plines):
'''helper method for get_regions'''
self.regions.append([])
o = offset + len(line) + 1
if (pindex < len(plines) and
self.mode.window._physical_lines_cont[pindex]):
# in this case we don't skip the newline
o -= 1
p = pindex + 1
return o, p
self.get_tokens()
if self.regions is None:
plines = self.mode.window.get_physical_lines()
tindex = 0 # token index
offset = 0 # string offset
pindex = 0 # physical index
self.regions = [[]]
# looping over the physical lines
while pindex < len(plines):
last = 0
line = plines[pindex]
# figure out if we have a current token, and if so, which one
if tindex < len(self.tokens):
t = self.tokens[tindex]
else:
t = None
                # if the current line doesn't contain a token, then
                # emit a default-color region for the whole line and
                # continue
if type(t) == type(""):
raise Exception, repr(t)
if t is None or t.start >= offset + len(line):
r = Region(0, len(line), self.mode.default_color, line, '', None)
self.regions[-1].append(r)
offset, pindex = endloop(line, pindex, plines)
continue
# looping over the tokens on a physical line
while t is not None and t.start < offset + len(line):
if t.start > offset + last:
                        assert last <= t.start - offset, \
                            "last region ends after this token starts (%d <= %d)" % (last, t.start - offset)
# there is uncolored space before/between the token(s)
r = Region(last, t.start - offset,
self.mode.default_color,
line[last:t.start - offset], '', None)
self.regions[-1].append(r)
last = t.start - offset
color = self.mode.colors.get(t.name, self.mode.default_color)
if t.debug:
# this is useful for seeing which places get relexed
#color = self.mode.colors.get('bizzaro', self.mode.default_color)
pass
                    # for a multi-line token, loop over the physical lines
                    # it spans, emitting one region per line and advancing
                    # just as the outer loop does
while t.end > offset + len(line):
                        assert last <= len(line), \
                            "region would start past the end of the line (%d <= %d)" % (last, len(line))
r = Region(last, len(line), color, line[last:], t.name, t)
self.regions[-1].append(r)
last = 0
offset, pindex = endloop(line, pindex, plines)
if pindex >= len(plines):
                            # we ran out of physical lines while still
                            # inside a multi-line token
                            raise Exception, "token extends past the last physical line"
return self.regions
else:
line = plines[pindex]
                    assert last <= t.end - offset, \
                        "region starts past the token end (%d <= %d - %d)" % (last, t.end, offset)
r = Region(last, t.end - offset, color, line[last:t.end-offset], t.name, t)
self.regions[-1].append(r)
last = t.end - offset
tindex += 1
if tindex < len(self.tokens):
t = self.tokens[tindex]
else:
t = None
last = self.regions[-1][-1][1]
offset, pindex = endloop(line, pindex, plines)
return self.regions
class Region:
index_to_attr = ['start', 'end', 'attr', 'value', 'name']
def __init__(self, start, end, attr, value, name, token=None):
self.start = start
self.end = end
self.attr = attr
self.value = value
self.name = name
self.token = token
def __getitem__(self, i):
return getattr(self, self.index_to_attr[i])
def __repr__(self):
return '<Region: %r, %r, %r, %r, %r>' % (self.start, self.end, self.attr,
self.value, self.name)
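# a rough usage sketch (the mode, point and buffer objects here are
# assumptions about the rest of pmacs, not anything defined in this file):
#
#     h = Highlighter(mode)          # mode supplies .grammar, .lexer, .colors,
#                                    # .default_color and .window
#     regions = h.get_regions()      # lex the buffer, then build one list of
#                                    # Region objects per physical line
#     h.region_added(p, xdiff, ydiff, s)    # after inserting s at point p
#     h.region_removed(p1, p2, s)           # after deleting s between p1 and p2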