import point

# to be clear:
#
# tokens are generated by the lexer from the buffer, and correspond to lexical
# information about a logical portion of the buffer.
#
# regions are derived from a combination of the lexical tokens (which correspond
# to the logical buffer) and the physical line endings (i.e. dependent on screen
# width, etc.)

class Highlighter:
    '''class used by modes to manage syntax highlighting'''
    def __init__(self, m):
        self.mode = m
        self.tokens = None
        self.regions = None

    def invalidate_tokens(self):
        self.tokens = None
        self.invalidate_regions()

    def invalidate_regions(self):
        self.regions = None

    def invalidate_token_range(self, start_offset, end_offset, m, n, diff):
        # fix all the tokens to update their offsets, and clean out
        # a token which spans the change
        offset = start_offset
        i = 0
        last_index_before = None
        first_index_after = None
        while i < len(self.tokens):
            t = self.tokens[i]
            t.debug = False
            if t.end <= start_offset:
                last_index_before = i
                i += 1
            elif t.start >= end_offset:
                if first_index_after is None:
                    first_index_after = i
                t.start += diff
                t.end += diff
                i += 1
            else:
                if offset == start_offset:
                    offset = self.tokens[i].start
                del self.tokens[i]

        # delete m tokens further forward
        for i in range(0, m):
            if first_index_after is None:
                break
            elif first_index_after < len(self.tokens) - 1:
                del self.tokens[first_index_after]
            elif first_index_after == len(self.tokens) - 1:
                del self.tokens[first_index_after]
                first_index_after = None

        # delete n tokens further back
        for i in range(0, n):
            if last_index_before is None:
                break
            elif last_index_before > 0:
                del self.tokens[last_index_before]
                last_index_before -= 1
            elif last_index_before == 0:
                del self.tokens[0]
                last_index_before = None
                break

        return (last_index_before, first_index_after)
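
    # a worked example of the bookkeeping above, for a 3-character insertion
    # at offset 10 (so start_offset == end_offset == 10, diff == 3): a token
    # spanning [2, 8) ends before the change and is left alone (it becomes
    # last_index_before), a token spanning [8, 14) overlaps the change and is
    # deleted, and a token spanning [14, 20) starts after the change and is
    # shifted to [17, 23). the m/n arguments then drop a few extra neighbors
    # on either side, so reparse_region() below can re-lex a little context
    # around the change.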
    def reparse_region(self, last_index_before, first_index_after):
        i = last_index_before
        if i is None:
            i = 0
            tokens_before = False
            start_offset = 0
        else:
            tokens_before = True
            start_offset = self.tokens[i].start

        j = first_index_after
        if j is None or j >= len(self.tokens):
            j = -1
            tokens_after = False
            end_offset = None
        else:
            tokens_after = True
            end_offset = self.tokens[j].end

        # FIXME
        # new things the strategy should do include:
        # 1. not generating the huge "data" string
        # 2. really generating the "roll-back" with
        #    data not just by rolling back the index
        #    of the lexer
        # 3. pass in only as much data as you need
        #    to do the minimal check, and for the
        #    "after the change" checking, use append
        #    to strategically keep the string 1-2
        #    tokens ahead of where it needs to be
        #data = self.mode.window.buffer.make_string()
        #self.mode.lexer.lex(data, start_offset)

        if self.tokens:
            buf_index = max(self.tokens[i].start - 100, 0)
        else:
            buf_index = 0
        if end_offset is None:
            data = self.mode.window.buffer.make_string(start=buf_index, end=None)
        else:
            data = self.mode.window.buffer.make_string(start=buf_index,
                                                       end=end_offset + 100)
        self.mode.lexer.lex(data, start_offset - buf_index, buf_index)

        saved_t = False
        while True:
            if saved_t is True:
                # we want to retry t again
                saved_t = False
            else:
                try:
                    t = self.mode.lexer.next()
                    if t is None:
                        continue
                except:
                    # we have no more tokens, so delete whatever was left and
                    # then return
                    if i < len(self.tokens):
                        del self.tokens[i:]
                    self.mode.lexer.lex()
                    return

            if i >= len(self.tokens):
                # we don't have any old tokens this far out, so just keep
                t.debug = True
                self.tokens.append(t)
                i += 1
            elif t.end <= self.tokens[i].start:
                # we shouldn't get here if we are before the change
                assert not tokens_before
                # the token is before our tokens, so we can just add it
                t.debug = True
                self.tokens.insert(i, t)
                i += 1
            elif t.start == self.tokens[i].start and \
                 t.end == self.tokens[i].end and \
                 t.name == self.tokens[i].name:
                # the token is identical to ours, so we can either
                # stop if we are after the change, or confirm the
                # start point if we are before
                if tokens_before:
                    tokens_before = False
                    i += 1
                else:
                    self.tokens[i].debug = True
                    self.mode.lexer.lex()
                    return
            else:
                if i < len(self.tokens):
                    del self.tokens[i]

                if tokens_before and i < 0:
                    raise Exception, "oh no!"
                    # we need to keep sliding our window back
                    i -= 1
                    start_offset = self.tokens[i].start
                    self.mode.lexer.lex(data, start_offset)
                elif tokens_before:
                    # ok, now we aren't sliding our window back
                    # and can proceed normally
                    tokens_before = False
                    saved_t = True
                else:
                    # the new token conflicts with the old one, so delete
                    # the old one and try again
                    saved_t = True

        raise Exception, "we should never get here (dolphin 2)"

    def _region_changed_slow(self):
        self.invalidate_tokens()
        self.get_regions()
        return

    def _region_added_dumb(self, p, xdiff, ydiff, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        start_offset = self.mode.window.buffer.get_point_offset(p)
        end_offset = start_offset
        assert (xdiff > 0 and ydiff >= 0) or ydiff > 0
        if ydiff > 0:
            p2 = point.Point(p.x + xdiff, p.y + ydiff)
        elif ydiff == 0:
            p2 = point.Point(p.x + xdiff, p.y)
        new_offset = self.mode.window.buffer.get_point_offset(p2)
        diff = new_offset - start_offset
        assert diff > 0

        # move the tokens start and end points so that the additions
        # (while not being correct) won't break the existing
        # highlighting
        for t in self.tokens:
            t.debug = False
            if t.end <= start_offset:
                pass
            elif t.start >= end_offset:
                t.start += diff
                t.end += diff
            else:
                t.end += diff
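
    # the two insertion paths differ in cost: _region_added_dumb (above) only
    # slides token offsets (every token after the insertion point moves
    # forward by diff, and a token containing the point just grows), which is
    # cheap but can leave stale highlighting; _region_added_complex (below)
    # instead invalidates the token spanning the change plus one neighbor on
    # each side via invalidate_token_range, and re-lexes just that window
    # with reparse_region.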
    def _region_added_complex(self, p, xdiff, ydiff, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        start_offset = self.mode.window.buffer.get_point_offset(p)
        end_offset = start_offset
        assert ydiff >= 0
        if ydiff > 0:
            p2 = point.Point(p.x + xdiff, p.y + ydiff)
        elif ydiff == 0:
            p2 = point.Point(p.x + xdiff, p.y)
        new_offset = self.mode.window.buffer.get_point_offset(p2)
        diff = new_offset - start_offset

        (i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, diff)
        #(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, diff)
        self.reparse_region(i, j)

    def region_added(self, p, xdiff, ydiff, s):
        if s == ' ' or s == ' ':
            self._region_added_dumb(p, xdiff, ydiff, s)
        else:
            self._region_added_complex(p, xdiff, ydiff, s)

    def _region_removed_dumb(self, p1, p2, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        #diff = r
        diff = len(s)
        start_offset = self.mode.window.buffer.get_point_offset(p1)
        end_offset = start_offset + diff

        # move the tokens start and end points so that the additions
        # (while not being correct) won't break the existing
        # highlighting
        i = 0
        while i < len(self.tokens):
            t = self.tokens[i]
            t.debug = False

            # if our token contains a trailing newline, certain
            # deletions may not match unless we pretend that the end
            # is one character earlier
            if t.string.endswith('\n'):
                t_end = t.end - 1
            else:
                t_end = t.end

            if t_end <= start_offset:
                pass
            elif t.start >= start_offset and t_end <= end_offset:
                del self.tokens[i]
                continue
            elif t_end >= start_offset and t_end <= end_offset:
                t.end = start_offset
            elif t.start >= start_offset and t.start <= end_offset:
                t.start = end_offset
            else:
                t.start -= diff
                t.end -= diff

            if t.start == t.end:
                del self.tokens[i]
                continue
            else:
                assert t.start < t.end
            i += 1

    def _region_removed_complex(self, p1, p2, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        diff = len(s)
        start_offset = self.mode.window.buffer.get_point_offset(p1)
        end_offset = start_offset + diff

        (i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, -diff)
        #(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, -diff)
        self.reparse_region(i, j)

    def region_removed(self, p1, p2, s):
        self._region_removed_complex(p1, p2, s)

    def get_tokens(self):
        if self.tokens is None:
            self.lex_buffer()
        return self.tokens

    def lex_buffer(self):
        '''lexes the buffer according to the grammar'''
        if not hasattr(self.mode, "grammar") or \
           not hasattr(self.mode, "lexer") or \
           self.mode.grammar is None or \
           self.mode.lexer is None:
            self.tokens = []
            return
        self.mode.lexer.lex(self.mode.window.buffer.make_string())
        self.tokens = []
        for token in self.mode.lexer:
            if token is not None:
                self.tokens.append(token)
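
    # note on the shape of self.regions (built in get_regions below): it is a
    # list with one entry per physical line, and each entry is a list of
    # Region objects whose start/end are column offsets within that physical
    # line. a single logical token that spans several physical lines (a long
    # string, say) therefore shows up as one Region in each of those per-line
    # lists.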
    def get_regions(self):
        def endloop(line, pindex, plines):
            '''helper method for get_regions'''
            self.regions.append([])
            o = offset + len(line) + 1
            if (pindex < len(plines) and
                self.mode.window._physical_lines_cont[pindex]):
                # in this case we don't skip the newline
                o -= 1
            p = pindex + 1
            return o, p

        self.get_tokens()

        if self.regions is None:
            plines = self.mode.window.get_physical_lines()
            tindex = 0  # token index
            offset = 0  # string offset
            pindex = 0  # physical index
            self.regions = [[]]

            # looping over the physical lines
            while pindex < len(plines):
                last = 0
                line = plines[pindex]

                # figure out if we have a current token, and if so, which one
                if tindex < len(self.tokens):
                    t = self.tokens[tindex]
                else:
                    t = None

                # if the current line doesn't contain a token, then
                # make a default color token for that line and
                # continue
                if type(t) == type(""):
                    raise Exception, repr(t)
                if t is None or t.start >= offset + len(line):
                    r = Region(0, len(line), self.mode.default_color, line, '', None)
                    self.regions[-1].append(r)
                    offset, pindex = endloop(line, pindex, plines)
                    continue

                # looping over the tokens on a physical line
                while t is not None and t.start < offset + len(line):
                    if t.start > offset + last:
                        assert last <= t.start - offset, \
                               "iegjeigje (%d <= %d)" % (last, t.start - offset)
                        # there is uncolored space before/between the token(s)
                        r = Region(last, t.start - offset, self.mode.default_color,
                                   line[last:t.start - offset], '', None)
                        self.regions[-1].append(r)
                        last = t.start - offset

                    color = self.mode.colors.get(t.name, self.mode.default_color)

                    if t.debug:
                        # this is useful for seeing which places get relexed
                        #color = self.mode.colors.get('bizzaro', self.mode.default_color)
                        pass

                    # in the case of a multiline token, looping over
                    # the lines it spans and incrementing as in the upper
                    # loop...
                    while t.end > offset + len(line):
                        assert last <= len(line), \
                               "jjjjccccc (%d <= %d)" % (last, len(line))
                        r = Region(last, len(line), color, line[last:], t.name, t)
                        self.regions[-1].append(r)
                        last = 0
                        offset, pindex = endloop(line, pindex, plines)
                        if pindex >= len(plines):
                            # huh???
                            raise Exception, "fuck me"
                            return self.regions
                        else:
                            line = plines[pindex]

                    assert last <= t.end - offset, \
                           "bbjjgjg (%d <= %d - %d)" % (last, t.end, offset)
                    r = Region(last, t.end - offset, color,
                               line[last:t.end - offset], t.name, t)
                    self.regions[-1].append(r)
                    last = t.end - offset

                    tindex += 1
                    if tindex < len(self.tokens):
                        t = self.tokens[tindex]
                    else:
                        t = None

                last = self.regions[-1][-1][1]
                offset, pindex = endloop(line, pindex, plines)

        return self.regions

class Region:
    index_to_attr = ['start', 'end', 'attr', 'value', 'name']

    def __init__(self, start, end, attr, value, name, token=None):
        self.start = start
        self.end = end
        self.attr = attr
        self.value = value
        self.name = name
        self.token = token

    def __getitem__(self, i):
        return getattr(self, self.index_to_attr[i])

    def __repr__(self):
        return '<Region: [%d-%d] %r %r %r>' % (self.start, self.end, self.attr,
                                               self.value, self.name)
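
# a minimal sketch of how Region objects are consumed. the 'attr' argument
# would normally be a curses attribute looked up from the mode's color map;
# a plain string stands in for it here, and the token argument is left as
# None since no lexer is involved.
if __name__ == '__main__':
    r = Region(0, 3, 'default', 'def', 'keyword')
    # Region supports both attribute access and sequence-style access,
    # which is what get_regions() relies on (e.g. self.regions[-1][-1][1]).
    print r
    print r.start, r.end, r.name
    print r[0], r[1], r[4]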