pmacs3/highlight.py

import point
# to be clear:
# tokens are generated by the lexer from the buffer, and correspond to lexical
# information about a logical portion of the buffer.
# regions are derived from a combination of the lexical tokens (which correspond
# to the logical buffer) and the physical line endings (i.e. dependent on screen
# width, etc.)
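# as a rough illustration (hypothetical token values, not tied to any
# particular grammar): lexing the buffer text "x = 'a fairly long string'"
# might yield tokens along the lines of
#     Token(start=0, end=1,  name='identifier')
#     Token(start=4, end=26, name='string')
# and if the screen width forces the string token to wrap onto two physical
# lines, get_regions() below will emit two Region objects for it, one per
# physical line, both pointing back at the same token and color.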
class Highlighter:
'''class used by modes to manage syntax highlighting'''
def __init__(self, m):
self.mode = m
self.tokens = None
self.regions = None
def invalidate_tokens(self):
self.tokens = None
self.invalidate_regions()
def invalidate_regions(self):
self.regions = None
def invalidate_token_range(self, start_offset, end_offset, m, n, diff):
        # fix up all the tokens' offsets, delete any token which spans
        # the change, and discard m tokens after and n tokens before it
offset = start_offset
i = 0
last_index_before = None
first_index_after = None
while i < len(self.tokens):
t = self.tokens[i]
t.debug = False
if t.end <= start_offset:
last_index_before = i
i += 1
elif t.start >= end_offset:
if first_index_after is None:
first_index_after = i
t.start += diff
t.end += diff
i += 1
else:
if offset == start_offset:
offset = self.tokens[i].start
del self.tokens[i]
        # delete m tokens further forward
        for i in range(0, m):
            if first_index_after is None:
                break
            elif first_index_after < len(self.tokens) - 1:
                # a later token slides into this index, so it stays valid
                del self.tokens[first_index_after]
            elif first_index_after == len(self.tokens) - 1:
                # we are deleting the last remaining token after the change
                del self.tokens[first_index_after]
                first_index_after = None
# delete n tokens further back
for i in range(0, n):
if last_index_before is None:
break
elif last_index_before > 0:
del self.tokens[last_index_before]
last_index_before -= 1
elif last_index_before == 0:
del self.tokens[0]
last_index_before = None
break
return (last_index_before, first_index_after)
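    # a small worked example (hypothetical offsets): if three characters are
    # inserted at offset 10, then start_offset == end_offset == 10 and
    # diff == 3; a token spanning [2, 8) is left alone, a token spanning
    # [14, 20) is shifted to [17, 23), and a token spanning [8, 14) is
    # deleted outright because it straddles the change.  on top of that,
    # m tokens after and n tokens before the change are discarded, and the
    # returned indices tell reparse_region() roughly where to restart and
    # where it can stop re-lexing.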
def reparse_region(self, last_index_before, first_index_after):
i = last_index_before
if i is None:
i = 0
tokens_before = False
start_offset = 0
else:
tokens_before = True
start_offset = self.tokens[i].start
j = first_index_after
if j is None or j >= len(self.tokens):
j = -1
tokens_after = False
end_offset = None
else:
tokens_after = True
end_offset = self.tokens[j].end
# FIXME
# new things the strategy should do include:
# 1. not generating the huge "data" string
# 2. really generating the "roll-back" with
# data not just by rolling back the index
# of the lexer
# 3. pass in only as much data as you need
# to do the minimal check, and for the
# "after the change" checking, use append
# to strategically keep the string 1-2
# tokens ahead of where it needs to be
#data = self.mode.window.buffer.make_string()
#self.mode.lexer.lex(data, start_offset)
if self.tokens:
buf_index = max(self.tokens[i].start - 100, 0)
else:
buf_index = 0
if end_offset is None:
data = self.mode.window.buffer.make_string(start=buf_index, end=None)
else:
data = self.mode.window.buffer.make_string(start=buf_index,
end=end_offset + 100)
self.mode.lexer.lex(data, start_offset - buf_index, buf_index)
saved_t = False
while True:
if saved_t is True:
                # we want to retry t again
saved_t = False
else:
try:
t = self.mode.lexer.next()
if t is None:
continue
                except StopIteration:
# we have no more tokens, so delete whatever was left and
# then return
if i < len(self.tokens):
del self.tokens[i:]
self.mode.lexer.lex()
return
if i >= len(self.tokens):
                # we don't have any old tokens this far out, so just keep it
t.debug = True
self.tokens.append(t)
i += 1
elif t.end <= self.tokens[i].start:
# we shouldn't get here if we are before the change
assert not tokens_before
# the token is before our tokens, so we can just add it
t.debug = True
self.tokens.insert(i, t)
i += 1
elif t.start == self.tokens[i].start and \
t.end == self.tokens[i].end and \
t.name == self.tokens[i].name:
# the token is identical to ours, so we can either
# stop if we are after the change, or confirm the
# start point if we are before
if tokens_before:
tokens_before = False
i += 1
else:
self.tokens[i].debug = True
self.mode.lexer.lex()
return
else:
if i < len(self.tokens):
del self.tokens[i]
if tokens_before and i < 0:
raise Exception, "oh no!"
# we need to keep sliding our window back
i -= 1
start_offset = self.tokens[i].start
self.mode.lexer.lex(data, start_offset)
elif tokens_before:
# ok, now we aren't sliding our window back
# and can proceed normally
tokens_before = False
saved_t = True
else:
# the new token conflicts with the old one, so delete
# the old one and try again
saved_t = True
raise Exception, "we should never get here (dolphin 2)"
def _region_changed_slow(self):
self.invalidate_tokens()
self.get_regions()
return
def _region_added_dumb(self, p, xdiff, ydiff, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
start_offset = self.mode.window.buffer.get_point_offset(p)
end_offset = start_offset
assert (xdiff > 0 and ydiff >= 0) or ydiff > 0
if ydiff > 0:
p2 = point.Point(p.x + xdiff, p.y + ydiff)
elif ydiff == 0:
p2 = point.Point(p.x + xdiff, p.y)
new_offset = self.mode.window.buffer.get_point_offset(p2)
diff = new_offset - start_offset
assert diff > 0
        # move the tokens' start and end points so that the additions
        # (while not being correct) won't break the existing
        # highlighting
for t in self.tokens:
t.debug = False
if t.end <= start_offset:
pass
elif t.start >= end_offset:
t.start += diff
t.end += diff
else:
t.end += diff
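    # for example (hypothetical offsets): typing one character at offset 5,
    # inside a token spanning [3, 9), leaves tokens ending at or before
    # offset 5 alone, stretches that token to [3, 10), and slides every
    # later token forward by one.  the colors are briefly approximate, but
    # nothing has to be re-lexed.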
def _region_added_complex(self, p, xdiff, ydiff, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
start_offset = self.mode.window.buffer.get_point_offset(p)
end_offset = start_offset
assert ydiff >= 0
if ydiff > 0:
p2 = point.Point(p.x + xdiff, p.y + ydiff)
elif ydiff == 0:
p2 = point.Point(p.x + xdiff, p.y)
new_offset = self.mode.window.buffer.get_point_offset(p2)
diff = new_offset - start_offset
(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, diff)
#(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, diff)
self.reparse_region(i, j)
def region_added(self, p, xdiff, ydiff, s):
        if s == ' ' or s == '\t':
self._region_added_dumb(p, xdiff, ydiff, s)
else:
self._region_added_complex(p, xdiff, ydiff, s)
def _region_removed_dumb(self, p1, p2, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
#diff = r
diff = len(s)
start_offset = self.mode.window.buffer.get_point_offset(p1)
end_offset = start_offset + diff
        # move the tokens' start and end points so that the deletions
        # (while not being correct) won't break the existing
        # highlighting
i = 0
while i < len(self.tokens):
t = self.tokens[i]
t.debug = False
# if our token contains a trailing newline, certain
# deletions may not match unless we pretend that the end
# is one character earlier
if t.string.endswith('\n'):
t_end = t.end - 1
else:
t_end = t.end
if t_end <= start_offset:
pass
elif t.start >= start_offset and t_end <= end_offset:
del self.tokens[i]
continue
elif t_end >= start_offset and t_end <= end_offset:
t.end = start_offset
elif t.start >= start_offset and t.start <= end_offset:
t.start = end_offset
else:
t.start -= diff
t.end -= diff
if t.start == t.end:
del self.tokens[i]
continue
else:
assert t.start < t.end
i += 1
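    # for example (hypothetical offsets): deleting the two characters at
    # offsets [10, 12) drops any token lying wholly inside that range,
    # truncates a token spanning [6, 11) to [6, 10), moves the start of a
    # token spanning [11, 15) up to offset 12, and shifts tokens that lie
    # entirely after the deletion back by two.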
def _region_removed_complex(self, p1, p2, s):
self.invalidate_regions()
# calculate the start and end offsets of the change, and the
# difference to the length of the whole data string
diff = len(s)
start_offset = self.mode.window.buffer.get_point_offset(p1)
end_offset = start_offset + diff
(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, -diff)
#(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, -diff)
self.reparse_region(i, j)
def region_removed(self, p1, p2, s):
self._region_removed_complex(p1, p2, s)
def get_tokens(self):
if self.tokens is None:
self.lex_buffer()
return self.tokens
def lex_buffer(self):
'''lexes the buffer according to the grammar'''
if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
not hasattr(self.mode, "lexer") or self.mode.lexer is None):
self.tokens = []
return
self.mode.lexer.lex(self.mode.window.buffer.make_string())
self.tokens = []
for token in self.mode.lexer:
if token is not None:
self.tokens.append(token)
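    # for instance, in a mode whose grammar resembles python, a buffer
    # containing "def f():\n    pass\n" might lex into tokens named
    # something like 'keyword', 'identifier' and 'delimiter'; exactly which
    # names appear (and which colors they map to) is up to the mode's
    # grammar and its colors dictionary.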
def get_regions(self):
def endloop(line, pindex, plines):
'''helper method for get_regions'''
self.regions.append([])
o = offset + len(line) + 1
if (pindex < len(plines) and
self.mode.window._physical_lines_cont[pindex]):
# in this case we don't skip the newline
o -= 1
p = pindex + 1
return o, p
self.get_tokens()
if self.regions is None:
plines = self.mode.window.get_physical_lines()
tindex = 0 # token index
offset = 0 # string offset
pindex = 0 # physical index
self.regions = [[]]
# looping over the physical lines
while pindex < len(plines):
last = 0
line = plines[pindex]
# figure out if we have a current token, and if so, which one
if tindex < len(self.tokens):
t = self.tokens[tindex]
else:
t = None
                # if the current line doesn't contain a token, then
                # emit a default-color region for the whole line and
                # continue
if type(t) == type(""):
raise Exception, repr(t)
if t is None or t.start >= offset + len(line):
r = Region(0, len(line), self.mode.default_color, line, '', None)
self.regions[-1].append(r)
offset, pindex = endloop(line, pindex, plines)
continue
# looping over the tokens on a physical line
while t is not None and t.start < offset + len(line):
if t.start > offset + last:
                        assert last <= t.start - offset, \
                            "last region ends after this token starts (%d <= %d)" % (last, t.start - offset)
# there is uncolored space before/between the token(s)
r = Region(last, t.start - offset,
self.mode.default_color,
line[last:t.start - offset], '', None)
self.regions[-1].append(r)
last = t.start - offset
color = self.mode.colors.get(t.name, self.mode.default_color)
if t.debug:
# this is useful for seeing which places get relexed
#color = self.mode.colors.get('bizzaro', self.mode.default_color)
pass
                    # for a multi-line token, loop over the physical lines
                    # it spans, emitting one region per line and advancing
                    # just as the outer loop does
while t.end > offset + len(line):
                        assert last <= len(line), \
                            "region would start past the end of the line (%d <= %d)" % (last, len(line))
r = Region(last, len(line), color, line[last:], t.name, t)
self.regions[-1].append(r)
last = 0
offset, pindex = endloop(line, pindex, plines)
if pindex >= len(plines):
                            # we ran out of physical lines while still
                            # inside a multi-line token
                            raise Exception, "token extends past the last physical line"
return self.regions
else:
line = plines[pindex]
                    assert last <= t.end - offset, \
                        "region starts past the token end (%d <= %d - %d)" % (last, t.end, offset)
r = Region(last, t.end - offset, color, line[last:t.end-offset], t.name, t)
self.regions[-1].append(r)
last = t.end - offset
tindex += 1
if tindex < len(self.tokens):
t = self.tokens[tindex]
else:
t = None
last = self.regions[-1][-1][1]
offset, pindex = endloop(line, pindex, plines)
return self.regions
class Region:
index_to_attr = ['start', 'end', 'attr', 'value', 'name']
def __init__(self, start, end, attr, value, name, token=None):
self.start = start
self.end = end
self.attr = attr
self.value = value
self.name = name
self.token = token
def __getitem__(self, i):
return getattr(self, self.index_to_attr[i])
def __repr__(self):
return '<Region: %r, %r, %r, %r, %r>' % (self.start, self.end, self.attr,
self.value, self.name)
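# a rough usage sketch (the mode, point and buffer objects here are
# assumptions about the rest of pmacs, not anything defined in this file):
#
#     h = Highlighter(mode)          # mode supplies .grammar, .lexer, .colors,
#                                    # .default_color and .window
#     regions = h.get_regions()      # lex the buffer, then build one list of
#                                    # Region objects per physical line
#     h.region_added(p, xdiff, ydiff, s)    # after inserting s at point p
#     h.region_removed(p1, p2, s)           # after deleting s between p1 and p2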