import point

# to be clear:
#
# tokens are generated by the lexer from the buffer, and correspond to lexical
# information about a logical portion of the buffer.
#
# regions are derived from a combination of the lexical tokens (which correspond
# to the logical buffer) and the physical line endings (i.e. dependent on screen
# width, etc.)
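
# A small, hypothetical illustration of the distinction (the token name and
# offsets below are made up; real values depend on the mode's grammar):
#
#   buffer text:  x = "a long string that wraps"
#   token:        name='string', start=4, end=30     (logical buffer offsets)
#
# If the screen is only 20 columns wide, that single token is displayed across
# two physical lines, so get_regions() emits two Region objects for it, one per
# physical line, each with line-relative start/end columns.
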
class Highlighter:
    '''class used by modes to manage syntax highlighting'''
    def __init__(self, m):
        self.mode = m
        self.tokens = None
        self.regions = None

    def invalidate_tokens(self):
        self.tokens = None
        self.invalidate_regions()

    def invalidate_regions(self):
        self.regions = None

    def invalidate_token_range(self, start_offset, end_offset, m, n, diff):
        # fix all the tokens to update their offsets, and clean out
        # a token which spans the change
        offset = start_offset
        i = 0
        last_index_before = None
        first_index_after = None
        while i < len(self.tokens):
            t = self.tokens[i]
            t.debug = False
            if t.end <= start_offset:
                last_index_before = i
                i += 1
            elif t.start >= end_offset:
                if first_index_after is None:
                    first_index_after = i
                t.start += diff
                t.end += diff
                i += 1
            else:
                if offset == start_offset:
                    offset = self.tokens[i].start
                del self.tokens[i]

        # delete m tokens further forward
        for i in range(0, m):
            if first_index_after is None:
                break
            elif first_index_after < len(self.tokens) - 1:
                del self.tokens[first_index_after]
            elif first_index_after == len(self.tokens) - 1:
                # that was the last remaining token after the change
                del self.tokens[first_index_after]
                first_index_after = None

        # delete n tokens further back
        for i in range(0, n):
            if last_index_before is None:
                break
            elif last_index_before > 0:
                del self.tokens[last_index_before]
                last_index_before -= 1
            elif last_index_before == 0:
                del self.tokens[0]
                last_index_before = None
                break

        return (last_index_before, first_index_after)
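
    # Callers (see _region_added_complex and _region_removed_complex below)
    # feed the returned pair straight into reparse_region, e.g.:
    #
    #   (i, j) = self.invalidate_token_range(start, end, 1, 1, diff)
    #   self.reparse_region(i, j)
    #
    # last_index_before is the index of the last untouched token before the
    # change (or None), and first_index_after is the index of the first
    # untouched token after it (or None), so reparse_region only needs to
    # relex the text in between.
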
    def reparse_region(self, last_index_before, first_index_after):
        i = last_index_before
        if i is None:
            i = 0
            tokens_before = False
            start_offset = 0
        else:
            tokens_before = True
            start_offset = self.tokens[i].start

        j = first_index_after
        if j is None or j >= len(self.tokens):
            j = -1
            tokens_after = False
            end_offset = None
        else:
            tokens_after = True
            end_offset = self.tokens[j].end

        # FIXME
        # new things the strategy should do include:
        # 1. not generating the huge "data" string
        # 2. really generating the "roll-back" with
        #    data not just by rolling back the index
        #    of the lexer
        # 3. pass in only as much data as you need
        #    to do the minimal check, and for the
        #    "after the change" checking, use append
        #    to strategically keep the string 1-2
        #    tokens ahead of where it needs to be
        #data = self.mode.window.buffer.make_string()
        #self.mode.lexer.lex(data, start_offset)
        if self.tokens:
            buf_index = max(self.tokens[i].start - 100, 0)
        else:
            buf_index = 0
        if end_offset is None:
            data = self.mode.window.buffer.make_string(start=buf_index, end=None)
        else:
            data = self.mode.window.buffer.make_string(start=buf_index,
                                                       end=end_offset + 100)
        self.mode.lexer.lex(data, start_offset - buf_index, buf_index)
        saved_t = False

        while True:
            if saved_t is True:
                # we want to retry t again
                saved_t = False
            else:
                try:
                    t = self.mode.lexer.next()
                    if t is None:
                        continue
                except:
                    # we have no more tokens, so delete whatever was left and
                    # then return
                    if i < len(self.tokens):
                        del self.tokens[i:]
                    self.mode.lexer.lex()
                    return

            if i >= len(self.tokens):
                # we don't have any old tokens this far out, so just keep it
                t.debug = True
                self.tokens.append(t)
                i += 1
            elif t.end <= self.tokens[i].start:
                # we shouldn't get here if we are before the change
                assert not tokens_before
                # the token is before our tokens, so we can just add it
                t.debug = True
                self.tokens.insert(i, t)
                i += 1
            elif t.start == self.tokens[i].start and \
                 t.end == self.tokens[i].end and \
                 t.name == self.tokens[i].name:
                # the token is identical to ours, so we can either
                # stop if we are after the change, or confirm the
                # start point if we are before
                if tokens_before:
                    tokens_before = False
                    i += 1
                else:
                    self.tokens[i].debug = True
                    self.mode.lexer.lex()
                    return
            else:
                if i < len(self.tokens):
                    del self.tokens[i]
                if tokens_before and i < 0:
                    raise Exception, "oh no!"
                    # we need to keep sliding our window back
                    i -= 1
                    start_offset = self.tokens[i].start
                    self.mode.lexer.lex(data, start_offset)
                elif tokens_before:
                    # ok, now we aren't sliding our window back
                    # and can proceed normally
                    tokens_before = False
                    saved_t = True
                else:
                    # the new token conflicts with the old one, so delete
                    # the old one and try again
                    saved_t = True

        raise Exception, "we should never get here (dolphin 2)"

    def _region_changed_slow(self):
        self.invalidate_tokens()
        self.get_regions()
        return

    def _region_added_dumb(self, p, xdiff, ydiff, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        start_offset = self.mode.window.buffer.get_point_offset(p)
        end_offset = start_offset
        assert (xdiff > 0 and ydiff >= 0) or ydiff > 0

        if ydiff > 0:
            p2 = point.Point(p.x + xdiff, p.y + ydiff)
        elif ydiff == 0:
            p2 = point.Point(p.x + xdiff, p.y)
        new_offset = self.mode.window.buffer.get_point_offset(p2)
        diff = new_offset - start_offset
        assert diff > 0

        # move the tokens' start and end points so that the additions
        # (while not being correct) won't break the existing
        # highlighting
        for t in self.tokens:
            t.debug = False
            if t.end <= start_offset:
                pass
            elif t.start >= end_offset:
                t.start += diff
                t.end += diff
            else:
                t.end += diff

    def _region_added_complex(self, p, xdiff, ydiff, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        start_offset = self.mode.window.buffer.get_point_offset(p)
        end_offset = start_offset
        assert ydiff >= 0
        if ydiff > 0:
            p2 = point.Point(p.x + xdiff, p.y + ydiff)
        elif ydiff == 0:
            p2 = point.Point(p.x + xdiff, p.y)
        new_offset = self.mode.window.buffer.get_point_offset(p2)
        diff = new_offset - start_offset

        (i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, diff)
        #(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, diff)
        self.reparse_region(i, j)

    def region_added(self, p, xdiff, ydiff, s):
        if s == ' ' or s == ' ':
            self._region_added_dumb(p, xdiff, ydiff, s)
        else:
            self._region_added_complex(p, xdiff, ydiff, s)

    def _region_removed_dumb(self, p1, p2, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        #diff = r
        diff = len(s)
        start_offset = self.mode.window.buffer.get_point_offset(p1)
        end_offset = start_offset + diff

        # move the tokens' start and end points so that the deletions
        # (while not being correct) won't break the existing
        # highlighting
        i = 0
        while i < len(self.tokens):
            t = self.tokens[i]
            t.debug = False

            # if our token contains a trailing newline, certain
            # deletions may not match unless we pretend that the end
            # is one character earlier
            if t.string.endswith('\n'):
                t_end = t.end - 1
            else:
                t_end = t.end

            if t_end <= start_offset:
                pass
            elif t.start >= start_offset and t_end <= end_offset:
                del self.tokens[i]
                continue
            elif t_end >= start_offset and t_end <= end_offset:
                t.end = start_offset
            elif t.start >= start_offset and t.start <= end_offset:
                t.start = end_offset
            else:
                t.start -= diff
                t.end -= diff
            if t.start == t.end:
                del self.tokens[i]
                continue
            else:
                assert t.start < t.end
            i += 1

    def _region_removed_complex(self, p1, p2, s):
        self.invalidate_regions()

        # calculate the start and end offsets of the change, and the
        # difference to the length of the whole data string
        diff = len(s)
        start_offset = self.mode.window.buffer.get_point_offset(p1)
        end_offset = start_offset + diff

        (i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 1, -diff)
        #(i, j) = self.invalidate_token_range(start_offset, end_offset, 1, 2, -diff)
        self.reparse_region(i, j)

    def region_removed(self, p1, p2, s):
        self._region_removed_complex(p1, p2, s)

    def get_tokens(self):
        if self.tokens is None:
            self.lex_buffer()
        return self.tokens

    def lex_buffer(self):
        '''lexes the buffer according to the grammar'''
        if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
            not hasattr(self.mode, "lexer") or self.mode.lexer is None):
            self.tokens = []
            return

        self.mode.lexer.lex(self.mode.window.buffer.make_string())

        self.tokens = []
        for token in self.mode.lexer:
            if token is not None:
                self.tokens.append(token)

    def get_regions(self):
        def endloop(line, pindex, plines):
            '''helper function for get_regions'''
            self.regions.append([])
            o = offset + len(line) + 1
            if (pindex < len(plines) and
                self.mode.window._physical_lines_cont[pindex]):
                # in this case we don't skip the newline
                o -= 1
            p = pindex + 1
            return o, p

        self.get_tokens()

        if self.regions is None:
            plines = self.mode.window.get_physical_lines()

            tindex = 0 # token index
            offset = 0 # string offset
            pindex = 0 # physical index

            self.regions = [[]]

            # looping over the physical lines
            while pindex < len(plines):
                last = 0
                line = plines[pindex]

                # figure out if we have a current token, and if so, which one
                if tindex < len(self.tokens):
                    t = self.tokens[tindex]
                else:
                    t = None

                # if the current line doesn't contain a token, then
                # make a default color token for that line and
                # continue
                if type(t) == type(""):
                    raise Exception, repr(t)
                if t is None or t.start >= offset + len(line):
                    r = Region(0, len(line), self.mode.default_color, line, '', None)
                    self.regions[-1].append(r)
                    offset, pindex = endloop(line, pindex, plines)
                    continue

                # looping over the tokens on a physical line
                while t is not None and t.start < offset + len(line):
                    if t.start > offset + last:
                        assert last <= t.start - offset, \
                            "region start past token start (%d <= %d)" % (last, t.start - offset)
                        # there is uncolored space before/between the token(s)
                        r = Region(last, t.start - offset,
                                   self.mode.default_color,
                                   line[last:t.start - offset], '', None)
                        self.regions[-1].append(r)
                        last = t.start - offset

                    color = self.mode.colors.get(t.name, self.mode.default_color)
                    if t.debug:
                        # this is useful for seeing which places get relexed
                        #color = self.mode.colors.get('bizzaro', self.mode.default_color)
                        pass

                    # in the case of a multiline token, looping over
                    # the lines it spans and incrementing as in the upper
                    # loop...
                    while t.end > offset + len(line):
                        assert last <= len(line), \
                            "region start past end of line (%d <= %d)" % (last, len(line))
                        r = Region(last, len(line), color, line[last:], t.name, t)
                        self.regions[-1].append(r)
                        last = 0
                        offset, pindex = endloop(line, pindex, plines)
                        if pindex >= len(plines):
                            # this shouldn't happen: the token claims to extend
                            # past the last physical line
                            raise Exception, "token extends past the end of the buffer"
                            return self.regions
                        else:
                            line = plines[pindex]

                    assert last <= t.end - offset, \
                        "region start past token end (%d <= %d - %d)" % (last, t.end, offset)
                    r = Region(last, t.end - offset, color, line[last:t.end-offset], t.name, t)
                    self.regions[-1].append(r)
                    last = t.end - offset

                    tindex += 1
                    if tindex < len(self.tokens):
                        t = self.tokens[tindex]
                    else:
                        t = None

                last = self.regions[-1][-1][1]
                offset, pindex = endloop(line, pindex, plines)

        return self.regions

class Region:
    index_to_attr = ['start', 'end', 'attr', 'value', 'name']
    def __init__(self, start, end, attr, value, name, token=None):
        self.start = start
        self.end = end
        self.attr = attr
        self.value = value
        self.name = name
        self.token = token
    def __getitem__(self, i):
        return getattr(self, self.index_to_attr[i])
    def __repr__(self):
        return '<Region: %r, %r, %r, %r, %r>' % (self.start, self.end, self.attr,
                                                 self.value, self.name)
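
# A rough sketch of how a mode object is expected to drive this class; the
# mode/window/buffer wiring is assumed from the attribute names used above
# (mode.lexer, mode.colors, mode.window.buffer, etc.) and is not defined in
# this file:
#
#   h = Highlighter(mode)
#   regions = h.get_regions()        # lexes the whole buffer on first use
#
#   # after the user inserts the string s at point p:
#   h.region_added(p, xdiff, ydiff, s)
#   regions = h.get_regions()        # only the damaged token range is relexed
#
#   # after the text s between points p1 and p2 has been deleted:
#   h.region_removed(p1, p2, s)
#   regions = h.get_regions()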