parent 6334498f3d
commit 7f66ee2ea4
@@ -304,10 +304,8 @@ class Highlighter:
     def lex_buffer(self):
         '''lexes the buffer according to the grammar'''
-        if not hasattr(self.mode, "grammar") or \
-           not hasattr(self.mode, "lexer") or \
-           self.mode.grammar is None or \
-           self.mode.lexer is None:
+        if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
+            not hasattr(self.mode, "lexer") or self.mode.lexer is None):
             self.tokens = []
             return
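
The rewritten guard is behavior-preserving: lexing bails out unless the mode supplies both a grammar and a lexer. A minimal sketch of the same check, assuming a hypothetical mode object (getattr with a default collapses each hasattr/None pair):

    # Sketch only; 'mode' and can_lex are hypothetical, not part of this commit.
    def can_lex(mode):
        return (getattr(mode, 'grammar', None) is not None and
                getattr(mode, 'lexer', None) is not None)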
@@ -18,8 +18,9 @@ for i in range(0, len(color_list)):

 class Highlighter:
     def __init__(self, lexer):
         self.lexer = lexer
         self.tokens = []
+        self.line_contexts = {}

     def dump(self, fmt='(%3s, %2s) | %s'):
         print fmt % ('y', 'x', 'string')
@@ -50,10 +51,11 @@ class Highlighter:
         sys.stdout.write('\n')

     def highlight(self, lines):
         self.tokens = [[] for l in lines]
         self.lexer.lex(lines, y=0, x=0)
         for token in self.lexer:
             self.tokens[token.y].append(token)
+        self.line_contexts = dict(self.lexer.line_contexts)

     # relexing
     # ======================
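
highlight() now snapshots the lexer's per-line context map after a full pass, so the Highlighter remembers which region rules were still open at the end of each line. A sketch of how that snapshot could be consulted (usage hypothetical; in this commit the relex path still restarts at the damaged line via lexer.lex):

    # Sketch: before relexing line y, ask which regions were open at the
    # end of the previous line.
    highlighter.highlight(lines)
    open_regions = highlighter.line_contexts.get(y - 1, [])
    if not open_regions:
        pass  # line y starts in the top-level grammar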
@@ -61,12 +63,12 @@ class Highlighter:
         # start the relexing process
         self.lexer.lex(lines, y1, 0)

-        # this keeps track of the current y coordinate, the current token index
+        # these keep track of the current y coordinate, the current token index
         # on line[y], and the current "new token", respectively.
         y = y1
         i = 0
         getnext = True
         new_token = None

         while True:
             # if we have overstepped our bounds, then exit!
@@ -77,7 +79,7 @@ class Highlighter:
             if getnext:
                 try:
                     new_token = self.lexer.next()
                     getnext = False
                 except StopIteration:
                     # ok, so this means that ALL the rest of the tokens didn't
                     # show up, because we're done. so delete them and exit

lex2.py
@@ -3,6 +3,20 @@ import re
 valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
 reserved_names = ['start', 'middle', 'end', 'null']

+class RuleContext:
+    # to be clear:
+    # x, y: where the rule processing began
+    # rule: the rule which began
+    # flag: a signal to be used to resume the rule correctly
+    # context: the previous rule namespace(s)
+    # matchd: the dictionary returned by the rule's matching
+    def __init__(self, y, x, rule, flag, context, matchd):
+        self.y = y
+        self.x = x
+        self.rule = rule
+        self.context = context
+        self.matchd = matchd
+
 class Token(object):
     def __init__(self, name, rule, y, x, s, **vargs):
         self.name = name
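
RuleContext is a plain record of where a region rule began and what it matched: enough, in principle, to re-enter the rule later. Note that as committed, __init__ accepts flag but never assigns it; a self.flag attribute is presumably intended. A sketch of constructing one (all values hypothetical):

    # Sketch: the context pushed when a triple-quoted string opens at
    # line 12, column 4; string_rule is a hypothetical RegionRule.
    rc = RuleContext(12, 4, string_rule, 'start', ['py'], {'tag': '"""'})
    assert rc.rule is string_rule and rc.matchd['tag'] == '"""'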
@@ -45,11 +59,10 @@ class ConstantRule(Rule):
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name = name
         self.constant = constant
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, self.constant, name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
             lexer.x += len(self.constant)
             return True
         else:
@@ -62,12 +75,11 @@ class PatternRule(Rule):
         self.name = name
         self.pattern = pattern
         self.re = re.compile(pattern)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         m = self.re.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
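
ConstantRule and PatternRule now tag each token with the grammar that produced it (grammar=lexer.grammar) rather than with a parent token. Token.__init__ accepts **vargs, so the extra keyword presumably rides along on the token; a sketch of inspecting it (assuming make_token forwards keyword arguments onto the Token, which this diff does not show):

    # Sketch: tokens emitted by the new match() calls should know their
    # grammar of origin (attribute storage is an assumption here).
    for t in lexer:
        print t.name, getattr(t, 'grammar', None)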
@@ -77,11 +89,11 @@ class ContextPatternRule(Rule):
     def __init__(self, name, pattern, fallback):
         assert valid_name_re.match(name), 'invalid name %r' % name
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name = name
         self.pattern = pattern
         self.fallback = fallback
         self.fallback_re = re.compile(fallback)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         try:
             r = re.compile(self.pattern % d)
         except KeyError:
@@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
         m = r.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
@@ -105,18 +116,23 @@ class RegionRule(Rule):
         self.grammar = grammar
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
+
+    def restart(self, lexer, rulecontext):
+        pass
     def match(self, lexer, context=[], d={}):
         m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
-        # see if we can match out start token
+        # see if we can match our start token
         if m:

             # ok, so create our start token, and get ready to start reading data
             d = m.groupdict()
-            self._add_from_regex(context, 'start', lexer, m)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d)))
+            self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None
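
The shape of the change to RegionRule.match: push a RuleContext when the start pattern fires, save the context stack whenever the region consumes a whole line, and pop on the way out. Schematically (a simplified sketch with hypothetical helpers, not the literal control flow, which follows in the hunks below):

    # Sketch of the context discipline a region rule now follows.
    lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
                                     list(context), dict(d)))
    while region_is_open:          # hypothetical condition
        consume_line(lexer)        # hypothetical helper
        lexer.save_context()       # record the open regions for this line
    lexer.context.pop(-1)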
@@ -131,7 +147,7 @@ class RegionRule(Rule):
             done = False
             while not done and lexer.y < len(lexer.lines):
                 old_y = lexer.y
-                # if this line is empty, then we will skip it, but here weinsert
+                # if this line is empty, then we skip it, but here we insert
                 # an empty null token just so we have something
                 if len(lexer.lines[lexer.y]) == 0:
                     null_t = Token(null_t_name, None, lexer.y, lexer.x, '')
@@ -146,7 +162,7 @@ class RegionRule(Rule):
                 if self.end:
                     m = end_re.match(lexer.lines[lexer.y], lexer.x)
                     if m:
-                        self._add_from_regex(context, 'end', lexer, m)
+                        self._add_from_regex(context, 'end', lexer, m, None)
                         done = True
                         break
@@ -155,7 +171,7 @@ class RegionRule(Rule):
                 found = False
                 for rule in self.grammar.rules:
                     if rule.match(lexer, context + [self.name], d):
                         found = True
                         null_t = None
                         break
@@ -171,17 +187,19 @@ class RegionRule(Rule):

                 # ok, since we're soon going to be on a different line (or
                 # already are), we want a new null token. so forget about the
-                # current one.
+                # current one (i.e. stop adding to it).
                 null_t = None

                 # if we're still on the same line at this point (and not done)
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0

             # alright, we're finally done procesing the region, so return true
+            lexer.context.pop(-1)
             return True
         else:
             # region was not matched; we never started. so return false
@@ -198,9 +216,9 @@ class DualRegionRule(Rule):
         self.grammar2 = grammar2
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar=None):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
     def match(self, lexer, context=[], d={}):
@@ -208,11 +226,13 @@ class DualRegionRule(Rule):
         # see if we can match out start token
         if m1:
             # ok, so create our start token, and get ready to start reading data
-            self._add_from_regex(context, 'start', lexer, m1)
+            self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None

             d1 = m1.groupdict()
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d1)))
             d2 = {}

             middle_re = re.compile(self.middle % d1)
@@ -237,7 +257,7 @@ class DualRegionRule(Rule):
                 m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
                 if m2:
                     d2 = m2.groupdict()
-                    self._add_from_regex(context, 'middle', lexer, m2)
+                    self._add_from_regex(context, 'middle', lexer, m2, None)
                     done = True
                     break
@@ -269,6 +289,7 @@ class DualRegionRule(Rule):
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0
@@ -276,6 +297,9 @@ class DualRegionRule(Rule):
             # instead of middle tokens
             d3 = dict(d1.items() + d2.items())
             end_re = re.compile(self.end % d3)
+            lexer.context.pop(-1)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
+                                             list(context), dict(d3)))

             # ok, so as long as we aren't done (we haven't found an end token),
             # keep reading input
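
When the middle pattern fires, a DualRegionRule retires its 'start' context and replaces it with a 'middle' one carrying the merged match dictionary, so a future resume would know which stage the region was in. The merge itself is idiomatic Python 2:

    # d1 and d2 are the groupdicts of the start and middle matches;
    # items() returns lists in Python 2, so + concatenates them.
    d3 = dict(d1.items() + d2.items())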
@@ -296,7 +320,7 @@ class DualRegionRule(Rule):
                 # proceed to "stage 2"
                 m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
                 if m3:
-                    self._add_from_regex(context, 'end', lexer, m3)
+                    self._add_from_regex(context, 'end', lexer, m3, None)
                     done = True
                     break
@@ -328,10 +352,12 @@ class DualRegionRule(Rule):
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0

-            # alright, we're finally done procesing the dual region; return true
+            # alright, we're finally done processing; return true
+            lexer.context.pop(-1)
             return True
         else:
             # dual region was not matched; we never started. so return false
@@ -346,27 +372,36 @@ class Grammar:

 class Lexer:
     def __init__(self, name, grammar):
         self.name = name
         self.grammar = grammar
         self.y = 0
         self.x = 0
         self.lines = None
         self.tokens = []
+
+        self.context = []
+        self.line_contexts = {}

     def add_token(self, t):
         self.tokens.append(t)

     def lex(self, lines, y=0, x=0):
         self.y = y
         self.x = x
         self.lines = lines
         self.tokens = []
+
+        self.context = []
+        self.line_contexts = {}

     def __iter__(self):
         if self.lines is None:
             raise Exception, "no lines to lex"
         return self

+    def save_context(self):
+        self.line_contexts[self.y] = list(self.context)
+
     def next(self):
         null_t_name = 'null'
         null_t = None
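
save_context() records, keyed by line number, the stack of RuleContexts open at the moment the lexer leaves that line; list(self.context) copies the stack so later pushes and pops cannot mutate saved snapshots. A sketch of what the map might hold after lexing a buffer in which a docstring spans lines 3 through 5 (contents hypothetical):

    # line_contexts maps y -> the RuleContexts open at the end of line y,
    # e.g. {0: [], 3: [rc_string], 4: [rc_string], 5: []}
    lexer.lex(lines)
    for token in lexer:    # draining the iterator populates the map
        pass
    print lexer.line_contexts.get(3, [])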
@@ -377,13 +412,14 @@ class Lexer:
             curr_t = None
             for rule in self.grammar.rules:
                 if rule.match(self):
-                    assert self.tokens, "AAAAA %s" % repr(self.tokens)
+                    assert self.tokens, "match rendered no tokens?"
                     return self.tokens.pop(0)
             if null_t is None:
                 null_t = Token(null_t_name, None, self.y, self.x, '')
                 self.add_token(null_t)
             null_t.add_to_string(line[self.x])
             self.x += 1
+        self.save_context()
         null_t = None
         self.y += 1
         self.x = 0
@@ -25,10 +25,11 @@ class Python(mode2.Fundamental):
         self.default_color = color.build('default', 'default')

         self.colors = {
             'keyword': color.build('cyan', 'default'),
+            'reserved': color.build('cyan', 'default'),
             'builtin_method': color.build('cyan', 'default'),
             'methodname': color.build('blue', 'default'),
             'classname': color.build('green', 'default'),

             'string.start': color.build('green', 'default'),
             'string.null': color.build('green', 'default'),
@@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
             'string.format': color.build('yellow', 'default'),
             'string.end': color.build('green', 'default'),

-            'integer': color.build('red', 'default'),
-            'float': color.build('red', 'default'),
-            'imaginary': color.build('red', 'default'),
+            #'integer': color.build('red', 'default'),
+            #'float': color.build('red', 'default'),
+            #'imaginary': color.build('red', 'default'),
+            'integer': color.build('default', 'default'),
+            'float': color.build('default', 'default'),
+            'imaginary': color.build('default', 'default'),

             'tq_string.start': color.build('green', 'default'),
             'tq_string.null': color.build('green', 'default'),