parent 6334498f3d
commit 7f66ee2ea4
@@ -304,10 +304,8 @@ class Highlighter:
     def lex_buffer(self):
         '''lexes the buffer according to the grammar'''
-        if not hasattr(self.mode, "grammar") or \
-           not hasattr(self.mode, "lexer") or \
-           self.mode.grammar is None or \
-           self.mode.lexer is None:
+        if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
+            not hasattr(self.mode, "lexer") or self.mode.lexer is None):
             self.tokens = []
             return
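
The new guard folds four backslash-continued clauses into one parenthesized condition; each attribute is still checked both for presence and for None. A standalone sketch of the same logic (`Mode` and `ready` are hypothetical names, not part of this codebase):

```python
# Hypothetical stand-ins illustrating the hasattr/None guard; the real
# check lives in lex_buffer() above.
class Mode(object):
    grammar = None
    lexer = None

def ready(mode):
    # mirrors the new parenthesized condition
    return not (not hasattr(mode, 'grammar') or mode.grammar is None or
                not hasattr(mode, 'lexer') or mode.lexer is None)

m = Mode()
assert ready(m) is False      # attributes exist but are None
m.grammar = object()
m.lexer = object()
assert ready(m) is True
```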

@@ -18,8 +18,9 @@ for i in range(0, len(color_list)):

 class Highlighter:
     def __init__(self, lexer):
-        self.lexer = lexer
-        self.tokens = []
+        self.lexer         = lexer
+        self.tokens        = []
+        self.line_contexts = {}

     def dump(self, fmt='(%3s, %2s) | %s'):
         print fmt % ('y', 'x', 'string')

@@ -50,10 +51,11 @@ class Highlighter:
         sys.stdout.write('\n')

     def highlight(self, lines):
-        self.tokens = [[] for l in lines]
+        self.tokens        = [[] for l in lines]
         self.lexer.lex(lines, y=0, x=0)
         for token in self.lexer:
             self.tokens[token.y].append(token)
+        self.line_contexts = dict(self.lexer.line_contexts)

     # relexing
     # ======================

@@ -61,12 +63,12 @@ class Highlighter:
         # start the relexing process
         self.lexer.lex(lines, y1, 0)

-        # this keeps track of the current y coordinate, the current token index
+        # these keep track of the current y coordinate, the current token index
         # on line[y], and the current "new token", respectively.
-        y = y1
-        i = 0
-        getnext = True
-        new_token = None
+        y         = y1
+        i         = 0
+        getnext   = True
+        new_token = None

         while True:
             # if we have overstepped our bounds, then exit!

@@ -77,7 +79,7 @@ class Highlighter:
             if getnext:
                 try:
                     new_token = self.lexer.next()
-                    getnext = False
+                    getnext   = False
                 except StopIteration:
                     # ok, so this means that ALL the rest of the tokens didn't
                     # show up, because we're done. so delete them and exit
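
Conceptually, highlight() buckets the lexer's token stream by line and then snapshots the lexer's per-line contexts for later relexing. A minimal sketch of that flow with a stub lexer (Token, StubLexer, and the sample input here are illustrative stand-ins, not the editor's real classes):

```python
# Illustrative stand-ins; the real Token/Lexer live in lex2.py.
class Token(object):
    def __init__(self, name, y, x, s):
        self.name, self.y, self.x, self.s = name, y, x, s

class StubLexer(object):
    """Yields one 'null' token per word; records an (empty) context per line."""
    def __init__(self):
        self.lines = []
        self.line_contexts = {}
    def lex(self, lines, y=0, x=0):
        self.lines = lines
    def __iter__(self):
        for y, line in enumerate(self.lines):
            x = 0
            for word in line.split():
                x = line.index(word, x)
                yield Token('null', y, x, word)
                x += len(word)
            self.line_contexts[y] = []  # no regions open in this stub

lexer = StubLexer()
lines = ['def f():', '    return 3']
tokens = [[] for l in lines]          # one bucket per line, as in highlight()
lexer.lex(lines, y=0, x=0)
for token in lexer:
    tokens[token.y].append(token)
line_contexts = dict(lexer.line_contexts)
assert [t.s for t in tokens[1]] == ['return', '3']
```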

--- a/lex2.py
+++ b/lex2.py
@@ -3,6 +3,20 @@ import re
 valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
 reserved_names = ['start', 'middle', 'end', 'null']

+class RuleContext:
+    # to be clear:
+    #   x, y: where the rule processing began
+    #   rule: the rule which began
+    #   flag: a signal to be used to resume the rule correctly
+    #   context: the previous rule namespace(s)
+    #   matchd: the dictionary returned by the rule's matching
+    def __init__(self, y, x, rule, flag, context, matchd):
+        self.y = y
+        self.x = x
+        self.rule = rule
+        self.context = context
+        self.matchd = matchd
+
 class Token(object):
     def __init__(self, name, rule, y, x, s, **vargs):
         self.name = name
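
RuleContext is the bookkeeping record that makes per-line resumption possible: it pins down where a (possibly multiline) rule began and what it captured. A toy sketch of a context stack built from such records (the helper names here are hypothetical, not the module's API):

```python
# Hypothetical helpers showing how a stack of RuleContext-style records
# can describe which regions are still open at the end of each line.
class Ctx(object):
    def __init__(self, y, x, rule, flag, context, matchd):
        self.y, self.x, self.rule = y, x, rule
        self.flag, self.context, self.matchd = flag, context, matchd

stack = []          # like lexer.context
line_contexts = {}  # like lexer.line_contexts

def save_context(y):
    line_contexts[y] = list(stack)   # snapshot: regions open at end of line y

stack.append(Ctx(0, 0, 'tq_string', 'start', [], {}))  # region opens on line 0
save_context(0)                       # line 0 ends inside the region
stack.pop(-1)                         # region closes on line 1
save_context(1)                       # line 1 ends with nothing open
assert [c.rule for c in line_contexts[0]] == ['tq_string']
assert line_contexts[1] == []
```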

@@ -45,11 +59,10 @@ class ConstantRule(Rule):
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name = name
         self.constant = constant
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, self.constant, name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
             lexer.x += len(self.constant)
             return True
         else:

@@ -62,12 +75,11 @@ class PatternRule(Rule):
         self.name = name
         self.pattern = pattern
         self.re = re.compile(pattern)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         m = self.re.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:

@@ -77,11 +89,11 @@ class ContextPatternRule(Rule):
     def __init__(self, name, pattern, fallback):
         assert valid_name_re.match(name), 'invalid name %r' % name
         assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
-        self.pattern = pattern
-        self.fallback = fallback
+        self.name        = name
+        self.pattern     = pattern
+        self.fallback    = fallback
         self.fallback_re = re.compile(fallback)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         try:
             r = re.compile(self.pattern % d)
         except KeyError:

@@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
         m = r.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
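
The try/except above is the crux of ContextPatternRule: its pattern may reference named groups captured by an enclosing region via %-interpolation, and when a key is missing it falls back to the static pattern. A sketch under those assumptions (the patterns below are invented for illustration):

```python
import re

# Illustrative only: a close-tag pattern that can use the enclosing
# region's captured 'tag' group, with a generic fallback.
pattern  = r'</%(tag)s>'
fallback = r'</[a-zA-Z0-9_]+>'

def compile_for(d):
    try:
        return re.compile(pattern % d)
    except KeyError:
        return re.compile(fallback)

r1 = compile_for({'tag': 'em'})   # inside an <em> region
r2 = compile_for({})              # no enclosing tag captured
assert r1.match('</em>') and not r1.match('</b>')
assert r2.match('</b>')
```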

@@ -105,18 +116,23 @@ class RegionRule(Rule):
         self.grammar = grammar
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
+    def restart(self, lexer, rulecontext):
+        pass
     def match(self, lexer, context=[], d={}):
         m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
-        # see if we can match out start token
+        # see if we can match our start token
         if m:
             # ok, so create our start token, and get ready to start reading data
             d = m.groupdict()
-            self._add_from_regex(context, 'start', lexer, m)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d)))
+            self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None
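
The '.'.join(...) calls above define how region tokens are namespaced: the enclosing contexts, then the rule's own name, then a role such as start, null, or end. For example:

```python
# Token-name composition, as in the '.'.join(...) calls above.
context = []             # enclosing rule namespaces, outermost first
name = 'string'          # a RegionRule's name

assert '.'.join(context + [name, 'start']) == 'string.start'
assert '.'.join(context + [name, 'null']) == 'string.null'

# one level of nesting, e.g. a hypothetical 'string' region inside 'tag':
assert '.'.join(['tag'] + [name, 'end']) == 'tag.string.end'
```

These dotted names are what a mode's colors dictionary keys against (see the 'string.start' entries in the Python mode hunks below).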

@@ -131,7 +147,7 @@ class RegionRule(Rule):
             done = False
             while not done and lexer.y < len(lexer.lines):
                 old_y = lexer.y
-                # if this line is empty, then we will skip it, but here weinsert
+                # if this line is empty, then we skip it, but here we insert
                 # an empty null token just so we have something
                 if len(lexer.lines[lexer.y]) == 0:
                     null_t = Token(null_t_name, None, lexer.y, lexer.x, '')

@@ -146,7 +162,7 @@ class RegionRule(Rule):
                 if self.end:
                     m = end_re.match(lexer.lines[lexer.y], lexer.x)
                     if m:
-                        self._add_from_regex(context, 'end', lexer, m)
+                        self._add_from_regex(context, 'end', lexer, m, None)
                         done = True
                         break

@@ -155,7 +171,7 @@ class RegionRule(Rule):
                 found = False
                 for rule in self.grammar.rules:
                     if rule.match(lexer, context + [self.name], d):
-                        found = True
+                        found  = True
                         null_t = None
                         break

@@ -171,17 +187,19 @@ class RegionRule(Rule):
                 # ok, since we're soon going to be on a different line (or
                 # already are), we want a new null token. so forget about the
-                # current one.
+                # current one (i.e. stop adding to it).
                 null_t = None

                 # if we're still on the same line at this point (and not done)
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0

             # alright, we're finally done procesing the region, so return true
+            lexer.context.pop(-1)
             return True
         else:
             # region was not matched; we never started. so return false

@@ -198,9 +216,9 @@ class DualRegionRule(Rule):
         self.grammar2 = grammar2
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar=None):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
     def match(self, lexer, context=[], d={}):

@@ -208,11 +226,13 @@ class DualRegionRule(Rule):
         # see if we can match out start token
         if m1:
             # ok, so create our start token, and get ready to start reading data
-            self._add_from_regex(context, 'start', lexer, m1)
+            self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None

             d1 = m1.groupdict()
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d1)))
             d2 = {}

             middle_re = re.compile(self.middle % d1)

@@ -237,7 +257,7 @@ class DualRegionRule(Rule):
                 m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
                 if m2:
                     d2 = m2.groupdict()
-                    self._add_from_regex(context, 'middle', lexer, m2)
+                    self._add_from_regex(context, 'middle', lexer, m2, None)
                     done = True
                     break

@@ -269,6 +289,7 @@ class DualRegionRule(Rule):
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0

@@ -276,6 +297,9 @@ class DualRegionRule(Rule):
             # instead of middle tokens
             d3 = dict(d1.items() + d2.items())
             end_re = re.compile(self.end % d3)
+            lexer.context.pop(-1)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
+                                             list(context), dict(d3)))

             # ok, so as long as we aren't done (we haven't found an end token),
             # keep reading input

@@ -296,7 +320,7 @@ class DualRegionRule(Rule):
                 # proceed to "stage 2"
                 m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
                 if m3:
-                    self._add_from_regex(context, 'end', lexer, m3)
+                    self._add_from_regex(context, 'end', lexer, m3, None)
                     done = True
                     break

@@ -328,10 +352,12 @@ class DualRegionRule(Rule):
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0

-            # alright, we're finally done procesing the dual region; return true
+            # alright, we're finally done processing; return true
+            lexer.context.pop(-1)
             return True
         else:
             # dual region was not matched; we never started. so return false
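
DualRegionRule matches in two stages: the start match's named groups are %-interpolated into the middle pattern, and the merged start+middle groups into the end pattern. A sketch with invented, heredoc-flavored patterns (not rules the editor actually defines):

```python
import re

# Illustrative patterns: the start captures a name, and the later
# patterns are built from that capture, as in DualRegionRule.match().
start  = r'<<(?P<heredoc>[A-Z]+)'
middle = r'%(heredoc)s:'      # stage 1 ends here
end    = r'%(heredoc)s$'      # stage 2 ends here

m1 = re.match(start, '<<EOF')
d1 = m1.groupdict()                              # {'heredoc': 'EOF'}
middle_re = re.compile(middle % d1)              # matches 'EOF:'
d2 = {}                                          # groups from the middle match
d3 = dict(list(d1.items()) + list(d2.items()))   # merged dict for the end
end_re = re.compile(end % d3)                    # matches 'EOF' at end of input
assert middle_re.match('EOF:')
assert end_re.match('EOF')
```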

@@ -346,27 +372,36 @@ class Grammar:

 class Lexer:
     def __init__(self, name, grammar):
-        self.name = name
-        self.grammar = grammar
-        self.y = 0
-        self.x = 0
-        self.lines = None
-        self.tokens = []
+        self.name    = name
+        self.grammar = grammar
+        self.y       = 0
+        self.x       = 0
+        self.lines   = None
+        self.tokens  = []
+
+        self.context       = []
+        self.line_contexts = {}

     def add_token(self, t):
         self.tokens.append(t)

     def lex(self, lines, y=0, x=0):
-        self.y = y
-        self.x = x
-        self.lines = lines
+        self.y      = y
+        self.x      = x
+        self.lines  = lines
         self.tokens = []

+        self.context       = []
+        self.line_contexts = {}
+
     def __iter__(self):
         if self.lines is None:
             raise Exception, "no lines to lex"
         return self

+    def save_context(self):
+        self.line_contexts[self.y] = list(self.context)
+
     def next(self):
         null_t_name = 'null'
         null_t = None

@@ -377,13 +412,14 @@ class Lexer:
                 curr_t = None
                 for rule in self.grammar.rules:
                     if rule.match(self):
-                        assert self.tokens, "AAAAA %s" % repr(self.tokens)
+                        assert self.tokens, "match rendered no tokens?"
                         return self.tokens.pop(0)
                 if null_t is None:
                     null_t = Token(null_t_name, None, self.y, self.x, '')
                     self.add_token(null_t)
                 null_t.add_to_string(line[self.x])
                 self.x += 1
+            self.save_context()
             null_t = None
             self.y += 1
             self.x = 0
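
The Lexer is consumed through the iterator protocol: lex() arms it with lines, __iter__ sanity-checks, and next() produces one token at a time (Python 2 spelling; Python 3 would use __next__). A toy generator-based equivalent of the consumption loop (NumberLexer is invented for illustration):

```python
import re

# A toy lexer that tokenizes runs of digits, shaped like the Lexer above.
class NumberLexer(object):
    def __init__(self):
        self.lines = None
        self.y = 0
        self.x = 0
    def lex(self, lines, y=0, x=0):
        self.lines, self.y, self.x = lines, y, x
    def __iter__(self):
        if self.lines is None:
            raise Exception("no lines to lex")
        for y in range(self.y, len(self.lines)):
            for m in re.finditer(r'\d+', self.lines[y]):
                yield (y, m.start(), m.group(0))

lexer = NumberLexer()
lexer.lex(['a 12 b', '345'], y=0, x=0)
tokens = [t for t in lexer]          # same shape as 'for token in self.lexer'
assert tokens == [(0, 2, '12'), (1, 0, '345')]
```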

@@ -25,10 +25,11 @@ class Python(mode2.Fundamental):
         self.default_color = color.build('default', 'default')

         self.colors = {
-            'keyword': color.build('cyan', 'default'),
-            'builtin_method': color.build('cyan', 'default'),
-            'methodname': color.build('blue', 'default'),
-            'classname': color.build('green', 'default'),
+            'keyword':        color.build('cyan', 'default'),
+            'reserved':       color.build('cyan', 'default'),
+            'builtin_method': color.build('cyan', 'default'),
+            'methodname':     color.build('blue', 'default'),
+            'classname':      color.build('green', 'default'),

             'string.start': color.build('green', 'default'),
             'string.null': color.build('green', 'default'),

@@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
             'string.format': color.build('yellow', 'default'),
             'string.end': color.build('green', 'default'),

-            'integer': color.build('red', 'default'),
-            'float': color.build('red', 'default'),
-            'imaginary': color.build('red', 'default'),
+            #'integer': color.build('red', 'default'),
+            #'float': color.build('red', 'default'),
+            #'imaginary': color.build('red', 'default'),
+            'integer': color.build('default', 'default'),
+            'float': color.build('default', 'default'),
+            'imaginary': color.build('default', 'default'),

             'tq_string.start': color.build('green', 'default'),
             'tq_string.null': color.build('green', 'default'),
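
These color entries key on the full dotted token names produced by the lexer. A plausible lookup sketch, falling back to the mode's default color for unmapped names (hypothetical; this diff doesn't show the mode's actual resolution code):

```python
# Hypothetical resolution of dotted token names against the colors map.
colors = {
    'keyword':      ('cyan', 'default'),
    'string.start': ('green', 'default'),
    'string.null':  ('green', 'default'),
    'string.end':   ('green', 'default'),
}
default_color = ('default', 'default')

def color_for(token_name):
    return colors.get(token_name, default_color)

assert color_for('string.null') == ('green', 'default')
assert color_for('integer') == ('default', 'default')  # uncolored, per the hunk above
```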