branch : pmacs2
This commit is contained in:
moculus 2007-06-06 00:01:05 +00:00
parent 6334498f3d
commit 7f66ee2ea4
4 changed files with 96 additions and 56 deletions

View File

@ -304,10 +304,8 @@ class Highlighter:
def lex_buffer(self):
'''lexes the buffer according to the grammar'''
if not hasattr(self.mode, "grammar") or \
not hasattr(self.mode, "lexer") or \
self.mode.grammar is None or \
self.mode.lexer is None:
if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
not hasattr(self.mode, "lexer") or self.mode.lexer is None):
self.tokens = []
return

View File

@ -18,8 +18,9 @@ for i in range(0, len(color_list)):
class Highlighter:
def __init__(self, lexer):
self.lexer = lexer
self.tokens = []
self.lexer = lexer
self.tokens = []
self.line_contexts = {}
def dump(self, fmt='(%3s, %2s) | %s'):
print fmt % ('y', 'x', 'string')
@ -50,10 +51,11 @@ class Highlighter:
sys.stdout.write('\n')
def highlight(self, lines):
self.tokens = [[] for l in lines]
self.tokens = [[] for l in lines]
self.lexer.lex(lines, y=0, x=0)
for token in self.lexer:
self.tokens[token.y].append(token)
self.line_contexts = dict(self.lexer.line_contexts)
# relexing
# ======================
@ -61,12 +63,12 @@ class Highlighter:
# start the relexing process
self.lexer.lex(lines, y1, 0)
# this keeps track of the current y coordinate, the current token index
# these keep track of the current y coordinate, the current token index
# on line[y], and the current "new token", respectively.
y = y1
i = 0
getnext = True
new_token = None
y = y1
i = 0
getnext = True
new_token = None
while True:
# if we have overstepped our bounds, then exit!
@ -77,7 +79,7 @@ class Highlighter:
if getnext:
try:
new_token = self.lexer.next()
getnext = False
getnext = False
except StopIteration:
# ok, so this means that ALL the rest of the tokens didn't
# show up, because we're done. so delete them and exit

108
lex2.py
View File

@ -3,6 +3,20 @@ import re
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
reserved_names = ['start', 'middle', 'end', 'null']
class RuleContext:
# to be clear:
# x, y: where the rule processing began
# rule: the rule which began
# flag: a signal to be used to resume the rule correctly
# context: the previous rule namespace(s)
# matchd: the dictionary returned by the rule's matching
def __init__(self, y, x, rule, flag, context, matchd):
self.y = y
self.x = x
self.rule = rule
self.context = context
self.matchd = matchd
class Token(object):
def __init__(self, name, rule, y, x, s, **vargs):
self.name = name
@ -45,11 +59,10 @@ class ConstantRule(Rule):
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
self.constant = constant
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, self.constant, name,
parent=parent))
lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
lexer.x += len(self.constant)
return True
else:
@ -62,12 +75,11 @@ class PatternRule(Rule):
self.name = name
self.pattern = pattern
self.re = re.compile(pattern)
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
m = self.re.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, m.group(0), name,
parent=parent))
lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
lexer.x += len(m.group(0))
return True
else:
@ -77,11 +89,11 @@ class ContextPatternRule(Rule):
def __init__(self, name, pattern, fallback):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
self.pattern = pattern
self.fallback = fallback
self.name = name
self.pattern = pattern
self.fallback = fallback
self.fallback_re = re.compile(fallback)
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
try:
r = re.compile(self.pattern % d)
except KeyError:
@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
m = r.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, m.group(0), name,
parent=parent))
lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
lexer.x += len(m.group(0))
return True
else:
@ -105,18 +116,23 @@ class RegionRule(Rule):
self.grammar = grammar
self.end = end
self.start_re = re.compile(start)
def _add_from_regex(self, context, name, lexer, m, parent=None):
def _add_from_regex(self, context, name, lexer, m, grammar):
t_name = '.'.join(context + [self.name, name])
t = self.make_token(lexer, m.group(0), t_name, parent=parent)
t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
lexer.add_token(t)
lexer.x += len(m.group(0))
def restart(self, lexer, rulecontext):
pass
def match(self, lexer, context=[], d={}):
m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
# see if we can match out start token
# see if we can match our start token
if m:
# ok, so create our start token, and get ready to start reading data
d = m.groupdict()
self._add_from_regex(context, 'start', lexer, m)
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
list(context), dict(d)))
self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
@ -131,7 +147,7 @@ class RegionRule(Rule):
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we will skip it, but here weinsert
# if this line is empty, then we skip it, but here we insert
# an empty null token just so we have something
if len(lexer.lines[lexer.y]) == 0:
null_t = Token(null_t_name, None, lexer.y, lexer.x, '')
@ -146,7 +162,7 @@ class RegionRule(Rule):
if self.end:
m = end_re.match(lexer.lines[lexer.y], lexer.x)
if m:
self._add_from_regex(context, 'end', lexer, m)
self._add_from_regex(context, 'end', lexer, m, None)
done = True
break
@ -155,7 +171,7 @@ class RegionRule(Rule):
found = False
for rule in self.grammar.rules:
if rule.match(lexer, context + [self.name], d):
found = True
found = True
null_t = None
break
@ -171,17 +187,19 @@ class RegionRule(Rule):
# ok, since we're soon going to be on a different line (or
# already are), we want a new null token. so forget about the
# current one.
# current one (i.e. stop adding to it).
null_t = None
# if we're still on the same line at this point (and not done)
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
            # alright, we're finally done processing the region, so return true
lexer.context.pop(-1)
return True
else:
# region was not matched; we never started. so return false
@ -198,9 +216,9 @@ class DualRegionRule(Rule):
self.grammar2 = grammar2
self.end = end
self.start_re = re.compile(start)
def _add_from_regex(self, context, name, lexer, m, parent=None):
def _add_from_regex(self, context, name, lexer, m, grammar=None):
t_name = '.'.join(context + [self.name, name])
t = self.make_token(lexer, m.group(0), t_name, parent=parent)
t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
lexer.add_token(t)
lexer.x += len(m.group(0))
def match(self, lexer, context=[], d={}):
@ -208,11 +226,13 @@ class DualRegionRule(Rule):
        # see if we can match our start token
if m1:
# ok, so create our start token, and get ready to start reading data
self._add_from_regex(context, 'start', lexer, m1)
self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
d1 = m1.groupdict()
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
list(context), dict(d1)))
d2 = {}
middle_re = re.compile(self.middle % d1)
@ -237,7 +257,7 @@ class DualRegionRule(Rule):
m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
if m2:
d2 = m2.groupdict()
self._add_from_regex(context, 'middle', lexer, m2)
self._add_from_regex(context, 'middle', lexer, m2, None)
done = True
break
@ -269,6 +289,7 @@ class DualRegionRule(Rule):
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
@ -276,6 +297,9 @@ class DualRegionRule(Rule):
# instead of middle tokens
d3 = dict(d1.items() + d2.items())
end_re = re.compile(self.end % d3)
lexer.context.pop(-1)
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
list(context), dict(d3)))
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
@ -296,7 +320,7 @@ class DualRegionRule(Rule):
# proceed to "stage 2"
m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
if m3:
self._add_from_regex(context, 'end', lexer, m3)
self._add_from_regex(context, 'end', lexer, m3, None)
done = True
break
@ -328,10 +352,12 @@ class DualRegionRule(Rule):
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
# alright, we're finally done procesing the dual region; return true
# alright, we're finally done processing; return true
lexer.context.pop(-1)
return True
else:
# dual region was not matched; we never started. so return false
@ -346,27 +372,36 @@ class Grammar:
class Lexer:
def __init__(self, name, grammar):
self.name = name
self.grammar = grammar
self.y = 0
self.x = 0
self.lines = None
self.tokens = []
self.name = name
self.grammar = grammar
self.y = 0
self.x = 0
self.lines = None
self.tokens = []
self.context = []
self.line_contexts = {}
def add_token(self, t):
self.tokens.append(t)
def lex(self, lines, y=0, x=0):
self.y = y
self.x = x
self.lines = lines
self.y = y
self.x = x
self.lines = lines
self.tokens = []
self.context = []
self.line_contexts = {}
def __iter__(self):
if self.lines is None:
raise Exception, "no lines to lex"
return self
def save_context(self):
self.line_contexts[self.y] = list(self.context)
def next(self):
null_t_name = 'null'
null_t = None
@ -377,13 +412,14 @@ class Lexer:
curr_t = None
for rule in self.grammar.rules:
if rule.match(self):
assert self.tokens, "AAAAA %s" % repr(self.tokens)
assert self.tokens, "match rendered no tokens?"
return self.tokens.pop(0)
if null_t is None:
null_t = Token(null_t_name, None, self.y, self.x, '')
self.add_token(null_t)
null_t.add_to_string(line[self.x])
self.x += 1
self.save_context()
null_t = None
self.y += 1
self.x = 0

View File

@ -25,10 +25,11 @@ class Python(mode2.Fundamental):
self.default_color = color.build('default', 'default')
self.colors = {
'keyword': color.build('cyan', 'default'),
'builtin_method': color.build('cyan', 'default'),
'methodname': color.build('blue', 'default'),
'classname': color.build('green', 'default'),
'keyword': color.build('cyan', 'default'),
'reserved': color.build('cyan', 'default'),
'builtin_method': color.build('cyan', 'default'),
'methodname': color.build('blue', 'default'),
'classname': color.build('green', 'default'),
'string.start': color.build('green', 'default'),
'string.null': color.build('green', 'default'),
@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
'string.format': color.build('yellow', 'default'),
'string.end': color.build('green', 'default'),
'integer': color.build('red', 'default'),
'float': color.build('red', 'default'),
'imaginary': color.build('red', 'default'),
#'integer': color.build('red', 'default'),
#'float': color.build('red', 'default'),
#'imaginary': color.build('red', 'default'),
'integer': color.build('default', 'default'),
'float': color.build('default', 'default'),
'imaginary': color.build('default', 'default'),
'tq_string.start': color.build('green', 'default'),
'tq_string.null': color.build('green', 'default'),