branch : pmacs2
This commit is contained in:
moculus 2007-06-06 00:01:05 +00:00
parent 6334498f3d
commit 7f66ee2ea4
4 changed files with 96 additions and 56 deletions

View File

@ -304,10 +304,8 @@ class Highlighter:
def lex_buffer(self):
'''lexes the buffer according to the grammar'''
if not hasattr(self.mode, "grammar") or \
not hasattr(self.mode, "lexer") or \
self.mode.grammar is None or \
self.mode.lexer is None:
if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
not hasattr(self.mode, "lexer") or self.mode.lexer is None):
self.tokens = []
return

View File

@ -20,6 +20,7 @@ class Highlighter:
def __init__(self, lexer):
self.lexer = lexer
self.tokens = []
self.line_contexts = {}
def dump(self, fmt='(%3s, %2s) | %s'):
print fmt % ('y', 'x', 'string')
@ -54,6 +55,7 @@ class Highlighter:
self.lexer.lex(lines, y=0, x=0)
for token in self.lexer:
self.tokens[token.y].append(token)
self.line_contexts = dict(self.lexer.line_contexts)
# relexing
# ======================
@ -61,7 +63,7 @@ class Highlighter:
# start the relexing process
self.lexer.lex(lines, y1, 0)
# this keeps track of the current y coordinate, the current token index
# these keep track of the current y coordinate, the current token index
# on line[y], and the current "new token", respectively.
y = y1
i = 0

82
lex2.py
View File

@ -3,6 +3,20 @@ import re
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
reserved_names = ['start', 'middle', 'end', 'null']
class RuleContext:
    """Snapshot of an in-progress rule's state, used to resume lexing.

    Attributes:
        y, x:    coordinates where the rule's processing began
        rule:    the Rule instance which began matching
        flag:    a signal used to resume the rule correctly
                 (e.g. 'start' or 'middle' for region rules)
        context: the previous rule namespace(s), as a list of names
        matchd:  the group dictionary returned by the rule's match
    """
    def __init__(self, y, x, rule, flag, context, matchd):
        self.y = y
        self.x = x
        self.rule = rule
        # BUG FIX: 'flag' was accepted and documented but never stored,
        # so rulecontext.flag raised AttributeError for consumers
        # (e.g. RegionRule.restart). Store it like the other fields.
        self.flag = flag
        self.context = context
        self.matchd = matchd
class Token(object):
def __init__(self, name, rule, y, x, s, **vargs):
self.name = name
@ -45,11 +59,10 @@ class ConstantRule(Rule):
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
self.constant = constant
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, self.constant, name,
parent=parent))
lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
lexer.x += len(self.constant)
return True
else:
@ -62,12 +75,11 @@ class PatternRule(Rule):
self.name = name
self.pattern = pattern
self.re = re.compile(pattern)
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
m = self.re.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, m.group(0), name,
parent=parent))
lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
lexer.x += len(m.group(0))
return True
else:
@ -81,7 +93,7 @@ class ContextPatternRule(Rule):
self.pattern = pattern
self.fallback = fallback
self.fallback_re = re.compile(fallback)
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
try:
r = re.compile(self.pattern % d)
except KeyError:
@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
m = r.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, m.group(0), name,
parent=parent))
lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
lexer.x += len(m.group(0))
return True
else:
@ -105,18 +116,23 @@ class RegionRule(Rule):
self.grammar = grammar
self.end = end
self.start_re = re.compile(start)
def _add_from_regex(self, context, name, lexer, m, parent=None):
def _add_from_regex(self, context, name, lexer, m, grammar):
t_name = '.'.join(context + [self.name, name])
t = self.make_token(lexer, m.group(0), t_name, parent=parent)
t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
lexer.add_token(t)
lexer.x += len(m.group(0))
def restart(self, lexer, rulecontext):
pass
def match(self, lexer, context=[], d={}):
m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
# see if we can match out start token
# see if we can match our start token
if m:
# ok, so create our start token, and get ready to start reading data
d = m.groupdict()
self._add_from_regex(context, 'start', lexer, m)
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
list(context), dict(d)))
self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
@ -131,7 +147,7 @@ class RegionRule(Rule):
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we will skip it, but here weinsert
# if this line is empty, then we skip it, but here we insert
# an empty null token just so we have something
if len(lexer.lines[lexer.y]) == 0:
null_t = Token(null_t_name, None, lexer.y, lexer.x, '')
@ -146,7 +162,7 @@ class RegionRule(Rule):
if self.end:
m = end_re.match(lexer.lines[lexer.y], lexer.x)
if m:
self._add_from_regex(context, 'end', lexer, m)
self._add_from_regex(context, 'end', lexer, m, None)
done = True
break
@ -171,17 +187,19 @@ class RegionRule(Rule):
# ok, since we're soon going to be on a different line (or
# already are), we want a new null token. so forget about the
# current one.
# current one (i.e. stop adding to it).
null_t = None
# if we're still on the same line at this point (and not done)
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
# alright, we're finally done processing the region, so return true
lexer.context.pop(-1)
return True
else:
# region was not matched; we never started. so return false
@ -198,9 +216,9 @@ class DualRegionRule(Rule):
self.grammar2 = grammar2
self.end = end
self.start_re = re.compile(start)
def _add_from_regex(self, context, name, lexer, m, parent=None):
def _add_from_regex(self, context, name, lexer, m, grammar=None):
t_name = '.'.join(context + [self.name, name])
t = self.make_token(lexer, m.group(0), t_name, parent=parent)
t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
lexer.add_token(t)
lexer.x += len(m.group(0))
def match(self, lexer, context=[], d={}):
@ -208,11 +226,13 @@ class DualRegionRule(Rule):
# see if we can match our start token
if m1:
# ok, so create our start token, and get ready to start reading data
self._add_from_regex(context, 'start', lexer, m1)
self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
d1 = m1.groupdict()
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
list(context), dict(d1)))
d2 = {}
middle_re = re.compile(self.middle % d1)
@ -237,7 +257,7 @@ class DualRegionRule(Rule):
m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
if m2:
d2 = m2.groupdict()
self._add_from_regex(context, 'middle', lexer, m2)
self._add_from_regex(context, 'middle', lexer, m2, None)
done = True
break
@ -269,6 +289,7 @@ class DualRegionRule(Rule):
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
@ -276,6 +297,9 @@ class DualRegionRule(Rule):
# instead of middle tokens
d3 = dict(d1.items() + d2.items())
end_re = re.compile(self.end % d3)
lexer.context.pop(-1)
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
list(context), dict(d3)))
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
@ -296,7 +320,7 @@ class DualRegionRule(Rule):
# proceed to "stage 2"
m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
if m3:
self._add_from_regex(context, 'end', lexer, m3)
self._add_from_regex(context, 'end', lexer, m3, None)
done = True
break
@ -328,10 +352,12 @@ class DualRegionRule(Rule):
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
# alright, we're finally done procesing the dual region; return true
# alright, we're finally done processing; return true
lexer.context.pop(-1)
return True
else:
# dual region was not matched; we never started. so return false
@ -353,6 +379,9 @@ class Lexer:
self.lines = None
self.tokens = []
self.context = []
self.line_contexts = {}
def add_token(self, t):
self.tokens.append(t)
@ -362,11 +391,17 @@ class Lexer:
self.lines = lines
self.tokens = []
self.context = []
self.line_contexts = {}
def __iter__(self):
if self.lines is None:
raise Exception, "no lines to lex"
return self
def save_context(self):
self.line_contexts[self.y] = list(self.context)
def next(self):
null_t_name = 'null'
null_t = None
@ -377,13 +412,14 @@ class Lexer:
curr_t = None
for rule in self.grammar.rules:
if rule.match(self):
assert self.tokens, "AAAAA %s" % repr(self.tokens)
assert self.tokens, "match rendered no tokens?"
return self.tokens.pop(0)
if null_t is None:
null_t = Token(null_t_name, None, self.y, self.x, '')
self.add_token(null_t)
null_t.add_to_string(line[self.x])
self.x += 1
self.save_context()
null_t = None
self.y += 1
self.x = 0

View File

@ -26,6 +26,7 @@ class Python(mode2.Fundamental):
self.colors = {
'keyword': color.build('cyan', 'default'),
'reserved': color.build('cyan', 'default'),
'builtin_method': color.build('cyan', 'default'),
'methodname': color.build('blue', 'default'),
'classname': color.build('green', 'default'),
@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
'string.format': color.build('yellow', 'default'),
'string.end': color.build('green', 'default'),
'integer': color.build('red', 'default'),
'float': color.build('red', 'default'),
'imaginary': color.build('red', 'default'),
#'integer': color.build('red', 'default'),
#'float': color.build('red', 'default'),
#'imaginary': color.build('red', 'default'),
'integer': color.build('default', 'default'),
'float': color.build('default', 'default'),
'imaginary': color.build('default', 'default'),
'tq_string.start': color.build('green', 'default'),
'tq_string.null': color.build('green', 'default'),