branch: pmacs2
This commit is contained in:
moculus 2007-06-06 00:01:05 +00:00
parent 6334498f3d
commit 7f66ee2ea4
4 changed files with 96 additions and 56 deletions

@@ -304,10 +304,8 @@ class Highlighter:
     def lex_buffer(self):
         '''lexes the buffer according to the grammar'''
-        if not hasattr(self.mode, "grammar") or \
-           not hasattr(self.mode, "lexer") or \
-           self.mode.grammar is None or \
-           self.mode.lexer is None:
+        if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
+            not hasattr(self.mode, "lexer") or self.mode.lexer is None):
             self.tokens = []
             return

@@ -18,8 +18,9 @@ for i in range(0, len(color_list)):
 class Highlighter:
     def __init__(self, lexer):
         self.lexer = lexer
         self.tokens = []
+        self.line_contexts = {}
 
     def dump(self, fmt='(%3s, %2s) | %s'):
         print fmt % ('y', 'x', 'string')
@@ -50,10 +51,11 @@ class Highlighter:
         sys.stdout.write('\n')
 
     def highlight(self, lines):
         self.tokens = [[] for l in lines]
         self.lexer.lex(lines, y=0, x=0)
         for token in self.lexer:
             self.tokens[token.y].append(token)
+        self.line_contexts = dict(self.lexer.line_contexts)
 
     # relexing
     # ======================
@@ -61,12 +63,12 @@ class Highlighter:
         # start the relexing process
         self.lexer.lex(lines, y1, 0)
 
-        # this keeps track of the current y coordinate, the current token index
+        # these keep track of the current y coordinate, the current token index
         # on line[y], and the current "new token", respectively.
         y = y1
         i = 0
         getnext = True
         new_token = None
 
         while True:
             # if we have overstepped our bounds, then exit!
@@ -77,7 +79,7 @@ class Highlighter:
             if getnext:
                 try:
                     new_token = self.lexer.next()
                     getnext = False
                 except StopIteration:
                     # ok, so this means that ALL the rest of the tokens didn't
                     # show up, because we're done. so delete them and exit
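Taken together, the hunks above route the lexer's new per-line context snapshots into the display layer: highlight() now copies lexer.line_contexts after a full pass, so the relexing path can later ask what state each line started in. A minimal sketch of the intended flow, assuming PatternRule(name, pattern) and a Grammar whose rules list can be assigned directly (neither constructor is shown in this commit):

from lex2 import Grammar, PatternRule, Lexer
# Highlighter is the class patched above; its module name is not shown
# in this diff, so import it from wherever it lives in the tree

# hypothetical one-rule grammar; we assume a bare Grammar instance
# exposing the 'rules' list that RegionRule and Lexer iterate over
g = Grammar()
g.rules = [PatternRule('word', '[a-zA-Z_]+')]

lexer = Lexer('demo', g)
h = Highlighter(lexer)

h.highlight(['foo bar', 'baz'])
# highlight() buckets tokens per line in h.tokens
for y, tokens in enumerate(h.tokens):
    print y, [t.name for t in tokens]

# after a full pass, every line maps to the stack of RuleContexts still
# open when the line ended; empty here, since no region rule matched
print h.line_contexts   # {0: [], 1: []}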

lex2.py (108 changed lines)

@@ -3,6 +3,20 @@ import re
 valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
 reserved_names = ['start', 'middle', 'end', 'null']
 
+class RuleContext:
+    # to be clear:
+    # x, y: where the rule processing began
+    # rule: the rule which began
+    # flag: a signal to be used to resume the rule correctly
+    # context: the previous rule namespace(s)
+    # matchd: the dictionary returned by the rule's matching
+    def __init__(self, y, x, rule, flag, context, matchd):
+        self.y = y
+        self.x = x
+        self.rule = rule
+        self.context = context
+        self.matchd = matchd
+
 class Token(object):
     def __init__(self, name, rule, y, x, s, **vargs):
         self.name = name
@@ -45,11 +59,10 @@ class ConstantRule(Rule):
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name = name
         self.constant = constant
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, self.constant, name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
             lexer.x += len(self.constant)
             return True
         else:
@@ -62,12 +75,11 @@ class PatternRule(Rule):
         self.name = name
         self.pattern = pattern
         self.re = re.compile(pattern)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         m = self.re.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
@@ -77,11 +89,11 @@ class ContextPatternRule(Rule):
     def __init__(self, name, pattern, fallback):
         assert valid_name_re.match(name), 'invalid name %r' % name
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name = name
         self.pattern = pattern
         self.fallback = fallback
         self.fallback_re = re.compile(fallback)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         try:
             r = re.compile(self.pattern % d)
         except KeyError:
@@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
         m = r.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
@@ -105,18 +116,23 @@ class RegionRule(Rule):
         self.grammar = grammar
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
+    def restart(self, lexer, rulecontext):
+        pass
     def match(self, lexer, context=[], d={}):
         m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
-        # see if we can match out start token
+        # see if we can match our start token
         if m:
             # ok, so create our start token, and get ready to start reading data
             d = m.groupdict()
-            self._add_from_regex(context, 'start', lexer, m)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d)))
+            self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None
@@ -131,7 +147,7 @@ class RegionRule(Rule):
             done = False
             while not done and lexer.y < len(lexer.lines):
                 old_y = lexer.y
-                # if this line is empty, then we will skip it, but here weinsert
+                # if this line is empty, then we skip it, but here we insert
                 # an empty null token just so we have something
                 if len(lexer.lines[lexer.y]) == 0:
                     null_t = Token(null_t_name, None, lexer.y, lexer.x, '')
@@ -146,7 +162,7 @@ class RegionRule(Rule):
                 if self.end:
                     m = end_re.match(lexer.lines[lexer.y], lexer.x)
                     if m:
-                        self._add_from_regex(context, 'end', lexer, m)
+                        self._add_from_regex(context, 'end', lexer, m, None)
                         done = True
                         break
@@ -155,7 +171,7 @@ class RegionRule(Rule):
                     found = False
                     for rule in self.grammar.rules:
                         if rule.match(lexer, context + [self.name], d):
                             found = True
                             null_t = None
                             break
@@ -171,17 +187,19 @@ class RegionRule(Rule):
                 # ok, since we're soon going to be on a different line (or
                 # already are), we want a new null token. so forget about the
-                # current one.
+                # current one (i.e. stop adding to it).
                 null_t = None
 
                 # if we're still on the same line at this point (and not done)
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0
 
             # alright, we're finally done processing the region, so return true
+            lexer.context.pop(-1)
             return True
         else:
             # region was not matched; we never started. so return false
@@ -198,9 +216,9 @@ class DualRegionRule(Rule):
         self.grammar2 = grammar2
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar=None):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
     def match(self, lexer, context=[], d={}):
@@ -208,11 +226,13 @@ class DualRegionRule(Rule):
         # see if we can match our start token
         if m1:
             # ok, so create our start token, and get ready to start reading data
-            self._add_from_regex(context, 'start', lexer, m1)
+            self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None
 
             d1 = m1.groupdict()
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d1)))
             d2 = {}
             middle_re = re.compile(self.middle % d1)
@@ -237,7 +257,7 @@ class DualRegionRule(Rule):
                     m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
                     if m2:
                         d2 = m2.groupdict()
-                        self._add_from_regex(context, 'middle', lexer, m2)
+                        self._add_from_regex(context, 'middle', lexer, m2, None)
                         done = True
                         break
@@ -269,6 +289,7 @@ class DualRegionRule(Rule):
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0
@@ -276,6 +297,9 @@ class DualRegionRule(Rule):
             # instead of middle tokens
             d3 = dict(d1.items() + d2.items())
             end_re = re.compile(self.end % d3)
+            lexer.context.pop(-1)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
+                                             list(context), dict(d3)))
 
             # ok, so as long as we aren't done (we haven't found an end token),
             # keep reading input
@@ -296,7 +320,7 @@ class DualRegionRule(Rule):
                 # proceed to "stage 2"
                 m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
                 if m3:
-                    self._add_from_regex(context, 'end', lexer, m3)
+                    self._add_from_regex(context, 'end', lexer, m3, None)
                     done = True
                     break
@@ -328,10 +352,12 @@ class DualRegionRule(Rule):
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0
 
-            # alright, we're finally done procesing the dual region; return true
+            # alright, we're finally done processing; return true
+            lexer.context.pop(-1)
             return True
         else:
             # dual region was not matched; we never started. so return false
@@ -346,27 +372,36 @@ class Grammar:
 class Lexer:
     def __init__(self, name, grammar):
         self.name = name
         self.grammar = grammar
         self.y = 0
         self.x = 0
         self.lines = None
         self.tokens = []
+        self.context = []
+        self.line_contexts = {}
 
     def add_token(self, t):
         self.tokens.append(t)
 
     def lex(self, lines, y=0, x=0):
         self.y = y
         self.x = x
         self.lines = lines
         self.tokens = []
+        self.context = []
+        self.line_contexts = {}
 
     def __iter__(self):
         if self.lines is None:
             raise Exception, "no lines to lex"
         return self
 
+    def save_context(self):
+        self.line_contexts[self.y] = list(self.context)
+
     def next(self):
         null_t_name = 'null'
         null_t = None
@@ -377,13 +412,14 @@ class Lexer:
             curr_t = None
             for rule in self.grammar.rules:
                 if rule.match(self):
-                    assert self.tokens, "AAAAA %s" % repr(self.tokens)
+                    assert self.tokens, "match rendered no tokens?"
                     return self.tokens.pop(0)
             if null_t is None:
                 null_t = Token(null_t_name, None, self.y, self.x, '')
                 self.add_token(null_t)
             null_t.add_to_string(line[self.x])
             self.x += 1
+        self.save_context()
         null_t = None
         self.y += 1
         self.x = 0
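The thrust of the lex2.py changes: RegionRule and DualRegionRule now push a RuleContext onto lexer.context when a region opens (DualRegionRule swaps in a 'middle' context once the middle token matches), and Lexer.save_context() snapshots that stack into line_contexts as each line is finished. Every line therefore records which nested regions were open at its end, which is exactly what an incremental relex needs; RegionRule.restart() is still a stub. Below is a rough sketch of how a caller might eventually use this. relex_from() and everything inside it is hypothetical, not part of this commit; note also that, as committed, RuleContext.__init__ accepts flag but never assigns it to self.

def relex_from(lexer, lines, y):
    # the state at the start of line y is the snapshot taken when the
    # lexer finished line y-1
    stack = lexer.line_contexts.get(y - 1, [])
    if not stack:
        # no region was open, so an ordinary lex from line y is safe
        lexer.lex(lines, y=y, x=0)
        return
    # otherwise re-enter the innermost open region; a RuleContext carries
    # the rule, its flag ('start' vs 'middle'), the enclosing namespaces,
    # and the groupdict that parameterized the region's regexes
    ctx = stack[-1]
    lexer.lex(lines, y=y, x=0)      # note: lex() clears lexer.context
    ctx.rule.restart(lexer, ctx)    # restart() is just 'pass' in this commit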

@@ -25,10 +25,11 @@ class Python(mode2.Fundamental):
         self.default_color = color.build('default', 'default')
         self.colors = {
             'keyword': color.build('cyan', 'default'),
+            'reserved': color.build('cyan', 'default'),
             'builtin_method': color.build('cyan', 'default'),
             'methodname': color.build('blue', 'default'),
             'classname': color.build('green', 'default'),
 
             'string.start': color.build('green', 'default'),
             'string.null': color.build('green', 'default'),
@@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
             'string.format': color.build('yellow', 'default'),
             'string.end': color.build('green', 'default'),
-            'integer': color.build('red', 'default'),
-            'float': color.build('red', 'default'),
-            'imaginary': color.build('red', 'default'),
+            #'integer': color.build('red', 'default'),
+            #'float': color.build('red', 'default'),
+            #'imaginary': color.build('red', 'default'),
+            'integer': color.build('default', 'default'),
+            'float': color.build('default', 'default'),
+            'imaginary': color.build('default', 'default'),
 
             'tq_string.start': color.build('green', 'default'),
             'tq_string.null': color.build('green', 'default'),