diff --git a/highlight.py b/highlight.py
index 8beb7ec..e53c85b 100644
--- a/highlight.py
+++ b/highlight.py
@@ -304,10 +304,8 @@ class Highlighter:
     def lex_buffer(self):
         '''lexes the buffer according to the grammar'''
-        if not hasattr(self.mode, "grammar") or \
-           not hasattr(self.mode, "lexer") or \
-           self.mode.grammar is None or \
-           self.mode.lexer is None:
+        if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
+            not hasattr(self.mode, "lexer") or self.mode.lexer is None):
             self.tokens = []
             return
diff --git a/highlight2.py b/highlight2.py
index 695730b..cd088f8 100644
--- a/highlight2.py
+++ b/highlight2.py
@@ -18,8 +18,9 @@ for i in range(0, len(color_list)):
 
 class Highlighter:
     def __init__(self, lexer):
-        self.lexer = lexer
-        self.tokens = []
+        self.lexer         = lexer
+        self.tokens        = []
+        self.line_contexts = {}
 
     def dump(self, fmt='(%3s, %2s) | %s'):
         print fmt % ('y', 'x', 'string')
@@ -50,10 +51,11 @@ class Highlighter:
             sys.stdout.write('\n')
 
     def highlight(self, lines):
-        self.tokens = [[] for l in lines]
+        self.tokens        = [[] for l in lines]
         self.lexer.lex(lines, y=0, x=0)
         for token in self.lexer:
             self.tokens[token.y].append(token)
+        self.line_contexts = dict(self.lexer.line_contexts)
 
     # relexing
     # ======================
@@ -61,12 +63,12 @@ class Highlighter:
        # start the relexing process
        self.lexer.lex(lines, y1, 0)
 
-        # this keeps track of the current y coordinate, the current token index
+        # these keep track of the current y coordinate, the current token index
        # on line[y], and the current "new token", respectively.
-        y = y1
-        i = 0
-        getnext = True
-        new_token = None
+        y         = y1
+        i         = 0
+        getnext   = True
+        new_token = None
 
        while True:
            # if we have overstepped our bounds, then exit!
@@ -77,7 +79,7 @@ class Highlighter:
            if getnext:
                try:
                    new_token = self.lexer.next()
-                    getnext = False
+                    getnext   = False
                except StopIteration:
                    # ok, so this means that ALL the rest of the tokens didn't
                    # show up, because we're done. so delete them and exit
diff --git a/lex2.py b/lex2.py
index 714cdeb..72f1b9c 100755
--- a/lex2.py
+++ b/lex2.py
@@ -3,6 +3,20 @@ import re
 valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
 reserved_names = ['start', 'middle', 'end', 'null']
 
+class RuleContext:
+    # to be clear:
+    #   x, y: where the rule processing began
+    #   rule: the rule which began
+    #   flag: a signal to be used to resume the rule correctly
+    #   context: the previous rule namespace(s)
+    #   matchd: the dictionary returned by the rule's matching
+    def __init__(self, y, x, rule, flag, context, matchd):
+        self.y = y
+        self.x = x
+        self.rule = rule
+        self.context = context
+        self.matchd = matchd
+
 class Token(object):
     def __init__(self, name, rule, y, x, s, **vargs):
         self.name = name
@@ -45,11 +59,10 @@ class ConstantRule(Rule):
         assert name not in reserved_names, "reserved rule name: %r" % name
         self.name = name
         self.constant = constant
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, self.constant, name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
             lexer.x += len(self.constant)
             return True
         else:
@@ -62,12 +75,11 @@ class PatternRule(Rule):
         self.name = name
         self.pattern = pattern
         self.re = re.compile(pattern)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         m = self.re.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
@@ -77,11 +89,11 @@ class ContextPatternRule(Rule):
     def __init__(self, name, pattern, fallback):
         assert valid_name_re.match(name), 'invalid name %r' % name
         assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
-        self.pattern = pattern
-        self.fallback = fallback
+        self.name        = name
+        self.pattern     = pattern
+        self.fallback    = fallback
         self.fallback_re = re.compile(fallback)
-    def match(self, lexer, context=[], d={}, parent=None):
+    def match(self, lexer, context=[], d={}):
         try:
             r = re.compile(self.pattern % d)
         except KeyError:
@@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
         m = r.match(lexer.lines[lexer.y], lexer.x)
         if m:
             name = '.'.join(context + [self.name])
-            lexer.add_token(self.make_token(lexer, m.group(0), name,
-                                            parent=parent))
+            lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
             lexer.x += len(m.group(0))
             return True
         else:
@@ -105,18 +116,23 @@ class RegionRule(Rule):
         self.grammar = grammar
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
+    def restart(self, lexer, rulecontext):
+        pass
     def match(self, lexer, context=[], d={}):
         m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
-        # see if we can match out start token
+        # see if we can match our start token
         if m:
+            # ok, so create our start token, and get ready to start reading data
             d = m.groupdict()
-            self._add_from_regex(context, 'start', lexer, m)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d)))
+            self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
 
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None
@@ -131,7 +147,7 @@ class RegionRule(Rule):
             done = False
             while not done and lexer.y < len(lexer.lines):
                 old_y = lexer.y
-                # if this line is empty, then we will skip it, but here weinsert
+                # if this line is empty, then we skip it, but here we insert
                 # an empty null token just so we have something
                 if len(lexer.lines[lexer.y]) == 0:
                     null_t = Token(null_t_name, None, lexer.y, lexer.x, '')
@@ -146,7 +162,7 @@ class RegionRule(Rule):
                     if self.end:
                         m = end_re.match(lexer.lines[lexer.y], lexer.x)
                         if m:
-                            self._add_from_regex(context, 'end', lexer, m)
+                            self._add_from_regex(context, 'end', lexer, m, None)
                             done = True
                             break
 
@@ -155,7 +171,7 @@ class RegionRule(Rule):
                     found = False
                     for rule in self.grammar.rules:
                         if rule.match(lexer, context + [self.name], d):
-                            found = True
+                            found  = True
                             null_t = None
                             break
 
@@ -171,17 +187,19 @@ class RegionRule(Rule):
 
                 # ok, since we're soon going to be on a different line (or
                 # already are), we want a new null token. so forget about the
-                # current one.
+                # current one (i.e. stop adding to it).
                 null_t = None
 
                 # if we're still on the same line at this point (and not done)
                 # then that means we're finished with the line and should move
                 # on to the next one here
                 if not done and old_y == lexer.y:
+                    lexer.save_context()
                     lexer.y += 1
                     lexer.x = 0
 
             # alright, we're finally done procesing the region, so return true
+            lexer.context.pop(-1)
             return True
         else:
             # region was not matched; we never started. so return false
@@ -198,9 +216,9 @@ class DualRegionRule(Rule):
         self.grammar2 = grammar2
         self.end = end
         self.start_re = re.compile(start)
-    def _add_from_regex(self, context, name, lexer, m, parent=None):
+    def _add_from_regex(self, context, name, lexer, m, grammar=None):
         t_name = '.'.join(context + [self.name, name])
-        t = self.make_token(lexer, m.group(0), t_name, parent=parent)
+        t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
         lexer.add_token(t)
         lexer.x += len(m.group(0))
     def match(self, lexer, context=[], d={}):
@@ -208,11 +226,13 @@ class DualRegionRule(Rule):
         m1 = self.start_re.match(lexer.lines[lexer.y], lexer.x)
         # see if we can match out start token
         if m1:
             # ok, so create our start token, and get ready to start reading data
-            self._add_from_regex(context, 'start', lexer, m1)
+            self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
             null_t_name = '.'.join(context + [self.name, 'null'])
             null_t = None
 
             d1 = m1.groupdict()
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
+                                             list(context), dict(d1)))
             d2 = {}
             middle_re = re.compile(self.middle % d1)
@@ -237,7 +257,7 @@ class DualRegionRule(Rule):
                     m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
                     if m2:
                         d2 = m2.groupdict()
-                        self._add_from_regex(context, 'middle', lexer, m2)
+                        self._add_from_regex(context, 'middle', lexer, m2, None)
                         done = True
                         break
 
@@ -269,6 +289,7 @@ class DualRegionRule(Rule):
                # then that means we're finished with the line and should move
                # on to the next one here
                if not done and old_y == lexer.y:
+                    lexer.save_context()
                    lexer.y += 1
                    lexer.x = 0
 
@@ -276,6 +297,9 @@ class DualRegionRule(Rule):
             # alright, we're done with stage 1, so now we are looking for end tokens
             # instead of middle tokens
             d3 = dict(d1.items() + d2.items())
             end_re = re.compile(self.end % d3)
+            lexer.context.pop(-1)
+            lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
+                                             list(context), dict(d3)))
             # ok, so as long as we aren't done (we haven't found an end token),
             # keep reading input
@@ -296,7 +320,7 @@ class DualRegionRule(Rule):
                    # proceed to "stage 2"
                    m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
                    if m3:
-                        self._add_from_regex(context, 'end', lexer, m3)
+                        self._add_from_regex(context, 'end', lexer, m3, None)
                        done = True
                        break
 
@@ -328,10 +352,12 @@ class DualRegionRule(Rule):
                # then that means we're finished with the line and should move
                # on to the next one here
                if not done and old_y == lexer.y:
+                    lexer.save_context()
                    lexer.y += 1
                    lexer.x = 0
 
-            # alright, we're finally done procesing the dual region; return true
+            # alright, we're finally done processing; return true
+            lexer.context.pop(-1)
             return True
         else:
             # dual region was not matched; we never started. so return false
@@ -346,27 +372,36 @@ class Grammar:
 
 class Lexer:
     def __init__(self, name, grammar):
-        self.name = name
-        self.grammar = grammar
-        self.y = 0
-        self.x = 0
-        self.lines = None
-        self.tokens = []
+        self.name          = name
+        self.grammar       = grammar
+        self.y             = 0
+        self.x             = 0
+        self.lines         = None
+        self.tokens        = []
+
+        self.context       = []
+        self.line_contexts = {}
     def add_token(self, t):
         self.tokens.append(t)
     def lex(self, lines, y=0, x=0):
-        self.y = y
-        self.x = x
-        self.lines = lines
+        self.y      = y
+        self.x      = x
+        self.lines  = lines
         self.tokens = []
+        self.context       = []
+        self.line_contexts = {}
+
     def __iter__(self):
         if self.lines is None:
             raise Exception, "no lines to lex"
         return self
+    def save_context(self):
+        self.line_contexts[self.y] = list(self.context)
+
     def next(self):
         null_t_name = 'null'
         null_t = None
@@ -377,13 +412,14 @@ class Lexer:
             curr_t = None
             for rule in self.grammar.rules:
                 if rule.match(self):
-                    assert self.tokens, "AAAAA %s" % repr(self.tokens)
+                    assert self.tokens, "match rendered no tokens?"
                     return self.tokens.pop(0)
             if null_t is None:
                 null_t = Token(null_t_name, None, self.y, self.x, '')
                 self.add_token(null_t)
             null_t.add_to_string(line[self.x])
             self.x += 1
+        self.save_context()
         null_t = None
         self.y += 1
         self.x = 0
diff --git a/mode_python.py b/mode_python.py
index 00efbea..aa6d084 100644
--- a/mode_python.py
+++ b/mode_python.py
@@ -25,10 +25,11 @@ class Python(mode2.Fundamental):
         self.default_color = color.build('default', 'default')
         self.colors = {
-            'keyword': color.build('cyan', 'default'),
-            'builtin_method': color.build('cyan', 'default'),
-            'methodname': color.build('blue', 'default'),
-            'classname': color.build('green', 'default'),
+            'keyword':        color.build('cyan', 'default'),
+            'reserved':       color.build('cyan', 'default'),
+            'builtin_method': color.build('cyan', 'default'),
+            'methodname':     color.build('blue', 'default'),
+            'classname':      color.build('green', 'default'),
 
             'string.start': color.build('green', 'default'),
             'string.null': color.build('green', 'default'),
@@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
             'string.format': color.build('yellow', 'default'),
             'string.end': color.build('green', 'default'),
 
-            'integer': color.build('red', 'default'),
-            'float': color.build('red', 'default'),
-            'imaginary': color.build('red', 'default'),
+            #'integer': color.build('red', 'default'),
+            #'float': color.build('red', 'default'),
+            #'imaginary': color.build('red', 'default'),
+            'integer': color.build('default', 'default'),
+            'float': color.build('default', 'default'),
+            'imaginary': color.build('default', 'default'),
 
             'tq_string.start': color.build('green', 'default'),
             'tq_string.null': color.build('green', 'default'),
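
The net effect of the lex2.py changes is a per-line snapshot of lexer state: whenever a RegionRule or DualRegionRule is still open as the lexer crosses a line boundary, Lexer.save_context() records the current stack of RuleContext objects in line_contexts[y], and Highlighter.highlight() keeps a copy of that mapping after a full pass. The sketch below shows one way the new machinery could be exercised; the DemoGrammar stand-in and the assumed RegionRule argument order (name, start, grammar, end) are illustrative guesses, not part of the diff.

    # demo_lex2.py -- hypothetical driver for the new RuleContext/line_contexts
    # machinery; DemoGrammar and the RegionRule argument order are assumptions.
    import lex2

    class DemoGrammar:
        # stand-in for lex2.Grammar: the lexer only needs a .rules list
        rules = []

    comment = DemoGrammar()            # empty sub-grammar for text inside /* ... */

    grammar = DemoGrammar()
    grammar.rules = [
        lex2.RegionRule('comment', r'/\*', comment, r'\*/'),
        lex2.PatternRule('word', r'[a-zA-Z_]+'),
    ]

    lexer = lex2.Lexer('demo', grammar)
    lines = ['int x /* a comment that', 'keeps going */ y']
    lexer.lex(lines, y=0, x=0)

    for token in lexer:
        print (token.y, token.x, token.name)

    # line 0 ends with the comment region still open, so save_context() stored a
    # RuleContext for it; line 1 closes the region, so its saved context is empty
    for y in sorted(lexer.line_contexts):
        print y, [rc.rule.name for rc in lexer.line_contexts[y]]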
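Presumably a later incremental relex could look up line_contexts[y - 1], rebuild the rule stack, and resume lexing at line y instead of restarting from the top of the buffer; the empty RegionRule.restart() hook added above looks like a placeholder for exactly that.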