branch : pmacs2
This commit is contained in:
moculus 2007-06-06 00:01:05 +00:00
parent 6334498f3d
commit 7f66ee2ea4
4 changed files with 96 additions and 56 deletions

View File

@ -304,10 +304,8 @@ class Highlighter:
def lex_buffer(self):
'''lexes the buffer according to the grammar'''
if not hasattr(self.mode, "grammar") or \
not hasattr(self.mode, "lexer") or \
self.mode.grammar is None or \
self.mode.lexer is None:
if (not hasattr(self.mode, "grammar") or self.mode.grammar is None or
not hasattr(self.mode, "lexer") or self.mode.lexer is None):
self.tokens = []
return

View File

@ -18,8 +18,9 @@ for i in range(0, len(color_list)):
class Highlighter:
def __init__(self, lexer):
self.lexer = lexer
self.tokens = []
self.lexer = lexer
self.tokens = []
self.line_contexts = {}
def dump(self, fmt='(%3s, %2s) | %s'):
print fmt % ('y', 'x', 'string')
@ -50,10 +51,11 @@ class Highlighter:
sys.stdout.write('\n')
def highlight(self, lines):
self.tokens = [[] for l in lines]
self.tokens = [[] for l in lines]
self.lexer.lex(lines, y=0, x=0)
for token in self.lexer:
self.tokens[token.y].append(token)
self.line_contexts = dict(self.lexer.line_contexts)
# relexing
# ======================
@ -61,12 +63,12 @@ class Highlighter:
# start the relexing process
self.lexer.lex(lines, y1, 0)
# this keeps track of the current y coordinate, the current token index
# these keep track of the current y coordinate, the current token index
# on line[y], and the current "new token", respectively.
y = y1
i = 0
getnext = True
new_token = None
y = y1
i = 0
getnext = True
new_token = None
while True:
# if we have overstepped our bounds, then exit!
@ -77,7 +79,7 @@ class Highlighter:
if getnext:
try:
new_token = self.lexer.next()
getnext = False
getnext = False
except StopIteration:
# ok, so this means that ALL the rest of the tokens didn't
# show up, because we're done. so delete them and exit

108
lex2.py
View File

@ -3,6 +3,20 @@ import re
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
reserved_names = ['start', 'middle', 'end', 'null']
class RuleContext:
# to be clear:
# x, y: where the rule processing began
# rule: the rule which began
# flag: a signal to be used to resume the rule correctly
# context: the previous rule namespace(s)
# matchd: the dictionary returned by the rule's matching
def __init__(self, y, x, rule, flag, context, matchd):
self.y = y
self.x = x
self.rule = rule
self.context = context
self.matchd = matchd
class Token(object):
def __init__(self, name, rule, y, x, s, **vargs):
self.name = name
@ -45,11 +59,10 @@ class ConstantRule(Rule):
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
self.constant = constant
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, self.constant, name,
parent=parent))
lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar))
lexer.x += len(self.constant)
return True
else:
@ -62,12 +75,11 @@ class PatternRule(Rule):
self.name = name
self.pattern = pattern
self.re = re.compile(pattern)
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
m = self.re.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, m.group(0), name,
parent=parent))
lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
lexer.x += len(m.group(0))
return True
else:
@ -77,11 +89,11 @@ class ContextPatternRule(Rule):
def __init__(self, name, pattern, fallback):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
self.pattern = pattern
self.fallback = fallback
self.name = name
self.pattern = pattern
self.fallback = fallback
self.fallback_re = re.compile(fallback)
def match(self, lexer, context=[], d={}, parent=None):
def match(self, lexer, context=[], d={}):
try:
r = re.compile(self.pattern % d)
except KeyError:
@ -89,8 +101,7 @@ class ContextPatternRule(Rule):
m = r.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, m.group(0), name,
parent=parent))
lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar))
lexer.x += len(m.group(0))
return True
else:
@ -105,18 +116,23 @@ class RegionRule(Rule):
self.grammar = grammar
self.end = end
self.start_re = re.compile(start)
def _add_from_regex(self, context, name, lexer, m, parent=None):
def _add_from_regex(self, context, name, lexer, m, grammar):
t_name = '.'.join(context + [self.name, name])
t = self.make_token(lexer, m.group(0), t_name, parent=parent)
t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
lexer.add_token(t)
lexer.x += len(m.group(0))
def restart(self, lexer, rulecontext):
pass
def match(self, lexer, context=[], d={}):
m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
# see if we can match out start token
# see if we can match our start token
if m:
# ok, so create our start token, and get ready to start reading data
d = m.groupdict()
self._add_from_regex(context, 'start', lexer, m)
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
list(context), dict(d)))
self._add_from_regex(context, 'start', lexer, m, lexer.grammar)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
@ -131,7 +147,7 @@ class RegionRule(Rule):
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we will skip it, but here weinsert
# if this line is empty, then we skip it, but here we insert
# an empty null token just so we have something
if len(lexer.lines[lexer.y]) == 0:
null_t = Token(null_t_name, None, lexer.y, lexer.x, '')
@ -146,7 +162,7 @@ class RegionRule(Rule):
if self.end:
m = end_re.match(lexer.lines[lexer.y], lexer.x)
if m:
self._add_from_regex(context, 'end', lexer, m)
self._add_from_regex(context, 'end', lexer, m, None)
done = True
break
@ -155,7 +171,7 @@ class RegionRule(Rule):
found = False
for rule in self.grammar.rules:
if rule.match(lexer, context + [self.name], d):
found = True
found = True
null_t = None
break
@ -171,17 +187,19 @@ class RegionRule(Rule):
# ok, since we're soon going to be on a different line (or
# already are), we want a new null token. so forget about the
# current one.
# current one (i.e. stop adding to it).
null_t = None
# if we're still on the same line at this point (and not done)
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
            # alright, we're finally done processing the region, so return true
lexer.context.pop(-1)
return True
else:
# region was not matched; we never started. so return false
@ -198,9 +216,9 @@ class DualRegionRule(Rule):
self.grammar2 = grammar2
self.end = end
self.start_re = re.compile(start)
def _add_from_regex(self, context, name, lexer, m, parent=None):
def _add_from_regex(self, context, name, lexer, m, grammar=None):
t_name = '.'.join(context + [self.name, name])
t = self.make_token(lexer, m.group(0), t_name, parent=parent)
t = self.make_token(lexer, m.group(0), t_name, grammar=grammar)
lexer.add_token(t)
lexer.x += len(m.group(0))
def match(self, lexer, context=[], d={}):
@ -208,11 +226,13 @@ class DualRegionRule(Rule):
        # see if we can match our start token
if m1:
# ok, so create our start token, and get ready to start reading data
self._add_from_regex(context, 'start', lexer, m1)
self._add_from_regex(context, 'start', lexer, m1, lexer.grammar)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
d1 = m1.groupdict()
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start',
list(context), dict(d1)))
d2 = {}
middle_re = re.compile(self.middle % d1)
@ -237,7 +257,7 @@ class DualRegionRule(Rule):
m2 = middle_re.match(lexer.lines[lexer.y], lexer.x)
if m2:
d2 = m2.groupdict()
self._add_from_regex(context, 'middle', lexer, m2)
self._add_from_regex(context, 'middle', lexer, m2, None)
done = True
break
@ -269,6 +289,7 @@ class DualRegionRule(Rule):
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
@ -276,6 +297,9 @@ class DualRegionRule(Rule):
# instead of middle tokens
d3 = dict(d1.items() + d2.items())
end_re = re.compile(self.end % d3)
lexer.context.pop(-1)
lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle',
list(context), dict(d3)))
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
@ -296,7 +320,7 @@ class DualRegionRule(Rule):
# proceed to "stage 2"
m3 = end_re.match(lexer.lines[lexer.y], lexer.x)
if m3:
self._add_from_regex(context, 'end', lexer, m3)
self._add_from_regex(context, 'end', lexer, m3, None)
done = True
break
@ -328,10 +352,12 @@ class DualRegionRule(Rule):
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.save_context()
lexer.y += 1
lexer.x = 0
# alright, we're finally done procesing the dual region; return true
# alright, we're finally done processing; return true
lexer.context.pop(-1)
return True
else:
# dual region was not matched; we never started. so return false
@ -346,27 +372,36 @@ class Grammar:
class Lexer:
def __init__(self, name, grammar):
self.name = name
self.grammar = grammar
self.y = 0
self.x = 0
self.lines = None
self.tokens = []
self.name = name
self.grammar = grammar
self.y = 0
self.x = 0
self.lines = None
self.tokens = []
self.context = []
self.line_contexts = {}
def add_token(self, t):
self.tokens.append(t)
def lex(self, lines, y=0, x=0):
self.y = y
self.x = x
self.lines = lines
self.y = y
self.x = x
self.lines = lines
self.tokens = []
self.context = []
self.line_contexts = {}
def __iter__(self):
if self.lines is None:
raise Exception, "no lines to lex"
return self
def save_context(self):
self.line_contexts[self.y] = list(self.context)
def next(self):
null_t_name = 'null'
null_t = None
@ -377,13 +412,14 @@ class Lexer:
curr_t = None
for rule in self.grammar.rules:
if rule.match(self):
assert self.tokens, "AAAAA %s" % repr(self.tokens)
assert self.tokens, "match rendered no tokens?"
return self.tokens.pop(0)
if null_t is None:
null_t = Token(null_t_name, None, self.y, self.x, '')
self.add_token(null_t)
null_t.add_to_string(line[self.x])
self.x += 1
self.save_context()
null_t = None
self.y += 1
self.x = 0

View File

@ -25,10 +25,11 @@ class Python(mode2.Fundamental):
self.default_color = color.build('default', 'default')
self.colors = {
'keyword': color.build('cyan', 'default'),
'builtin_method': color.build('cyan', 'default'),
'methodname': color.build('blue', 'default'),
'classname': color.build('green', 'default'),
'keyword': color.build('cyan', 'default'),
'reserved': color.build('cyan', 'default'),
'builtin_method': color.build('cyan', 'default'),
'methodname': color.build('blue', 'default'),
'classname': color.build('green', 'default'),
'string.start': color.build('green', 'default'),
'string.null': color.build('green', 'default'),
@ -37,9 +38,12 @@ class Python(mode2.Fundamental):
'string.format': color.build('yellow', 'default'),
'string.end': color.build('green', 'default'),
'integer': color.build('red', 'default'),
'float': color.build('red', 'default'),
'imaginary': color.build('red', 'default'),
#'integer': color.build('red', 'default'),
#'float': color.build('red', 'default'),
#'imaginary': color.build('red', 'default'),
'integer': color.build('default', 'default'),
'float': color.build('default', 'default'),
'imaginary': color.build('default', 'default'),
'tq_string.start': color.build('green', 'default'),
'tq_string.null': color.build('green', 'default'),