still working on it

--HG-- branch : pmacs2
2007-07-14 23:02:42 +00:00 · 2007-07-14 23:02:42 +00:00 · a780d9d61a
parent 6749e4c1c8
commit a780d9d61a
1 changed files with 40 additions and 35 deletions
--- a/lex3.py
+++ b/lex3.py
@ -116,26 +116,27 @@ class RegionRule(Rule):
        self.grammar  = grammar
        self.end      = end
        self.start_re = re.compile(start, self.reflags)
-
    def resume(self, lexer, toresume):
-        assert toresume, "can't resume without tokens to resume!"
-        self._lex(lexer, None, None, toresume)
-        return True
+        if not toresume:
+            raise Exception, "can't resume without tokens to resume!"
+        for t in self._lex(lexer, None, None, toresume):
+            yield t
+        raise StopIteration
    def match(self, lexer, parent):
        return self.start_re.match(self.get_line(lexer), lexer.x)
    def lex(self, lexer, parent, m):
-        self._lex(lexer, parent, m, [])
-
-    def _add_from_regex(self, name, lexer, parent, m, matchd={}):
-        s = m.group(0)
-        token = self.make_token(lexer, s, name, parent, matchd)
-        lexer.add_token(token)
-        lexer.x += len(s)
-        return token
+        for t in self._lex(lexer, parent, m, []):
+            yield t
+        raise StopIteration

    def _lex(self, lexer, parent, m, toresume=[]):
+        # this determines whether we are still reentering. if len(toresume) == 1
+        # then it means that we have been reentering but will not continue, so
+        # reenter will be false.
+        reenter = len(toresume) > 1
+
        # we either need a match object, or a token to resume
-        assert m or len(toresume) > 0
+        assert m or reenter, "we need a current match, or a previous match"

        if m:
            # if we had a match, then it becomes the parent, and we save its
@ -148,12 +149,11 @@ class RegionRule(Rule):
            parent = toresume[0]
            d = parent.matchd
            assert parent.name == 'start'
-        null_t = None

-        # this determines whether we are still reentering. if len(toresume) == 1
-        # then it means that we have been reentering but will not continue, so
-        # reenter will be false.
-        reenter = len(toresume) > 1
+        # this token, when set, will store unmatched characters which will be
+        # combined into a single "null" token when the end of the document, or
+        # a named-token, is reached.
+        null_t = None

        # if we have an end regex, then build it here. notice that it can
        # reference named groups from the start token. if we have no end,
@ -166,22 +166,24 @@ class RegionRule(Rule):
        done = False
        while not done and lexer.y < len(lexer.lines):
            old_y = lexer.y
+            line = self.get_line(lexer)

            # ok, as long as we haven't found the end token, and have more
            # data on the current line to read, we will process tokens
-            while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
-                # if we are reentering mid-parse, then that takes precedence
+            while not done and lexer.y == old_y and lexer.x < len(line):
+                # if we are reentering mid-parse, then that takes precedence.
+                # afterwards, we need to clean-up and get our new state in order
                if reenter:
                    reenter = False
                    for t in toresume[1].rule.resume(lexer, toresume[1:]):
                        yield t
                if lexer.y >= len(lexer.lines):
                    raise StopIteration
-                elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
+                elif lexer.x >= len(line):
                    lexer.y += 1
                    lexer.x = 0
-
                line = self.get_line(lexer)
+
                # if we are looking for an end token, then see if we've
                # found it. if so, then we are done!
                if self.end:
@ -194,8 +196,8 @@ class RegionRule(Rule):
                        done = True
                        break

-                # ok, we need to check all our rules now, in order. if we
-                # find a token, note that we found one and exit the loop
+                # ok, we need to check all our rules now, in order. if we find a
+                # token, note that we found one and exit the loop
                found = False
                for rule in self.grammar.rules:
                    m = rule.match(lexer, parent)
@ -233,7 +235,6 @@ class RegionRule(Rule):
                lexer.x = 0

        raise StopIteration
-
 class NocaseRegionRule(RegionRule):
    reflags = re.IGNORECASE

@ -426,10 +427,18 @@ class DualRegionRule(Rule):

        # alright, we're finally done processing; return true
        return t3
+class NocaseDualRegionRule(DualRegionRule):
+    def _compile_start(self):
+        return re.compile(self.start, re.IGNORECASE)
+    def _compile_middle(self, d):
+        return re.compile(self.middle % d, re.IGNORECASE)
+    def _compile_end(self, d):
+        return re.compile(self.end % d, re.IGNORECASE)

 class Grammar:
    rules = []
    def __init__(self):
+        # XYZ maybe this is unnecessary
        for rule in self.rules:
            if hasattr(rule, 'grammar') and rule.grammar is None:
                rule.grammar = self
@ -473,7 +482,11 @@ class Lexer:
                i += 1

        if toresume:
-            toresume[0].rule.resume(self, toresume)
+            for t in toresume[0].rule.resume(self, toresume):
+                yield t
+        for t in self._lex():
+            yield t
+        raise StopIteration

    def __iter__(self):
        if self.lines is None:
@ -504,11 +517,3 @@ class Lexer:
            return self.tokens.pop(0)
        else:
            raise StopIteration
-
-class NocaseDualRegionRule(DualRegionRule):
-    def _compile_start(self):
-        return re.compile(self.start, re.IGNORECASE)
-    def _compile_middle(self, d):
-        return re.compile(self.middle % d, re.IGNORECASE)
-    def _compile_end(self, d):
-        return re.compile(self.end % d, re.IGNORECASE)