branch : pmacs2
This commit is contained in:
moculus 2007-07-14 14:21:22 +00:00
parent b681024851
commit cb663fa934
13 changed files with 130 additions and 139 deletions

26
IDEAS
View File

@@ -1,10 +1,20 @@
2007/07/11:
2007/07/14:
We need a way to have a rule that matches the end of the line. Many languages
have regions whose "end" token is merely the end of the line. In those cases,
our grammars employ hacks (with varying degrees of success) to work around the
fact that rules must match one or more characters from the buffer.
The rules are also currently implemented in a confusing way, and perform poorly
when used in deeply nested grammars.
One solution would be to artificially include a newline character at the end of
the line, which could be matched in regexes. Another would be to create a new
type of rule and write some special-case code in the region rules.
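(For illustration, a minimal sketch of the first approach; the word/eol rules
and the tokens() helper below are hypothetical, not lex2's API. The lex2.py
changes in this commit appear to take this route: Lexer.next() appends '\n' to
each line, and the rule loops extend their bounds by one to cover it.)

import re

word_re = re.compile(r'[a-zA-Z_]+')
eol_re = re.compile(r'\n')

def tokens(line):
    s = line + '\n'  # virtual newline: visible to the regexes,
    x = 0            # but never part of the real buffer
    while x < len(s):
        m = word_re.match(s, x)
        if m:
            yield ('word', m.group(0))
            x = m.end()
            continue
        m = eol_re.match(s, x)
        if m:
            yield ('eol', m.group(0))
            x = m.end()
            continue
        x += 1       # no rule matched; skip this character

# list(tokens('foo bar')) == [('word', 'foo'), ('word', 'bar'), ('eol', '\n')]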
We need to refactor lex2 so that rules have two methods:
1. match():
This method should return whether or not the rule can match the lexer's
current input. If its result is true, the result will be passed (along with
the lexer, etc.) to the rule's lex() method; otherwise, the next rule will be
tried.
2. lex():
This method is a generator, and is expected to yield one or more tokens. In
addition to the arguments given to match(), it will be passed the result of the
call to match() (which is guaranteed to be true, and will most often be a regex
match object). Like all generators, this method will raise StopIteration when
there are no more tokens to return, and will raise LexError if there are other
problems.
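(A rough sketch of what the proposed protocol might look like; it reuses
lex2's Token class (name, rule, y, x, string, parent), but PatternRule2 and
the driver loop are hypothetical illustrations, not the actual
implementation.)

import re
from lex2 import Token

class PatternRule2(object):
    def __init__(self, name, pattern):
        self.name = name
        self.re = re.compile(pattern)
    def match(self, lexer, parent):
        # cheap test: a true result (here, a regex match object) is
        # handed on to lex(); None means "try the next rule"
        return self.re.match(lexer.lines[lexer.y], lexer.x)
    def lex(self, lexer, parent, m):
        # generator: yields one or more tokens, then raises StopIteration
        yield Token(self.name, self, lexer.y, lexer.x, m.group(0), parent)
        lexer.x = m.end()

# a lexer's inner loop would then be roughly:
#     for rule in grammar.rules:
#         m = rule.match(lexer, parent)
#         if m:
#             for t in rule.lex(lexer, parent, m):
#                 lexer.add_token(t)
#             break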

View File

@@ -1,5 +1,7 @@
package TBB::Reporting2;
my $bar =~ s/foob/blag/g;
my $foo = {
'foo',
'bar',

View File

@@ -1,5 +1,5 @@
import sys
import lex2
from lex2 import Token
color_list = []
color_list.extend(['\033[3%dm' % x for x in range(0, 8)])
@@ -274,10 +274,10 @@ class Highlighter:
post_change_list.append(t2)
# add in the new data
newtokens[y1].append(lex2.Token('new', '', y1, x1, newlines[0]))
newtokens[y1].append(Token('new', '', y1, x1, newlines[0]))
for i in range(1, len(newlines)):
yi = y1 + i
newtokens[yi].append(lex2.Token('new', '', yi, 0, newlines[i]))
newtokens[yi].append(Token('new', '', yi, 0, newlines[i]))
# add the post-change tokens back
for t in post_change_list:

133
lex2.py
View File

@@ -1,6 +1,8 @@
import re
import util
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
full_name_re = re.compile('^([a-zA-Z_]+)([0-9]*)$')
reserved_names = ['start', 'middle', 'end', 'null']
class Token(object):
@@ -59,7 +61,10 @@ class Token(object):
return "<Token(%r, %r, %d, %d, %r)>" % fields
class Rule:
name = 'abstract'
def __init__(self, name):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
def match(self, lexer, parent):
raise Exception, "%s rule cannot match!" % self.name
def make_token(self, lexer, s, name, parent=None, matchd={}):
@@ -74,9 +79,7 @@ class Rule:
class ConstantRule(Rule):
def __init__(self, name, constant, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.constant = constant
self.length = len(self.constant)
self._set_group(group)
@@ -92,9 +95,7 @@ class ConstantRule(Rule):
class PatternRule(Rule):
def __init__(self, name, pattern, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.pattern = pattern
self._compile()
self._set_group(group)
@@ -120,9 +121,7 @@ class NocasePatternRule(PatternRule):
class ContextPatternRule(PatternRule):
def __init__(self, name, pattern, fallback, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.pattern = pattern
self.fallback = fallback
self.fallback_re = re.compile(fallback)
@@ -142,9 +141,7 @@ class ContextPatternRule(PatternRule):
class RegionRule(Rule):
def __init__(self, name, start, grammar, end, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.start = start
self.grammar = grammar
self.end = end
@@ -157,7 +154,6 @@ class RegionRule(Rule):
return re.compile(self.end % d)
def resume(self, lexer, toresume):
#raise Exception, "%r %r" % (lexer, toresume) #XYZ
assert toresume, "can't resume without tokens to resume!"
self._match(lexer, None, None, toresume)
return True
@@ -204,7 +200,6 @@ class RegionRule(Rule):
# reference named groups from the start token. if we have no end,
# well, then, we're never getting out of here alive!
if self.end:
#end_re = re.compile(self.end % d)
end_re = self._compile_end(d)
# ok, so as long as we aren't done (we haven't found an end token),
@@ -213,17 +208,8 @@ class RegionRule(Rule):
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we skip it, but here we insert
# an empty null token just so we have something
#if not reenter and len(lexer.lines[lexer.y]) == 0:
# null_t = Token('null', None, lexer.y, lexer.x, '', parent)
# lexer.add_token(null_t)
# null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
#while (not done and lexer.y == old_y and
# lexer.x < len(lexer.lines[lexer.y])):
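# (the +1 below admits the virtual newline appended to each line, so that
# end-of-line rules can match it)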
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
if reenter:
@@ -233,7 +219,6 @@ class RegionRule(Rule):
null_t = None
if lexer.y >= len(lexer.lines):
return True
#elif lexer.x >= len(lexer.lines[lexer.y]):
elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
lexer.y += 1
lexer.x = 0
@@ -266,9 +251,7 @@ class RegionRule(Rule):
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
#if len(lexer.lines[lexer.y]) > lexer.x:
if lexer.x < len(line):
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1
@@ -294,15 +277,12 @@ class NocaseRegionRule(RegionRule):
class DualRegionRule(Rule):
def __init__(self, name, start, grammar1, middle, grammar2, end, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.start = start
self.grammar1 = grammar1
self.middle = middle
self.grammar2 = grammar2
self.end = end
#self.start_re = re.compile(start)
self.start_re = self._compile_start()
self._set_group(group)
@@ -353,7 +333,6 @@ class DualRegionRule(Rule):
d1 = parent.matchd
assert parent.name == 'start'
null_t = None
#middle_re = re.compile(self.middle % d1)
middle_re = self._compile_middle(d1)
d2 = {}
@@ -364,28 +343,15 @@ class DualRegionRule(Rule):
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we will skip it, but here we insert
# an empty null token just so we have something
#if len(lexer.lines[lexer.y]) == 0:
# null_t = Token('null', None, lexer.y, lexer.x, '', parent)
# lexer.add_token(null_t)
# null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
#while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]):
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
if reenter:
raise Exception, "aw damn1"
#reenter = False
#xrule = rulecontext[0].rule
#xd = rulecontext[0].matchd
#assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \
# "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:])
#found = True
#null_t = None
#break
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
line = self._get_line(lexer)
@@ -414,7 +380,6 @@ class DualRegionRule(Rule):
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1
@@ -436,10 +401,8 @@ class DualRegionRule(Rule):
if reenter:
assert parent is toresume[0]
assert parent.name == 'middle'
#assert parent.name == 'middle'
d3 = parent.matchd
null_t = None
#end_re = re.compile(self.end % d3)
end_re = self._compile_end(d3)
# ok, so as long as we aren't done (we haven't found an end token),
@@ -451,22 +414,10 @@ class DualRegionRule(Rule):
# if we are reentering mid-parse, then that takes precedence
if reenter:
raise Exception, "aw damn2"
#reenter = False
#xrule = rulecontext[0].rule
#xd = rulecontext[0].matchd
#assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \
# "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:])
#found = True
#null_t = None
#break
# if this line is empty, then we will skip it, but here we insert
# an empty null token just so we have something
#if len(lexer.lines[lexer.y]) == 0:
# null_t = Token('null', None, lexer.y, lexer.x, '', parent)
# lexer.add_token(null_t)
# null_t = None
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
@@ -496,7 +447,6 @@ class DualRegionRule(Rule):
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1
@@ -525,30 +475,7 @@ class Grammar:
rule.grammar = self
if hasattr(rule, 'grammar2') and rule.grammar2 is None:
rule.grammar2 = self
grammars = {}
grammars['null'] = Grammar()
crash = False
def add(name, grammar):
global crash, grammars
if crash and name in grammars:
raise Exception, "oh no! already have a grammar for %r" %name
else:
grammars[name] = grammar
def get(name):
global crash, grammars
try:
return grammars[name]
except KeyError:
if crash:
raise
elif name == 'null':
return Grammar()
else:
return get('null')
grammar = Grammar()
class Lexer:
def __init__(self, name, grammar):
@@ -569,16 +496,22 @@ class Lexer:
self.tokens = []
def resume(self, lines, y, x, token):
#raise Exception, "%r %r" % (self, token) #XYZ
self.y = y
self.x = x
self.lines = lines
self.tokens = []
toresume = token.parents()
# this is a special case for the "middle" rule of a dual region rule
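# (the dual region rule resumes from its 'middle' token directly, so the
# preceding 'start' frame would be redundant)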
i = 0
while i < len(toresume):
if i > 0 and toresume[i].name == 'middle' and toresume[i-1].name == 'start':
del toresume[i-1]
else:
i += 1
if toresume:
toresume[0].rule.resume(self, toresume)
#else:
# raise Exception, "dammmmit"
def __iter__(self):
if self.lines is None:
@@ -586,13 +519,10 @@
return self
def next(self):
null_t = None
if self.tokens:
return self.tokens.pop(0)
while self.y < len(self.lines):
#line = self.lines[self.y]
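# append a virtual newline so that rules can match the end of the line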
line = self.lines[self.y] + '\n'
while self.x < len(line):
curr_t = None
@@ -603,14 +533,11 @@ class Lexer:
if null_t is None:
null_t = Token('null', None, self.y, self.x, '')
self.add_token(null_t)
#assert line[self.x] != '\n', "DAMN"
#assert line[self.x] != '$', "DAMN"
null_t.add_to_string(line[self.x])
self.x += 1
null_t = None
self.y += 1
self.x = 0
if self.tokens:
return self.tokens.pop(0)
else:

View File

@@ -1,5 +1,6 @@
import os, sets, string
import color, lex2, method
import color, method
from lex2 import Lexer
DEBUG = False
@@ -156,7 +157,7 @@ class Fundamental(Handler):
# lexing for highlighting, etc.
if self.grammar:
self.lexer = lex2.Lexer(self.name(), self.grammar)
self.lexer = Lexer(self.name(), self.grammar)
# tab handling
if self.tabbercls:

View File

@@ -1,4 +1,4 @@
import color, lex2, mode2
import color, mode2
from lex2 import Grammar, PatternRule, RegionRule
from mode_perl import PerlGrammar
from mode_xml import OpenTagGrammar

View File

@@ -1,12 +1,17 @@
import color, mode2
from lex2 import Grammar, PatternRule
from lex2 import Grammar, PatternRule, RegionRule
from mode_python import StringGrammar
class ConsoleGrammar(Grammar):
rules = [
PatternRule(name=r'mesg', pattern=r'^[A-Za-z].*$'),
PatternRule(name=r'input', pattern=r'^>>>.*$'),
PatternRule(name=r'input', pattern=r'^-->.*$'),
PatternRule(name=r'output', pattern=r'^ .*$'),
PatternRule(r'mesg', r'^[A-Za-z].*$'),
PatternRule(r'input', r'^>>>.*$'),
PatternRule(r'input', r'^-->.*$'),
#PatternRule(r'output', r'^ .*$'),
RegionRule(r'string', r'"', StringGrammar, r'"'),
RegionRule(r'string', r"'", StringGrammar, r"'"),
PatternRule(r'bareword', r'[a-zA-Z_][a-zA-Z0-9_]*'),
]
class Console(mode2.Fundamental):
grammar = ConsoleGrammar()
@@ -16,6 +21,12 @@ class Console(mode2.Fundamental):
'mesg': color.build('blue', 'default'),
'input': color.build('cyan', 'default'),
'output': color.build('default', 'default'),
'string.start': color.build('green', 'default'),
'string.octal': color.build('magenta', 'default'),
'string.escaped': color.build('magenta', 'default'),
'string.null': color.build('green', 'default'),
'string.end': color.build('green', 'default'),
}
def name(self):
return "Console"

View File

@@ -1,5 +1,5 @@
import re, sets, string, sys
import color, commands, default, lex2, method, mode2, regex, tab2
import color, commands, default, method, mode2, regex, tab2
from point2 import Point
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, \
RegionRule, DualRegionRule

View File

@@ -1,5 +1,5 @@
import re, sets, string, sys
import color, commands, default, lex2, method, mode2, regex, tab2
import color, commands, default, method, mode2, regex, tab2
from point2 import Point
from lex2 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
from method import Argument, Method
@@ -11,11 +11,6 @@ class PodGrammar(Grammar):
RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
#PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
#PatternRule(r'entry', r'(?<=^=over) +.*$'),
#PatternRule(r'entry', r'(?<=^=item) +.*$'),
#PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
#PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
]
class StringGrammar(Grammar):
@@ -168,12 +163,12 @@ class PerlTabber(tab2.StackTabber):
return currlvl
class Perl(mode2.Fundamental):
tabbercls = PerlTabber
grammar = PerlGrammar
opentokens = ('delimiter',)
opentags = {'(': ')', '[': ']', '{': '}'}
closetoken = ('delimiter',)
closetags = {')': '(', ']': '[', '}': '{'}
tabbercls = PerlTabber
grammar = PerlGrammar
opentokens = ('delimiter',)
opentags = {'(': ')', '[': ']', '{': '}'}
closetokens = ('delimiter',)
closetags = {')': '(', ']': '[', '}': '{'}
def __init__(self, w):
mode2.Fundamental.__init__(self, w)
@@ -183,7 +178,7 @@ class Perl(mode2.Fundamental):
#self.add_action_and_bindings(PerlHashCleanup2(), ('C-c h',))
self.add_action_and_bindings(PerlViewModulePerldoc(), ('C-c v',))
self.add_action_and_bindings(PerlViewWordPerldoc(), ('C-c p',))
#self.add_action_and_bindings(PerlWrapLine(), ('M-q',))
self.add_action_and_bindings(PerlWrapLine(), ('M-q',))
self.add_action_and_bindings(PerlGotoFunction(), ('C-c M-g',))
self.add_action_and_bindings(PerlWhichFunction(), ('C-c w',))
self.add_action_and_bindings(PerlListFunctions(), ('C-c W',))
@@ -497,6 +492,46 @@ class PerlHashCleanup(Method):
window.kill(start_p, end_p)
window.insert_string(start_p, data)
class PerlWrapLine(Method):
'''Wrap Comments and POD'''
margin = 80
comment_re = re.compile('(#+)( *)(.*)')
def _is_newline(self, t):
return t.name == 'eol'
def _is_space(self, t):
return t.name == 'null' and regex.space.match(t.string)
def _detect_line_type(self, w, y):
highlighter = w.buffer.highlights[w.mode.name()]
ltype = None
for t in highlighter.tokens[y]:
if self._is_space(t):
pass
elif t.name == 'comment':
if ltype:
return None
else:
ltype = 'comment'
elif t.name == 'eol':
return ltype
else:
return None
def _execute(self, w, **vargs):
c = w.logical_cursor()
ltype = self._detect_line_type(w, c.y)
if ltype == 'comment':
return self._fix_comments(c, w)
elif ltype == 'pod':
return self._fix_pod(c, w)
else:
w.set_error("did not detect comment or pod lines")
return
def _fix_comments(self, c, w):
w.set_error("comment!")
def _fix_pod(self, c, w):
pass
#class PerlWrapLine(Method):
# '''Wrap lines, comments, POD'''
# margin = 80

View File

@@ -1,5 +1,5 @@
import commands, os.path, sets, string
import color, completer, default, mode2, lex2, method, regex, tab2
import color, completer, default, mode2, method, regex, tab2
import ctag_python
from point2 import Point
from lex2 import Grammar, PatternRule, RegionRule, ConstantRule

View File

@@ -136,4 +136,3 @@ def _end(w):
w.application.last_search = w.buffer.make_string()
w.buffer.method.old_cursor = None
w.buffer.method.old_window = None
w.buffer.method.is_literal = None

View File

@@ -1,4 +1,4 @@
import color, lex2, mode2
import color, mode2
from lex2 import Grammar, PatternRule, RegionRule
class OpenTagGrammar(Grammar):

View File

@@ -50,3 +50,9 @@ def count_leading_whitespace(s):
m = regex.leading_whitespace.match(s)
assert m, "count leading whitespace failed somehow"
return m.end() - m.start()
def dump(x):
d = {}
for name in dir(x):
d[name] = getattr(x, name)
return '%s: %r' % (x, d)
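# usage sketch: call dump(obj) on any object (e.g. a lexer or a token) to get
# a one-line summary of its attributes while debugging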