From cb663fa93400bb1e38b5aeb6f0917e2974f9dac6 Mon Sep 17 00:00:00 2001
From: moculus
Date: Sat, 14 Jul 2007 14:21:22 +0000
Subject: [PATCH]

--HG--
branch : pmacs2
---
 IDEAS                       |  26 ++++---
 code_examples/Reporting2.pm |   2 +
 highlight2.py               |   6 +-
 lex2.py                     | 133 ++++++++----------------------------
 mode2.py                    |   5 +-
 mode_bds.py                 |   2 +-
 mode_console.py             |  21 ++++--
 mode_life.py                |   2 +-
 mode_perl.py                |  61 +++++++++++++----
 mode_python.py              |   2 +-
 mode_search.py              |   1 -
 mode_xml.py                 |   2 +-
 util.py                     |   6 ++
 13 files changed, 130 insertions(+), 139 deletions(-)

diff --git a/IDEAS b/IDEAS
index 9aec508..ccf005f 100644
--- a/IDEAS
+++ b/IDEAS
@@ -1,10 +1,20 @@
-2007/07/11:
+2007/07/14:
 
-We need a way to have a rule that matches the end of the line. Many languages
-have regions whose "end" token is merely the end of the line. In those cases,
-our grammars employ hacks (with varying levels of success) to get around the
-fact that rules must match 1-or-more characters from the buffer.
+The rules are currently confusingly implemented, and have poor performance when
+used in deeply nested grammars.
 
-One solution would be to artificially include a newline character at the end of
-the line, which could be matched in regexes. Another would be to create a new
-type of rule and write some special-case code in the region rules.
+We need to refactor lex2 so that rules have two methods:
+
+1. match():
+This method should return whether or not the rule can match the current input
+that the lexer is lexing. If its result is true, the result will be passed
+(along with the lexer, etc.) to the rule's lex() method. Otherwise, the next
+rule will be tried.
+
+2. lex():
+This method is a generator, which is expected to return one or more tokens. In
+addition to the arguments given to match() it will be passed the result of the
+call to match() (which is guaranteed to be true, and will most often be a
+re.Match object). As all generators, this method will raise StopIteration when
+there are no more tokens to return, and will raise LexError if there are other
+problems.
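
The note above only specifies the interface. A rough sketch of how one rule
might implement the proposed match()/lex() split follows; Token, Rule,
Rule.make_token() and the lexer's lines/y/x attributes come from lex2.py, but
the method bodies themselves are illustrative assumptions, not code from this
patch:

    # illustrative sketch only -- not part of this commit
    class PatternRule(Rule):
        def __init__(self, name, pattern, group=None):
            Rule.__init__(self, name)
            self.re = re.compile(pattern)
        def match(self, lexer, parent):
            # return an re.Match object when the rule applies at the lexer's
            # current position; None means the next rule will be tried
            return self.re.match(lexer.lines[lexer.y], lexer.x)
        def lex(self, lexer, parent, m):
            # m is the (true) result of match(); yield one or more tokens and
            # then simply return, which raises StopIteration for the caller
            yield self.make_token(lexer, m.group(0), self.name, parent)
            lexer.x = m.end()
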
diff --git a/code_examples/Reporting2.pm b/code_examples/Reporting2.pm
index 7cab633..ecaf425 100644
--- a/code_examples/Reporting2.pm
+++ b/code_examples/Reporting2.pm
@@ -1,5 +1,7 @@
 package TBB::Reporting2;
 
+my $bar =~ s/foob/blag/g;
+
 my $foo = {
     'foo',
     'bar',
diff --git a/highlight2.py b/highlight2.py
index 32776f2..d48bb1c 100644
--- a/highlight2.py
+++ b/highlight2.py
@@ -1,5 +1,5 @@
 import sys
-import lex2
+from lex2 import Token
 
 color_list = []
 color_list.extend(['\033[3%dm' % x for x in range(0, 8)])
@@ -274,10 +274,10 @@ class Highlighter:
                 post_change_list.append(t2)
 
         # add in the new data
-        newtokens[y1].append(lex2.Token('new', '', y1, x1, newlines[0]))
+        newtokens[y1].append(Token('new', '', y1, x1, newlines[0]))
         for i in range(1, len(newlines)):
             yi = y1 + i
-            newtokens[yi].append(lex2.Token('new', '', yi, 0, newlines[i]))
+            newtokens[yi].append(Token('new', '', yi, 0, newlines[i]))
 
         # add the post-change tokens back
         for t in post_change_list:
diff --git a/lex2.py b/lex2.py
index a3cfd5f..bbc9a7c 100755
--- a/lex2.py
+++ b/lex2.py
@@ -1,6 +1,8 @@
 import re
+import util
 
 valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
+full_name_re = re.compile('^([a-zA-Z_]+)([0-9]*)$')
 reserved_names = ['start', 'middle', 'end', 'null']
 
 class Token(object):
@@ -59,7 +61,10 @@ class Token(object):
         return "" % fields
 
 class Rule:
-    name = 'abstract'
+    def __init__(self, name):
+        assert valid_name_re.match(name), 'invalid name %r' % name
+        assert name not in reserved_names, "reserved rule name: %r" % name
+        self.name = name
     def match(self, lexer, parent):
         raise Exception, "%s rule cannot match!" % self.name
     def make_token(self, lexer, s, name, parent=None, matchd={}):
@@ -74,9 +79,7 @@ class Rule:
 
 class ConstantRule(Rule):
     def __init__(self, name, constant, group=None):
-        assert valid_name_re.match(name), 'invalid name %r' % name
-        assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
+        Rule.__init__(self, name)
         self.constant = constant
         self.length = len(self.constant)
         self._set_group(group)
@@ -92,9 +95,7 @@ class PatternRule(Rule):
     def __init__(self, name, pattern, group=None):
-        assert valid_name_re.match(name), 'invalid name %r' % name
-        assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
+        Rule.__init__(self, name)
         self.pattern = pattern
         self._compile()
         self._set_group(group)
@@ -120,9 +121,7 @@ class NocasePatternRule(PatternRule):
 
 class ContextPatternRule(PatternRule):
     def __init__(self, name, pattern, fallback, group=None):
-        assert valid_name_re.match(name), 'invalid name %r' % name
-        assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
+        Rule.__init__(self, name)
         self.pattern = pattern
         self.fallback = fallback
         self.fallback_re = re.compile(fallback)
@@ -142,9 +141,7 @@ class RegionRule(Rule):
     def __init__(self, name, start, grammar, end, group=None):
-        assert valid_name_re.match(name), 'invalid name %r' % name
-        assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
+        Rule.__init__(self, name)
         self.start = start
         self.grammar = grammar
         self.end = end
@@ -157,7 +154,6 @@ class RegionRule(Rule):
         return re.compile(self.end % d)
 
     def resume(self, lexer, toresume):
-        #raise Exception, "%r %r" % (lexer, toresume) #XYZ
         assert toresume, "can't resume without tokens to resume!"
         self._match(lexer, None, None, toresume)
         return True
@@ -204,7 +200,6 @@ class RegionRule(Rule):
             # reference named groups from the start token. if we have no end,
             # well, then, we're never getting out of here alive!
             if self.end:
-                #end_re = re.compile(self.end % d)
                 end_re = self._compile_end(d)
 
         # ok, so as long as we aren't done (we haven't found an end token),
@@ -213,17 +208,8 @@ class RegionRule(Rule):
         while not done and lexer.y < len(lexer.lines):
             old_y = lexer.y
 
-            # if this line is empty, then we skip it, but here we insert
-            # an empty null token just so we have something
-            #if not reenter and len(lexer.lines[lexer.y]) == 0:
-            #    null_t = Token('null', None, lexer.y, lexer.x, '', parent)
-            #    lexer.add_token(null_t)
-            #    null_t = None
-
             # ok, as long as we haven't found the end token, and have more
             # data on the current line to read, we will process tokens
-            #while (not done and lexer.y == old_y and
-            #       lexer.x < len(lexer.lines[lexer.y])):
             while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
                 # if we are reentering mid-parse, then that takes precedence
                 if reenter:
@@ -233,7 +219,6 @@ class RegionRule(Rule):
                     null_t = None
                     if lexer.y >= len(lexer.lines):
                         return True
-                    #elif lexer.x >= len(lexer.lines[lexer.y]):
                     elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
                         lexer.y += 1
                         lexer.x = 0
@@ -266,9 +251,7 @@ class RegionRule(Rule):
                 if null_t is None:
                     null_t = Token('null', None, lexer.y, lexer.x, '', parent)
                     lexer.add_token(null_t)
-                #if len(lexer.lines[lexer.y]) > lexer.x:
                 if lexer.x < len(line):
-                    #null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
                     null_t.add_to_string(line[lexer.x])
                 lexer.x += 1
 
@@ -294,15 +277,12 @@ class NocaseRegionRule(RegionRule):
 
 class DualRegionRule(Rule):
     def __init__(self, name, start, grammar1, middle, grammar2, end, group=None):
-        assert valid_name_re.match(name), 'invalid name %r' % name
-        assert name not in reserved_names, "reserved rule name: %r" % name
-        self.name = name
+        Rule.__init__(self, name)
         self.start = start
         self.grammar1 = grammar1
         self.middle = middle
         self.grammar2 = grammar2
         self.end = end
-        #self.start_re = re.compile(start)
         self.start_re = self._compile_start()
         self._set_group(group)
 
@@ -353,7 +333,6 @@ class DualRegionRule(Rule):
         d1 = parent.matchd
         assert parent.name == 'start'
         null_t = None
-        #middle_re = re.compile(self.middle % d1)
         middle_re = self._compile_middle(d1)
         d2 = {}
 
@@ -364,28 +343,15 @@ class DualRegionRule(Rule):
         while not done and lexer.y < len(lexer.lines):
             old_y = lexer.y
 
-            # if this line is empty, then we will skip it, but here we insert
-            # an empty null token just so we have something
-            #if len(lexer.lines[lexer.y]) == 0:
-            #    null_t = Token('null', None, lexer.y, lexer.x, '', parent)
-            #    lexer.add_token(null_t)
-            #    null_t = None
-
             # ok, as long as we haven't found the end token, and have more
             # data on the current line to read, we will process tokens
-            #while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]):
             while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
                 # if we are reentering mid-parse, then that takes precedence
                 if reenter:
-                    raise Exception, "aw damn1"
-                    #reenter = False
-                    #xrule = rulecontext[0].rule
-                    #xd    = rulecontext[0].matchd
-                    #assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \
-                    #       "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:])
-                    #found = True
-                    #null_t = None
-                    #break
+                    reenter = False
+                    rule2 = toresume[1].rule
+                    rule2.resume(lexer, toresume[1:])
+                    null_t = None
 
                 line = self._get_line(lexer)
 
@@ -414,7 +380,6 @@ class DualRegionRule(Rule):
                 if null_t is None:
                     null_t = Token('null', None, lexer.y, lexer.x, '', parent)
                     lexer.add_token(null_t)
-                #null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
                 null_t.add_to_string(line[lexer.x])
                 lexer.x += 1
 
@@ -436,10 +401,8 @@ class DualRegionRule(Rule):
         if reenter:
             assert parent is toresume[0]
         assert parent.name == 'middle'
-        #assert parent.name == 'middle'
         d3 = parent.matchd
         null_t = None
-        #end_re = re.compile(self.end % d3)
         end_re = self._compile_end(d3)
 
         # ok, so as long as we aren't done (we haven't found an end token),
@@ -451,22 +414,10 @@ class DualRegionRule(Rule):
             # if we are reentering mid-parse, then that takes precedence
             if reenter:
-                raise Exception, "aw damn2"
-                #reenter = False
-                #xrule = rulecontext[0].rule
-                #xd    = rulecontext[0].matchd
-                #assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \
-                #       "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:])
-                #found = True
-                #null_t = None
-                #break
-
-            # if this line is empty, then we will skip it, but here weinsert
-            # an empty null token just so we have something
-            #if len(lexer.lines[lexer.y]) == 0:
-            #    null_t = Token('null', None, lexer.y, lexer.x, '', parent)
-            #    lexer.add_token(null_t)
-            #    null_t = None
+                reenter = False
+                rule2 = toresume[1].rule
+                rule2.resume(lexer, toresume[1:])
+                null_t = None
 
             # ok, as long as we haven't found the end token, and have more
             # data on the current line to read, we will process tokens
 
@@ -496,7 +447,6 @@ class DualRegionRule(Rule):
                 if null_t is None:
                     null_t = Token('null', None, lexer.y, lexer.x, '', parent)
                     lexer.add_token(null_t)
-                #null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
                 null_t.add_to_string(line[lexer.x])
                 lexer.x += 1
 
@@ -525,30 +475,7 @@ class Grammar:
                 rule.grammar = self
             if hasattr(rule, 'grammar2') and rule.grammar is None:
                 rule.grammar = self
-
-grammars = {}
-grammars['null'] = Grammar()
-crash = False
-
-def add(name, grammar):
-    global crash, grammars
-    if crash and name in grammars:
-        raise Exception, "oh no! already have a grammar for %r" %name
-    else:
-        grammars[name] = grammar
-
-
-def get(name):
-    global crash, grammars
-    try:
-        return grammars[name]
-    except KeyError:
-        if crash:
-            raise
-        elif name == 'null':
-            return Grammar()
-        else:
-            return get('null')
+grammar = Grammar()
 
 class Lexer:
     def __init__(self, name, grammar):
@@ -569,16 +496,22 @@ class Lexer:
         self.tokens = []
 
     def resume(self, lines, y, x, token):
-        #raise Exception, "%r %r" % (self, token) #XYZ
         self.y = y
         self.x = x
         self.lines = lines
         self.tokens = []
         toresume = token.parents()
+
+        # this is a special case for the "middle" rule of a dual region rule
+        i = 0
+        while i < len(toresume):
+            if i > 0 and toresume[i].name == 'middle' and toresume[i-1].name == 'start':
+                del toresume[i-1]
+            else:
+                i += 1
+
         if toresume:
             toresume[0].rule.resume(self, toresume)
-        #else:
-        #    raise Exception, "dammmmit"
 
     def __iter__(self):
         if self.lines is None:
@@ -586,13 +519,10 @@ class Lexer:
         return self
 
     def next(self):
-        null_t = None
-
+        null_t = None
         if self.tokens:
            return self.tokens.pop(0)
-
         while self.y < len(self.lines):
-            #line = self.lines[self.y]
            line = self.lines[self.y] + '\n'
            while self.x < len(line):
                curr_t = None
@@ -603,14 +533,11 @@ class Lexer:
                if null_t is None:
                    null_t = Token('null', None, self.y, self.x, '')
                    self.add_token(null_t)
-                #assert line[self.x] != '\n', "DAMN"
-                #assert line[self.x] != '$', "DAMN"
                null_t.add_to_string(line[self.x])
                self.x += 1
            null_t = None
            self.y += 1
            self.x = 0
-
            if self.tokens:
                return self.tokens.pop(0)
            else:
diff --git a/mode2.py b/mode2.py
index f16eb9c..bfe636c 100644
--- a/mode2.py
+++ b/mode2.py
@@ -1,5 +1,6 @@
 import os, sets, string
-import color, lex2, method
+import color, method
+from lex2 import Lexer
 
 DEBUG = False
 
@@ -156,7 +157,7 @@ class Fundamental(Handler):
 
         # lexing for highlighting, etc.
         if self.grammar:
-            self.lexer = lex2.Lexer(self.name(), self.grammar)
+            self.lexer = Lexer(self.name(), self.grammar)
 
         # tab handling
         if self.tabbercls:
diff --git a/mode_bds.py b/mode_bds.py
index bce8242..5f40a2a 100644
--- a/mode_bds.py
+++ b/mode_bds.py
@@ -1,4 +1,4 @@
-import color, lex2, mode2
+import color, mode2
 from lex2 import Grammar, PatternRule, RegionRule, Grammar
 from mode_perl import PerlGrammar
 from mode_xml import OpenTagGrammar
diff --git a/mode_console.py b/mode_console.py
index 3bb2339..49b2a4f 100644
--- a/mode_console.py
+++ b/mode_console.py
@@ -1,12 +1,17 @@
 import color, mode2
-from lex2 import Grammar, PatternRule
+from lex2 import Grammar, PatternRule, RegionRule
+from mode_python import StringGrammar
 
 class ConsoleGrammar(Grammar):
     rules = [
-        PatternRule(name=r'mesg', pattern=r'^[A-Za-z].*$'),
-        PatternRule(name=r'input', pattern=r'^>>>.*$'),
-        PatternRule(name=r'input', pattern=r'^-->.*$'),
-        PatternRule(name=r'output', pattern=r'^ .*$'),
+        PatternRule(r'mesg', r'^[A-Za-z].*$'),
+        PatternRule(r'input', r'^>>>.*$'),
+        PatternRule(r'input', r'^-->.*$'),
+        #PatternRule(r'output', r'^ .*$'),
+
+        RegionRule(r'string', r'"', StringGrammar, r'"'),
+        RegionRule(r'string', r"'", StringGrammar, r"'"),
+        PatternRule(r'bareword', r'[a-zA-Z_][a-zA-Z0-9_]*'),
     ]
 class Console(mode2.Fundamental):
     grammar = ConsoleGrammar()
@@ -16,6 +21,12 @@ class Console(mode2.Fundamental):
         'mesg':   color.build('blue', 'default'),
         'input':  color.build('cyan', 'default'),
         'output': color.build('default', 'default'),
+
+        'string.start':   color.build('green', 'default'),
+        'string.octal':   color.build('magenta', 'default'),
+        'string.escaped': color.build('magenta', 'default'),
+        'string.null':    color.build('green', 'default'),
+        'string.end':     color.build('green', 'default'),
     }
     def name(self):
         return "Console"
diff --git a/mode_life.py b/mode_life.py
index 43b8152..e869663 100644
--- a/mode_life.py
+++ b/mode_life.py
@@ -1,5 +1,5 @@
 import re, sets, string, sys
-import color, commands, default, lex2, method, mode2, regex, tab2
+import color, commands, default, method, mode2, regex, tab2
 from point2 import Point
 from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, \
     RegionRule, DualRegionRule
diff --git a/mode_perl.py b/mode_perl.py
index f652cb8..f6a8e3c 100644
--- a/mode_perl.py
+++ b/mode_perl.py
@@ -1,5 +1,5 @@
 import re, sets, string, sys
-import color, commands, default, lex2, method, mode2, regex, tab2
+import color, commands, default, method, mode2, regex, tab2
 from point2 import Point
 from lex2 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
 from method import Argument, Method
@@ -11,11 +11,6 @@ class PodGrammar(Grammar):
         RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
         RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
         RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
-        #PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
-        #PatternRule(r'entry', r'(?<=^=over) +.*$'),
-        #PatternRule(r'entry', r'(?<=^=item) +.*$'),
-        #PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
-        #PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
     ]
 
 class StringGrammar(Grammar):
@@ -168,12 +163,12 @@ class PerlTabber(tab2.StackTabber):
         return currlvl
 
 class Perl(mode2.Fundamental):
-    tabbercls = PerlTabber
-    grammar = PerlGrammar
-    opentokens = ('delimiter',)
-    opentags = {'(': ')', '[': ']', '{': '}'}
-    closetoken = ('delimiter',)
-    closetags = {')': '(', ']': '[', '}': '{'}
+    tabbercls   = PerlTabber
+    grammar     = PerlGrammar
+    opentokens  = ('delimiter',)
+    opentags    = {'(': ')', '[': ']', '{': '}'}
+    closetokens = ('delimiter',)
+    closetags   = {')': '(', ']': '[', '}': '{'}
 
     def __init__(self, w):
         mode2.Fundamental.__init__(self, w)
@@ -183,7 +178,7 @@ class Perl(mode2.Fundamental):
         #self.add_action_and_bindings(PerlHashCleanup2(), ('C-c h',))
         self.add_action_and_bindings(PerlViewModulePerldoc(), ('C-c v',))
         self.add_action_and_bindings(PerlViewWordPerldoc(), ('C-c p',))
-        #self.add_action_and_bindings(PerlWrapLine(), ('M-q',))
+        self.add_action_and_bindings(PerlWrapLine(), ('M-q',))
         self.add_action_and_bindings(PerlGotoFunction(), ('C-c M-g',))
         self.add_action_and_bindings(PerlWhichFunction(), ('C-c w',))
         self.add_action_and_bindings(PerlListFunctions(), ('C-c W',))
@@ -497,6 +492,46 @@ class PerlHashCleanup(Method):
         window.kill(start_p, end_p)
         window.insert_string(start_p, data)
 
+class PerlWrapLine(Method):
+    '''Wrap Comments and POD'''
+    margin = 80
+    comment_re = re.compile('(#+)( *)(.*)')
+    def _is_newline(self, t):
+        return t.name == 'eol'
+    def _is_space(self, t):
+        return t.name == 'null' and regex.space.match(t.string)
+
+    def _detect_line_type(self, w, y):
+        c = w.logical_cursor()
+        highlighter = w.buffer.highlights[w.mode.name()]
+        ltype = None
+        for t in highlighter.tokens[c.y]:
+            if self._is_space(t):
+                pass
+            elif t.name == 'comment':
+                if ltype:
+                    return None
+                else:
+                    ltype = 'comment'
+            elif t.name == 'eol':
+                return ltype
+            else:
+                return None
+
+    def _execute(self, w, **vargs):
+        c = w.logical_cursor()
+        ltype = self._detect_line_type(w, c.y)
+        if ltype == 'comment':
+            return self._fix_comments(c, w)
+        elif ltype == 'pod':
+            return self._fix_pod(c, w)
+        else:
+            w.set_error("did not detect comment or pod lines")
+            return
+    def _fix_comments(self, c, w):
+        w.set_error("comment!")
+    def _fix_pod(self, c, w):
+        pass
 #class PerlWrapLine(Method):
 #    '''Wrap lines, comments, POD'''
 #    margin = 80
diff --git a/mode_python.py b/mode_python.py
index 3446ef1..3632614 100644
--- a/mode_python.py
+++ b/mode_python.py
@@ -1,5 +1,5 @@
 import commands, os.path, sets, string
-import color, completer, default, mode2, lex2, method, regex, tab2
+import color, completer, default, mode2, method, regex, tab2
 import ctag_python
 from point2 import Point
 from lex2 import Grammar, PatternRule, RegionRule, ConstantRule
diff --git a/mode_search.py b/mode_search.py
index 3ebc075..1646c8c 100644
--- a/mode_search.py
+++ b/mode_search.py
@@ -136,4 +136,3 @@ def _end(w):
     w.application.last_search = w.buffer.make_string()
     w.buffer.method.old_cursor = None
     w.buffer.method.old_window = None
-    w.buffer.method.is_literal = None
diff --git a/mode_xml.py b/mode_xml.py
index 8d0db82..c623e06 100644
--- a/mode_xml.py
+++ b/mode_xml.py
@@ -1,4 +1,4 @@
-import color, lex2, mode2
+import color, mode2
 from lex2 import Grammar, PatternRule, RegionRule
 
 class OpenTagGrammar(Grammar):
diff --git a/util.py b/util.py
index 4630f5f..e268faf 100644
--- a/util.py
+++ b/util.py
@@ -50,3 +50,9 @@ def count_leading_whitespace(s):
     m = regex.leading_whitespace.match(s)
     assert m, "count leading whitespace failed somehow"
     return m.end() - m.start()
+
+def dump(x):
+    d = {}
+    for name in dir(x):
+        d[name] = getattr(x, name)
+    return '%s: %r' % (x, d)
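
For reference, a hypothetical interactive session showing the new util.dump()
helper added above; the Token constructor arguments follow the
Token('null', None, y, x, string) calls used in lex2.py, and the exact output
depends on %r formatting:

    # hypothetical usage, not part of this commit
    >>> import util
    >>> from lex2 import Token
    >>> t = Token('null', None, 0, 0, 'hello')
    >>> print util.dump(t)   # the token followed by a dict of all its attributes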