From 1fbc9d4ceb267c7f119cfa35d1918ed2ffd5c9e4 Mon Sep 17 00:00:00 2001
From: moculus <none@none>
Date: Wed, 11 Jul 2007 19:36:52 +0000
Subject: [PATCH] big lexing improvements

--HG--
branch : pmacs2
---
 application.py |  8 +++++++-
 mode_perl.py   | 51 +++++++++++++++++++++++++++++---------------------
 mode_python.py | 13 +++++++------
 regex.py       |  3 ++-
 tab2.py        | 11 +++++++++--
 5 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/application.py b/application.py
index 09ec245..1a15b13 100755
--- a/application.py
+++ b/application.py
@@ -606,6 +606,11 @@ class Application(object):
             if y < len(w.buffer.lines):
                 while j < len(highlighter.tokens[y]):
                     token = highlighter.tokens[y][j]
+                    if token.string.endswith('\n'):
+                        tstring = token.string[:-1]
+                    else:
+                        tstring = token.string
+                        
                     assert token.y == y, '%d == %d' % (token.y, y)
 
                     s_offset = max(x - token.x, 0)
@@ -613,7 +618,8 @@ class Application(object):
                     assert x_offset <= slot.width, '%d <= %d' % (x_offset, slot.width)
 
                     c          = self._get_token_color(w, token)
-                    s          = token.string[s_offset:] 
+                    #s          = token.string[s_offset:] 
+                    s          = tstring[s_offset:] 
                     token_done = x_offset + len(s) <= slot.width
                     token_wrap = x_offset + len(s) > slot.width
                     self.win.addstr(slot.offset + count, x_offset, s[:slot.width - x_offset], c)
diff --git a/mode_perl.py b/mode_perl.py
index e415168..ad65805 100644
--- a/mode_perl.py
+++ b/mode_perl.py
@@ -6,11 +6,16 @@ from method import Argument, Method
 
 class PodGrammar(Grammar):
     rules = [
-        PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
-        PatternRule(r'entry', r'(?<=^=over) +.*$'),
-        PatternRule(r'entry', r'(?<=^=item) +.*$'),
-        PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
-        PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
+        RegionRule(r'entry', r'(?<=^=head[1-4]) +.*$', Grammar, '^\n$'),
+        RegionRule(r'entry', r'(?<=^=over) +.*$', Grammar, '^\n$'),
+        RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
+        RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
+        RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
+        #PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
+        #PatternRule(r'entry', r'(?<=^=over) +.*$'),
+        #PatternRule(r'entry', r'(?<=^=item) +.*$'),
+        #PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
+        #PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
     ]
 
 class StringGrammar(Grammar):
@@ -92,7 +97,9 @@ class PerlGrammar(Grammar):
         PatternRule(r'operator', r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*="),
         PatternRule(r'operator', r"\+\+|\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|\*|&&|&|\|\||\||/|\^|==|//|~|=~|!~|!=|%|!|\.|x(?![a-zA-Z_])"),
         PatternRule(r'operator2', r"(?:xor|or|not|ne|lt|le|gt|ge|eq|cmp|and)(?![a-zA-Z_])"),
-        PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*')
+        PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*'),
+
+
     ]
 
 class PerlTabber(tab2.StackTabber):
@@ -216,21 +223,23 @@ class Perl(mode2.Fundamental):
             'method':    color.build('cyan', 'default'),
 
             # heredoc
-            'heredoc1.start': color.build('green', 'default'),
-            'heredoc1.null':  color.build('green', 'default'),
-            'heredoc1.end':   color.build('green', 'default'),
-            'heredoc2.start': color.build('green', 'default'),
-            'heredoc2.null':  color.build('green', 'default'),
-            'heredoc2.end':   color.build('green', 'default'),
+            'heredoc1.start':     color.build('green', 'default'),
+            'heredoc1.null':      color.build('green', 'default'),
+            'heredoc1.end':       color.build('green', 'default'),
+            'heredoc2.start':     color.build('green', 'default'),
+            'heredoc2.null':      color.build('green', 'default'),
+            'heredoc2.end':       color.build('green', 'default'),
             'eval_heredoc.start': color.build('cyan', 'default'),
             'eval_heredoc.null':  color.build('cyan', 'default'),
             'eval_heredoc.end':   color.build('cyan', 'default'),
             
             # pod
-            'pod.start': color.build('red', 'default'),
-            'pod.null':  color.build('red', 'default'),
-            'pod.entry': color.build('magenta', 'default'),
-            'pod.end':   color.build('red', 'default'),
+            'pod.start':       color.build('red', 'default'),
+            'pod.null':        color.build('red', 'default'),
+            'pod.entry.start': color.build('magenta', 'default'),
+            'pod.entry.null':  color.build('magenta', 'default'),
+            'pod.entry.end':   color.build('magenta', 'default'),
+            'pod.end':         color.build('red', 'default'),
             
             # "" strings
             'string1.start':   color.build('green', 'default'),
@@ -245,11 +254,11 @@ class Perl(mode2.Fundamental):
             'string2.end':   color.build('green', 'default'),
             
             # `` strings
-            'evalstring.start': color.build('cyan', 'default'),
-            'evalstring.null':   color.build('cyan', 'default'),
-            'string1.escaped':   color.build('magenta', 'default'),
-            'string1.deref':     color.build('yellow', 'default'),
-            'evalstring.end':    color.build('cyan', 'default'),
+            'evalstring.start':   color.build('cyan', 'default'),
+            'evalstring.null':    color.build('cyan', 'default'),
+            'evalstring.escaped': color.build('magenta', 'default'),
+            'evalstring.deref':   color.build('yellow', 'default'),
+            'evalstring.end':     color.build('cyan', 'default'),
             
             # quoted region
             'quoted':       color.build('cyan', 'default'),
diff --git a/mode_python.py b/mode_python.py
index 85feee4..d1b96b5 100644
--- a/mode_python.py
+++ b/mode_python.py
@@ -49,10 +49,11 @@ class PythonTabber(tab2.StackTabber):
             # we always know that line 0 is indented at the 0 level
             return True
         tokens = self.get_tokens(y)
-        if not tokens:
-            # if a line has no tokens, we don't know much about its indentation
-            return False
-        elif tokens[0].name in self.startlevel_names:
+        #if not tokens:
+        #    # if a line has no tokens, we don't know much about its indentation
+        #    return False
+        #elif tokens[0].name in self.startlevel_names:
+        if tokens[0].name in self.startlevel_names:
             # if a line has no whitespace and beings with something like
             # 'while','class','def','if',etc. then we can start at it
             return True
@@ -86,8 +87,8 @@ class PythonTabber(tab2.StackTabber):
                 self._pop()
             # if we haven't reached the target-line yet, we can detect how many
             # levels of unindention, if any, the user chose on previous lines
-            if y < target and tokens:
-                if self.token_is_whitespace(y, 0):
+            if y < target and len(tokens) > 2:
+                if self.token_is_space(y, 0):
                     l = len(tokens[0].string)
                 else:
                     l = 0
diff --git a/regex.py b/regex.py
index c8977e8..b560021 100644
--- a/regex.py
+++ b/regex.py
@@ -9,7 +9,8 @@ shell_command = re.compile(r'^[^ ]+')
 # whitespace regexes
 leading_whitespace = re.compile('^ *')
 trailing_whitespace = re.compile(' *$')
-whitespace = re.compile('^ *$')
+whitespace = re.compile('^[ \n]*$')
+space = re.compile('^ *$')
 
 # word regexes
 word = re.compile('^[A-Za-z0-9_]+$')
diff --git a/tab2.py b/tab2.py
index 850bb98..058d17a 100644
--- a/tab2.py
+++ b/tab2.py
@@ -10,7 +10,9 @@ class Marker:
 
 class Tabber:
     wsre = regex.whitespace
-    wst  = 'null'
+    wst  = ('null', 'eol',)
+    sre  = regex.space
+    st   = ('null',)
     def __init__(self, m):
         self.mode  = m
         self.lines = {}
@@ -21,9 +23,14 @@ class Tabber:
         return self.mode.window.buffer.highlights[self.mode.name()].tokens[y]
     def get_token(self, y, i):
         return self.mode.window.buffer.highlights[self.mode.name()].tokens[y][i]
+
     def token_is_whitespace(self, y, i):
         token = self.get_token(y, i)
-        return token.name == self.wst and self.wsre.match(token.string)
+        return token.name in self.wst and self.wsre.match(token.string)
+    def token_is_space(self, y, i):
+        token = self.get_token(y, i)
+        return token.name in self.st and self.sre.match(token.string)
+
     def get_next_left_token(self, y, i):
         tokens = self.get_tokens(y)
         assert i >= 0 and i < len(tokens)