big lexing improvements

--HG-- branch : pmacs2
2007-07-11 19:36:52 +00:00 · 2007-07-11 19:36:52 +00:00 · 1fbc9d4ceb
parent eb37a919b7
commit 1fbc9d4ceb
5 changed files with 55 additions and 31 deletions
--- a/application.py
+++ b/application.py
@ -606,6 +606,11 @@ class Application(object):
            if y < len(w.buffer.lines):
                while j < len(highlighter.tokens[y]):
                    token = highlighter.tokens[y][j]
                    if token.string.endswith('\n'):
                        tstring = token.string[:-1]
                    else:
                        tstring = token.string
                    assert token.y == y, '%d == %d' % (token.y, y)
                    s_offset = max(x - token.x, 0)
@ -613,7 +618,8 @@ class Application(object):
                    assert x_offset <= slot.width, '%d <= %d' % (x_offset, slot.width)
                    c          = self._get_token_color(w, token)
-                    s          = token.string[s_offset:] 
+                    #s          = token.string[s_offset:] 
                    s          = tstring[s_offset:] 
                    token_done = x_offset + len(s) <= slot.width
                    token_wrap = x_offset + len(s) > slot.width
                    self.win.addstr(slot.offset + count, x_offset, s[:slot.width - x_offset], c)
--- a/mode_perl.py
+++ b/mode_perl.py
@ -6,11 +6,16 @@ from method import Argument, Method
 class PodGrammar(Grammar):
    rules = [
-        PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
+        RegionRule(r'entry', r'(?<=^=head[1-4]) +.*$', Grammar, '^\n$'),
-        PatternRule(r'entry', r'(?<=^=over) +.*$'),
+        RegionRule(r'entry', r'(?<=^=over) +.*$', Grammar, '^\n$'),
-        PatternRule(r'entry', r'(?<=^=item) +.*$'),
+        RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
-        PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
+        RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
-        PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
+        RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
        #PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
        #PatternRule(r'entry', r'(?<=^=over) +.*$'),
        #PatternRule(r'entry', r'(?<=^=item) +.*$'),
        #PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
        #PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
    ]
 class StringGrammar(Grammar):
@ -92,7 +97,9 @@ class PerlGrammar(Grammar):
        PatternRule(r'operator', r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*="),
        PatternRule(r'operator', r"\+\+|\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|\*|&&|&|\|\||\||/|\^|==|//|~|=~|!~|!=|%|!|\.|x(?![a-zA-Z_])"),
        PatternRule(r'operator2', r"(?:xor|or|not|ne|lt|le|gt|ge|eq|cmp|and)(?![a-zA-Z_])"),
-        PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*')
+        PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*'),
    ]
 class PerlTabber(tab2.StackTabber):
@ -216,21 +223,23 @@ class Perl(mode2.Fundamental):
            'method':    color.build('cyan', 'default'),
            # heredoc
-            'heredoc1.start': color.build('green', 'default'),
+            'heredoc1.start':     color.build('green', 'default'),
-            'heredoc1.null':  color.build('green', 'default'),
+            'heredoc1.null':      color.build('green', 'default'),
-            'heredoc1.end':   color.build('green', 'default'),
+            'heredoc1.end':       color.build('green', 'default'),
-            'heredoc2.start': color.build('green', 'default'),
+            'heredoc2.start':     color.build('green', 'default'),
-            'heredoc2.null':  color.build('green', 'default'),
+            'heredoc2.null':      color.build('green', 'default'),
-            'heredoc2.end':   color.build('green', 'default'),
+            'heredoc2.end':       color.build('green', 'default'),
            'eval_heredoc.start': color.build('cyan', 'default'),
            'eval_heredoc.null':  color.build('cyan', 'default'),
            'eval_heredoc.end':   color.build('cyan', 'default'),
            # pod
-            'pod.start': color.build('red', 'default'),
+            'pod.start':       color.build('red', 'default'),
-            'pod.null':  color.build('red', 'default'),
+            'pod.null':        color.build('red', 'default'),
-            'pod.entry': color.build('magenta', 'default'),
+            'pod.entry.start': color.build('magenta', 'default'),
-            'pod.end':   color.build('red', 'default'),
+            'pod.entry.null':  color.build('magenta', 'default'),
            'pod.entry.end':   color.build('magenta', 'default'),
            'pod.end':         color.build('red', 'default'),
            # "" strings
            'string1.start':   color.build('green', 'default'),
@ -245,11 +254,11 @@ class Perl(mode2.Fundamental):
            'string2.end':   color.build('green', 'default'),
            # `` strings
-            'evalstring.start': color.build('cyan', 'default'),
+            'evalstring.start':   color.build('cyan', 'default'),
-            'evalstring.null':   color.build('cyan', 'default'),
+            'evalstring.null':    color.build('cyan', 'default'),
-            'string1.escaped':   color.build('magenta', 'default'),
+            'evalstring.escaped': color.build('magenta', 'default'),
-            'string1.deref':     color.build('yellow', 'default'),
+            'evalstring.deref':   color.build('yellow', 'default'),
-            'evalstring.end':    color.build('cyan', 'default'),
+            'evalstring.end':     color.build('cyan', 'default'),
            # quoted region
            'quoted':       color.build('cyan', 'default'),
--- a/mode_python.py
+++ b/mode_python.py
@ -49,10 +49,11 @@ class PythonTabber(tab2.StackTabber):
            # we always know that line 0 is indented at the 0 level
            return True
        tokens = self.get_tokens(y)
-        if not tokens:
+        #if not tokens:
-            # if a line has no tokens, we don't know much about its indentation
+        #    # if a line has no tokens, we don't know much about its indentation
-            return False
+        #    return False
-        elif tokens[0].name in self.startlevel_names:
+        #elif tokens[0].name in self.startlevel_names:
        if tokens[0].name in self.startlevel_names:
            # if a line has no whitespace and beings with something like
            # 'while','class','def','if',etc. then we can start at it
            return True
@ -86,8 +87,8 @@ class PythonTabber(tab2.StackTabber):
                self._pop()
            # if we haven't reached the target-line yet, we can detect how many
            # levels of unindention, if any, the user chose on previous lines
-            if y < target and tokens:
+            if y < target and len(tokens) > 2:
-                if self.token_is_whitespace(y, 0):
+                if self.token_is_space(y, 0):
                    l = len(tokens[0].string)
                else:
                    l = 0
--- a/regex.py
+++ b/regex.py
@ -9,7 +9,8 @@ shell_command = re.compile(r'^[^ ]+')
 # whitespace regexes
 leading_whitespace = re.compile('^ *')
 trailing_whitespace = re.compile(' *$')
-whitespace = re.compile('^ *$')
+whitespace = re.compile('^[ \n]*$')
 space = re.compile('^ *$')
 # word regexes
 word = re.compile('^[A-Za-z0-9_]+$')
--- a/tab2.py
+++ b/tab2.py
@ -10,7 +10,9 @@ class Marker:
 class Tabber:
    wsre = regex.whitespace
-    wst  = 'null'
+    wst  = ('null', 'eol',)
    sre  = regex.space
    st   = ('null',)
    def __init__(self, m):
        self.mode  = m
        self.lines = {}
@ -21,9 +23,14 @@ class Tabber:
        return self.mode.window.buffer.highlights[self.mode.name()].tokens[y]
    def get_token(self, y, i):
        return self.mode.window.buffer.highlights[self.mode.name()].tokens[y][i]
    def token_is_whitespace(self, y, i):
        token = self.get_token(y, i)
-        return token.name == self.wst and self.wsre.match(token.string)
+        return token.name in self.wst and self.wsre.match(token.string)
    def token_is_space(self, y, i):
        token = self.get_token(y, i)
        return token.name in self.st and self.sre.match(token.string)
    def get_next_left_token(self, y, i):
        tokens = self.get_tokens(y)
        assert i >= 0 and i < len(tokens)