oh no

--HG-- branch : pmacs2
2007-03-28 22:38:32 +00:00 · 2007-03-28 22:38:32 +00:00 · 8d33c8b0e2
parent 210f8f1a7c
commit 8d33c8b0e2
4 changed files with 144 additions and 10 deletions
--- a/highlight2.py
+++ b/highlight2.py
@ -0,0 +1,24 @@
 class Highlighter:
    """Group the tokens produced by a lexer by the line they start on.

    After ``highlight(lines)`` has run, ``self.tokens`` holds one list per
    input line; ``self.tokens[i]`` contains the tokens whose ``y`` attribute
    equals ``i``, in the order the lexer yielded them.
    """
    def __init__(self, lexer):
        """Remember *lexer*; it must provide ``lex(lines, y=, x=)`` and be
        iterable, yielding objects with ``y`` and ``string`` attributes."""
        self.lexer = lexer
        self.tokens = []
    def display(self):
        """Write each line's token strings to stdout, one output line per
        group, followed by one extra trailing newline."""
        # Imported here because nothing visible in this module imports sys;
        # without it the writes below raise NameError.
        import sys
        write = sys.stdout.write
        for group in self.tokens:
            for token in group:
                write(token.string)
            write('\n')
        write('\n')
    def highlight(self, lines):
        """Lex *lines* from the top and rebuild ``self.tokens``.

        Raises AssertionError if the lexer yields tokens whose ``y`` goes
        backwards or points past the last line.
        """
        self.tokens = [[] for _ in lines]
        self.lexer.lex(lines, y=0, x=0)
        y = 0
        for token in self.lexer:
            # Tokens must arrive in non-decreasing line order ...
            assert y <= token.y, "oh no %d %d" % (y, token.y)
            # ... and must land on a line that actually exists.
            assert token.y < len(lines), "uh oh %d %d" % (token.y, len(lines))
            self.tokens[token.y].append(token)
            y = token.y
    def update(self):
        """Placeholder; currently does nothing."""
        pass
--- a/lex2.py
+++ b/lex2.py
@ -113,8 +113,9 @@ class RegionRule(Rule):
                        lexer.x += 1
                null_t = None
-                lexer.y += 1
+                if not done:
-                lexer.x = 0
+                    lexer.y += 1
                    lexer.x = 0
            return True
        else:
            return False
@ -211,8 +212,9 @@ class DualRegionRule(Rule):
                        lexer.x += 1
                null_t = None
-                lexer.y += 1
+                if not done:
-                lexer.x = 0
+                    lexer.y += 1
                    lexer.x = 0
            return True
        else:
            return False
@ -250,9 +252,12 @@ class Lexer:
        while self.y < len(self.lines):
            line = self.lines[self.y]
            while self.x < len(line):
                curr_t = None
                #print 'Checking(%d) %r' % (self.x, line[self.x:])
                for rule in self.grammar.rules:
                    if rule.match(self):
                        assert self.tokens, "AAAAA %s" % repr(self.tokens)
                        #print 'Returning(%d)' % self.x
                        return self.tokens.pop(0)
                if null_t is None:
                    null_t = Token(null_t_name, self.y, self.x, '')
--- a/lex2_perl.py
+++ b/lex2_perl.py
@ -31,9 +31,13 @@ class StringGrammar(Grammar):
            pattern=r'\\.',
        ),
        PatternRule(
-            name=r'hash_bareword_index',
+            name=r'hash_deref',
-            pattern=r'(?<={) *[A-Za-z0-9_]+(?=})',
+            pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?:->{(?:[a-zA-Z_][a-zA-Z_0-9]*|'(?:\\.|[^'\\])*'|\"(\\.|[^\\\"])*\")})+",
        ),
        #PatternRule(
        #    name=r'hash_bareword_index',
        #    pattern=r'(?<={) *[A-Za-z0-9_]+(?=})',
        #),
        PatternRule(
            name=r'length_scalar',
            pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
@ -93,7 +97,7 @@ class PerlGrammar(Grammar):
        RegionRule(
            name=r'string2',
            start=r"'",
-            grammar=StringGrammar(),
+            grammar=Grammar(),
            end=r"'",
        ),
        RegionRule(
@ -190,7 +194,7 @@ class PerlGrammar(Grammar):
        ),
        RegionRule(
            name=r'explicit_match_regex1',
-            start=r'm *(?P<delim>[^ #])',
+            start=r'm *(?P<delim>[^ #a-zA-Z0-9_])',
            grammar=StringGrammar(),
            end=r'%(delim)s',
        ),
@ -230,7 +234,7 @@ class PerlGrammar(Grammar):
        ),
        PatternRule(
            name=r'label',
-            pattern=r'[a-zA-Z_][a-zA-Z0-9_]*:',
+            pattern=r'[a-zA-Z_][a-zA-Z0-9_]*:(?!:)',
        ),
        PatternRule(
            name=r'instance_method',
--- a/test3.py
+++ b/test3.py
@ -0,0 +1,101 @@
 import sys
 import lex2, lex2_perl
 # Colour names, in the same order as the escape sequences in color_list:
 # the eight normal colours, the eight bold/bright variants, then the reset.
 color_names = [
    'black', 'dred', 'dgreen', 'brown', 'dblue', 'dpurple', 'dcyan', 'lgrey',
    'dgrey', 'lred', 'lgreen', 'yellow', 'lblue', 'lpurple', 'lcyan', 'white',
    'unset',
 ]
 # Terminal escape sequences: ESC[30m..ESC[37m, ESC[30;1m..ESC[37;1m, ESC[0m.
 color_list = (
    ['\033[3%dm' % n for n in range(8)]
    + ['\033[3%d;1m' % n for n in range(8)]
    + ['\033[0m']
 )
 # Pair every colour name with its escape sequence.
 color_dict = dict(zip(color_names, color_list))
 # Maps a token name (looked up below via token.name) to a colour name, which
 # is in turn a key of color_dict.
 # NOTE(review): dotted names such as 'string1.start' look like sub-tokens
 # emitted by region rules (parent rule name + '.' + child name) -- confirm
 # against lex2.
 token_colors = {
    'null':                        'white',
    'delimiter':                   'white',
    'pod.start':                   'lred',
    'pod.null':                    'lred',
    'pod.end':                     'lred',
    'pod.header':                  'lpurple',
    'sub':                         'lcyan',
    'number':                      'white',
    'operator':                    'white',
    'heredoc':                     'lgreen',
    'endblock':                    'lred',
    'pod':                         'lred',
    'comment':                     'lred',
    'string1':                     'lgreen',
    'string1.start':               'lgreen',
    'string1.null':                'lgreen',
    'string1.escaped':             'lpurple',
    'string1.scalar':              'yellow',
    'string1.system_scalar':       'yellow',
    'string1.hash_deref':          'yellow',
    'string1.hash_bareword_index': 'lgreen',
    'string1.end':                 'lgreen',
    'string2':                     'lgreen',
    'string2.start':               'lgreen',
    'string2.null':                'lgreen',
    'string2.end':                 'lgreen',
    'evalstring':                  'lcyan',
    'default_string':              'lgreen',
    'keyword':                     'lpurple',
    'length_scalar':               'yellow',
    'system_scalar':               'yellow',
    'system_array':                'yellow',
    'scalar':                      'yellow',
    'dereference':                 'yellow',
    'array':                       'yellow',
    'hash':                        'yellow',
    'hash_bareword_index':         'lgreen',
    'quoted_region':               'lcyan',
    'match_regex':                 'lcyan',
    'replace_regex':               'lcyan',
    'literal_hash_bareword_index': 'lgreen',
    'interpolated_scalar':         'yellow',
    'interpolated_system_scalar':  'yellow',
    'interpolated_array':          'yellow',
    'interpolated_system_array':   'yellow',
    'interpolated_hash':           'yellow',
    'label':                       'lcyan',
    'package':                     'lcyan',
    'use':                         'lcyan',
    'method':                      'lcyan',
    'methodref':                   'lcyan',
    'method_declaration':          'lcyan',
    'instance_method':             'lcyan',
    'static_method':               'lcyan',
    'builtin_method':              'lpurple',
    'bareword_method':             'lcyan',
    'bareword':                    'yellow',
    'bizzaro':                     'lpurple',
 }
 # Driver: lex every file named on the command line with the Perl grammar and
 # echo it to stdout, with each token preceded by its colour escape sequence.
 paths = sys.argv[1:]
 for path in paths:
    # try/finally (rather than ``with``) keeps this runnable on old
    # interpreters while still closing the file if read() fails.
    f = open(path, 'r')
    try:
        data = f.read()
    finally:
        f.close()
    lines = data.split('\n')
    grammar = lex2_perl.PerlGrammar()
    lexer = lex2.Lexer('lexer', grammar)
    lexer.lex(lines)
    y = 0
    for token in lexer:
        # Emit newlines until the output row catches up with the token's line.
        while token.y > y:
            sys.stdout.write('\n')
            y += 1
        # Direct indexing: a token name missing from token_colors raises
        # KeyError instead of silently falling back to a default colour.
        # NOTE(review): presumably intentional for a test script -- confirm.
        color_name = token_colors[token.name]
        sys.stdout.write(color_dict[color_name])
        sys.stdout.write(token.string)
    # Reset terminal attributes so the last token's colour does not leak into
    # whatever is printed after this file.
    sys.stdout.write(color_dict['unset'])
    sys.stdout.write('\n')