parent 759ceeb805
commit 9e1a1711ab

highlight2.py (154 lines changed)
@@ -20,6 +20,12 @@ class Highlighter:
        self.lexer = lexer
        self.tokens = []

    def dump(self, fmt='(%3s, %2s) | %s'):
        print fmt % ('y', 'x', 'string')
        for group in self.tokens:
            for token in group:
                print fmt % (token.y, token.x, token.string)

    def display(self, token_colors={}, debug=False):
        for group in self.tokens:
            for token in group:
@@ -46,50 +52,116 @@ class Highlighter:
        for token in self.lexer:
            self.tokens[token.y].append(token)

    def update(self, lines, y1=0, x1=0, y2=-1, x2=-1):
        # basically, we are syncing up our cached internal state with the lexer
        # so... we need to keep track of where we are in our internal structure.
        insertion_index = None
        line_index = None
        x = x1
    def update_del(self, lines, y1, x1, y2, x2):
        assert y1 >= 0
        assert y1 <= y2
        assert y2 < len(lines)

        xdelta = x2 - x1
        ydelta = y2 - y1

        newtokens = [[] for x in range(0, len(self.tokens) - ydelta)]
        for y in range(0, y1):
            newtokens[y] = self.tokens[y]

        for y in range(y1, len(lines)):
            while self.tokens[y]:
                token = self.tokens[y].pop(0)
                tx1 = token.x
                tx2 = token.x + len(token.string)
                if (y, tx2) <= (y1, x1):
                    # *| |
                    newtokens[y].append(token)
                elif (y, tx1) >= (y2, x2):
                    # | |*
                    token.y -= ydelta
                    if y == y2:
                        token.x -= xdelta
                    newtokens[token.y].append(token)
                elif (y, tx1) < (y1, x1):
                    token2 = token.copy()
                    if (y, tx2) <= (y2, x2):
                        # *|*|
                        s = token2.string[:x1 - tx1]
                    else:
                        # *|*|*
                        s = token2.string[:x1 - tx1] + token2.string[x2 - tx1:]
                    token2.string = s
                    newtokens[y].append(token2)
                elif (y, tx1) < (y2, x2):
                    if (y, tx2) <= (y2, x2):
                        # |*|
                        pass
                    else:
                        # |*|*
                        token2 = token.copy()
                        token2.x = x1
                        token2.y = token2.y - ydelta
                        token2.string = token2.string[x2 - tx1:]
                        newtokens[token2.y].append(token2)
        self.tokens = newtokens

    def relex_del(self, lines, y1, x1, y2, x2):
        self.update_del(lines, y1, x1, y2, x2)
        self.lexer.lex(lines, y1, 0)

        y = y1
        i = 0
        getnext = True

        # so figure out where exactly the document has changed and how this
        # necessarily affects our internal structure
        for i in range(0, len(self.tokens[y1])):
            t = self.tokens[y1][i]
            if t.x < x1 and t.x + len(t.string) > x1:
                # this token spans our region, so invalidate it and start our
                # update from its start instead
                x1 = t.x
                insertion_index = i
                line_index = i
                del self.tokens[i]
        while True:
            if y >= len(lines):
                break
            elif t.x == x1:
                # ok, so it looks like the change starts on a token
                line_index = i

            assert line_index is not None
            if getnext:
                try:
                    new_token = self.lexer.next()
                    getnext = False
                except StopIteration:
                    for j in range(y, len(lines)):
                        print 'DELETE END ROW %d[%d:]: %r' % (j, i, [x.string for x in self.tokens[j][i:]])
                        del self.tokens[j][i:]
                        i = 0
                    break

            self.lexer.lex(lines, y1, x1)
            for lt in self.lexer.lex:
                if y != lt.y:
                    y = lt.y
                if insertion_index:
                    # ok, so we have a "gap" that we have to fill, so just insert
                    # the token in our structure, and then see if it overlaps
                    # something else that has to go
                    self.tokens[y].insert(insertion_index, lt)
                    line_index = insertion_index + 1
                    insertion_index = None
                for i in range(line_index, len(self.tokens[y])):
                    if self.tokens[y][i].start < None: #GJIE
                        pass #GGGJGEI
                insertion_index = None

            # if our next token is on a future line, we need to just get rid of
            # all our old tokens until we get there
            while new_token.y > y:
                print 'DELETE MID ROW %d[%d:]: %r' % (y, i, [x.string for x in self.tokens[y][i:]])
                del self.tokens[y][i:]
                i = 0
                y += 1

            if y2 > 0:
                for i in range(y1, y2):
                    self.tokens
            if i < len(self.tokens[y]):
                old_token = self.tokens[y][i]
                assert old_token.y == y
            else:
                old_token = None

            if old_token is None:
                print 'DEFAULT INSERT %d[%d]: %r' % (y, i, new_token.string)
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
                continue
            elif old_token == new_token:
                print 'MATCH %d[%d]: %r == %r' % (y, i, old_token.string, new_token.string)
                i += 1
                getnext = True
                if new_token.y >= y2 and new_token.end_x() >= x2:
                    break
                else:
                    continue
            elif old_token.x < new_token.end_x():
                print 'DELETE BEFORE %d[%d]: %r' % (y, i, old_token.string)
                del self.tokens[y][i]
                continue
            elif old_token.x >= new_token.end_x():
                print 'INSERT %d[%d]: %r' % (y, i, new_token.string)
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
                continue
            else:
                raise Exception, "what what?"
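Not part of the commit: a minimal standalone sketch of the coordinate shift that update_del applies to tokens that survive a deletion of the region (y1, x1)..(y2, x2). The Tok class and the sample values below are hypothetical; only the tuple comparison and the xdelta/ydelta arithmetic mirror the hunk above.

# hypothetical illustration of update_del's shift rules, not repository code
class Tok(object):
    def __init__(self, y, x, s):
        self.y, self.x, self.string = y, x, s
    def __repr__(self):
        return '(%d, %d) %r' % (self.y, self.x, self.string)

def shift_after_delete(tok, y1, x1, y2, x2):
    """Return the token's new position after deleting (y1, x1)..(y2, x2)."""
    ydelta = y2 - y1
    xdelta = x2 - x1
    if (tok.y, tok.x) >= (y2, x2):   # token starts at or after the deleted region
        if tok.y == y2:              # on the last deleted row it also slides left
            tok.x -= xdelta
        tok.y -= ydelta              # every later token moves up
    return tok

# Deleting rows 5..7 (x1=9, x2=14): a token at (7, 20) ends up at (5, 15).
print(shift_after_delete(Tok(7, 20, 'foo'), 5, 9, 7, 14))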
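Likewise not in the commit: a rough single-row model of the reconciliation loop at the end of relex_del. Cached tokens that match the fresh lexer output are kept (MATCH), cached tokens overlapped by a new token are dropped (DELETE BEFORE), and new tokens are inserted where a gap opens (INSERT). Tokens are reduced to hypothetical (x, string) pairs here, so this is only a sketch of the control flow, not the method itself.

# hypothetical single-row reconciliation of cached vs. freshly lexed tokens
def merge_row(old, new):
    out = list(old)
    i = 0
    for nt in new:
        # drop cached tokens the new token overlaps and does not equal
        while i < len(out) and out[i] != nt and out[i][0] < nt[0] + len(nt[1]):
            del out[i]
        if i < len(out) and out[i] == nt:
            i += 1                 # MATCH: reuse the cached token
        else:
            out.insert(i, nt)      # INSERT: adopt the freshly lexed token
            i += 1
    return out

old = [(0, 'foo'), (4, 'barbaz'), (11, '()')]
new = [(0, 'foo'), (4, 'bar'), (8, 'qux')]
print(merge_row(old, new))   # [(0, 'foo'), (4, 'bar'), (8, 'qux'), (11, '()')]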

lex2.py (6 lines changed)

@@ -6,16 +6,22 @@ reserved_names = ['start', 'middle', 'end', 'null']
class Token(object):
    def __init__(self, name, rule, y, x, s, **vargs):
        self.name = name
        self.rule = rule
        self.y = y
        self.x = x
        self.string = s
        self.vargs = vargs
    def copy(self):
        return Token(self.name, None, self.y, self.x, self.string, **self.vargs)
    def add_to_string(self, s):
        self.string += s
    def end_x(self):
        return self.x + len(self.string)
    def __eq__(self, other):
        return (self.y == other.y and
                self.x == other.x and
                self.string == other.string and
                self.name == other.name and
                self.vargs == other.vargs)
    def __repr__(self):
        if len(self.string) < 10:
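A short usage sketch, assuming the Token class above is importable from lex2: update_del splits tokens via copy(), which drops the rule reference, and relex_del relies on __eq__ comparing tokens by value, so a freshly lexed token can match a cached one.

from lex2 import Token

a = Token('identifier', None, 3, 4, 'foo')
b = a.copy()
print(a == b)        # True: equality is by name, position, string and vargs
print(a.end_x())     # 7, i.e. x (4) plus len('foo')
b.add_to_string('bar')
print(a == b)        # False: the strings now differ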

@@ -0,0 +1,113 @@
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule

class StringGrammar(Grammar):
    rules = [
        PatternRule(
            name=r'octal',
            pattern=r'\\[0-7]{3}',
        ),
        PatternRule(
            name=r'escaped',
            pattern=r'\\.',
        ),
        PatternRule(
            name=r'format',
            pattern=r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]',
        ),
    ]

class PythonGrammar(Grammar):
    rules = [
        PatternRule(
            name=r'import',
            pattern=r'(?:^|(?<= ))import(?= |$)',
        ),
        PatternRule(
            name=r'methodname',
            pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'classname',
            pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'reserved',
            pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r'keyword',
            pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r"builtin_method",
            pattern=r'(?<!\.)(?:zip|xrange|vars|unicode|unichr|type|tuple|super|sum|str|staticmethod|sorted|slice|setattr|set|round|repr|reduce|raw_input|range|property|pow|ord|open|oct|object|max|min|map|long|locals|list|len|iter|issubclass|isinstance|int|input|id|hex|hash|hasattr|globals|getattr|frozenset|float|filter|file|execfile|eval|enumerate|divmod|dir|dict|delattr|complex|compile|coerce|cmp|classmethod|chr|callable|bool)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r'bound_method',
            pattern=r'(?<=\. )[a-zA-Z_][a-zA-Z0-9_]*(?= *\()',
        ),
        PatternRule(
            name=r'system_identifier',
            pattern=r'__[a-zA-Z0-9_]+__',
        ),
        PatternRule(
            name=r'private_identifier',
            pattern=r'__[a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'hidden_identifier',
            pattern=r'_[a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'identifier',
            pattern=r'[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'delimiter',
            pattern=r'\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=',
        ),
        PatternRule(
            name=r"operator",
            pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%",
        ),
        PatternRule(
            name=r"integer",
            pattern=r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
        ),
        PatternRule(
            name=r"float",
            pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+",
        ),
        PatternRule(
            name=r"imaginary",
            pattern=r"[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]",
        ),

        RegionRule(
            name=r'docstring',
            start=r'^ *(?P<tag>"""|\'\'\')',
            grammar=Grammar(),
            end=r'%(tag)s',
        ),
        RegionRule(
            name=r'tq_string',
            start=r'(?P<tag>"""|\'\'\')',
            grammar=Grammar(),
            end=r'%(tag)s',
        ),
        RegionRule(
            name=r'string',
            start=r'(?P<tag>"|\')',
            grammar=StringGrammar(),
            end=r'%(tag)s',
        ),

        PatternRule(
            name=r'comment',
            pattern=r'#.*$',
        ),
        PatternRule(
            name=r'continuation',
            pattern=r'\\$',
        ),
    ]
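A quick check, not in the commit, of two of the patterns above using the standard re module. The rule classes themselves come from lex2 and are not exercised here; only the raw regexes are, against made-up sample strings.

import re

# 'methodname' only matches an identifier preceded by 'def '.
methodname = re.compile(r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*')
print(methodname.findall('def update_del(self, lines):'))   # ['update_del']

# StringGrammar's 'format' rule picks out printf-style specifiers.
fmt = re.compile(r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]')
print(fmt.findall('DELETE MID ROW %d[%d:]: %r'))             # ['%d', '%d', '%r']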

test3.py (48 lines changed)

@@ -141,28 +141,44 @@ grammars = {
    'python': lex2_python.PythonGrammar,
}

#t = 'perl'
t = 'python'
import optparse

m = True
#m = False
parser = optparse.OptionParser()
parser.add_option('-d', '--dump', dest='dump', action='store_true', default=False)
parser.add_option('-g', '--grammar', dest='grammar', action='store', default='python')
parser.add_option('-n', '--normal', dest='normal', action='store_true', default=False)

paths = sys.argv[1:]
for path in paths:
(opts, args) = parser.parse_args()

for path in args:
    f = open(path, 'r')
    data = f.read()
    f.close()

    lines = data.split('\n')
    lexer = lex2.Lexer('lexer', grammars[t]())
    lexer = lex2.Lexer('lexer', grammars[opts.grammar]())

    if m:
        h = highlight2.Highlighter(lexer)
        h.highlight(lines)
        h.display(token_colors[t])
    h = highlight2.Highlighter(lexer)
    h.highlight(lines)

    if opts.normal:
        if opts.dump:
            h.dump()
        else:
            h.display(token_colors[opts.grammar])
    else:
        lexer.lex(lines)
        for token in lexer:
            print '%-30s| %-6s | %r' % (token.name,
                                        '(%d,%d)' % (token.x, token.y),
                                        token.string)
        (y1, x1) = (5, 9)
        (y2, x2) = (7, 14)
        #(y2, x2) = (82, 2)
        for i in range(y1 + 1, y2):
            del lines[y1 + 1]
        lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
        del lines[y1 + 1]

        h.relex_del(lines, y1, x1, y2, x2)
        #h.update_del(lines, y1, x1, y2, x2)
        #h.highlight(lines)
        if opts.dump:
            h.dump()
        else:
            h.display(token_colors[opts.grammar])
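The deletion that test3.py simulates before calling relex_del can be read as one splice over the line buffer. Below is a minimal standalone version of that splice, mirroring the index arithmetic in the hunk above; the sample strings and the delete_region name are hypothetical, purely for illustration.

def delete_region(lines, y1, x1, y2, x2):
    """Remove the text between (y1, x1) and (y2, x2), joining the end rows."""
    for i in range(y1 + 1, y2):
        del lines[y1 + 1]          # drop the fully deleted middle rows
    lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
    del lines[y1 + 1]              # the tail of row y2 was merged into row y1
    return lines

sample = ['row0', 'row1 keep|cut', 'cut all', 'cut all too', 'cut..|tail', 'row5']
print(delete_region(sample, 1, 9, 4, 5))
# ['row0', 'row1 keep|tail', 'row5']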