From 9e1a1711ab49450493bc775e35ed991c2ccc5f3a Mon Sep 17 00:00:00 2001
From: moculus
Date: Wed, 2 May 2007 04:17:12 +0000
Subject: [PATCH] YES

--HG--
branch : pmacs2
---
 highlight2.py  | 154 ++++++++++++++++++++++++++++++++++++-------------
 lex2.py        |   6 ++
 lex2_python.py | 113 ++++++++++++++++++++++++++++++++++++
 test3.py       |  48 ++++++++++-----
 4 files changed, 264 insertions(+), 57 deletions(-)
 create mode 100755 lex2_python.py

diff --git a/highlight2.py b/highlight2.py
index 42f2300..6622424 100644
--- a/highlight2.py
+++ b/highlight2.py
@@ -20,6 +20,12 @@ class Highlighter:
         self.lexer = lexer
         self.tokens = []

+    def dump(self, fmt='(%3s, %2s) | %s'):
+        print fmt % ('y', 'x', 'string')
+        for group in self.tokens:
+            for token in group:
+                print fmt % (token.y, token.x, token.string)
+
     def display(self, token_colors={}, debug=False):
         for group in self.tokens:
             for token in group:
@@ -46,50 +52,116 @@ class Highlighter:
         for token in self.lexer:
             self.tokens[token.y].append(token)

-    def update(self, lines, y1=0, x1=0, y2=-1, x2=-1):
-        # basically, we are syncing up our cached internal state with the lexer
-        # so... we need to keep track of where we are in our internal structure.
-        insertion_index = None
-        line_index = None
-        x = x1
+    def update_del(self, lines, y1, x1, y2, x2):
+        assert y1 >= 0
+        assert y1 <= y2
+        assert y2 < len(lines)
+
+        xdelta = x2 - x1
+        ydelta = y2 - y1
+
+        newtokens = [[] for x in range(0, len(self.tokens) - ydelta)]
+        for y in range(0, y1):
+            newtokens[y] = self.tokens[y]
+
+        for y in range(y1, len(lines)):
+            while self.tokens[y]:
+                token = self.tokens[y].pop(0)
+                tx1 = token.x
+                tx2 = token.x + len(token.string)
+                if (y, tx2) <= (y1, x1):
+                    # *| |
+                    newtokens[y].append(token)
+                elif (y, tx1) >= (y2, x2):
+                    # | |*
+                    token.y -= ydelta
+                    if y == y2:
+                        token.x -= xdelta
+                    newtokens[token.y].append(token)
+                elif (y, tx1) < (y1, x1):
+                    token2 = token.copy()
+                    if (y, tx2) <= (y2, x2):
+                        # *|*|
+                        s = token2.string[:x1 - tx1]
+                    else:
+                        # *|*|*
+                        s = token2.string[:x1 - tx1] + token2.string[x2 - tx1:]
+                    token2.string = s
+                    newtokens[y].append(token2)
+                elif (y, tx1) < (y2, x2):
+                    if (y, tx2) <= (y2, x2):
+                        # |*|
+                        pass
+                    else:
+                        # |*|*
+                        token2 = token.copy()
+                        token2.x = x1
+                        token2.y = token2.y - ydelta
+                        token2.string = token2.string[x2 - tx1:]
+                        newtokens[token2.y].append(token2)
+        self.tokens = newtokens
+
+    def relex_del(self, lines, y1, x1, y2, x2):
+        self.update_del(lines, y1, x1, y2, x2)
+        self.lexer.lex(lines, y1, 0)
+        y = y1
+        i = 0
+        getnext = True
-        # so figure out where exactly the document has changed and how this
-        # necessarily affects our internal structure
-        for i in range(0, len(self.tokens[y1])):
-            t = self.tokens[y1][i]
-            if t.x < x1 and t.x + len(t.string) > x1:
-                # this token spans our region, so invalidate it and start our
-                # update from its start instead
-                x1 = t.x
-                insertion_index = i
-                line_index = i
-                del self.tokens[i]
+        while True:
+            if y >= len(lines): break
-            elif t.x == x1:
-                # ok, so it looks like the change starts on a token
-                line_index = i
-        assert line_index is not None:
+            if getnext:
+                try:
+                    new_token = self.lexer.next()
+                    getnext = False
+                except StopIteration:
+                    for j in range(y, len(lines)):
+                        print 'DELETE END ROW %d[%d:]: %r' % (j, i, [x.string for x in self.tokens[j][i:]])
+                        del self.tokens[j][i:]
+                        i = 0
+                    break
-        self.lexer.lex(lines, y1, x1)
-        for lt in self.lexer.lex:
-            if y != lt.y:
-                y = lt.y
-            if insertion_index:
-                # ok, so we have a "gap" that we have to fill, so just insert
-                # the token in our strucutre, and then see if it overlaps
-                # something else that has to go
-                self.tokens[y].insert(insertion_index, lt)
-                line_index = insertion_index + 1
-                insertion_index = None
-            for i in range(line_index, len(self.tokens[y])):
-                if self.tokens[y][i].start < None: #GJIE
-                    pass #GGGJGEI
-            insertion_index = None
-
-
+            # if our next token is one a future line, we need to just get rid of
+            # all our old tokens until we get there
+            while new_token.y > y:
+                print 'DELETE MID ROW %d[%d:]: %r' % (y, i, [x.string for x in self.tokens[y][i:]])
+                del self.tokens[y][i:]
+                i = 0
+                y += 1
-        if y2 > 0:
-            for i in range(y1, y2):
-                self.tokens
+            if i < len(self.tokens[y]):
+                old_token = self.tokens[y][i]
+                assert old_token.y == y
+            else:
+                old_token = None
+
+            if old_token is None:
+                print 'DEFAULT INSERT %d[%d]: %r' % (y, i, new_token.string)
+                self.tokens[y].insert(i, new_token)
+                i += 1
+                getnext = True
+                continue
+            elif old_token == new_token:
+                print 'MATCH %d[%d]: %r == %r' % (y, i, old_token.string, new_token.string)
+                i += 1
+                getnext = True
+                if new_token.y >= y2 and new_token.end_x() >= x2:
+                    break
+                else:
+                    continue
+            elif old_token.x < new_token.end_x():
+                print 'DELETE BEFORE %d[%d]: %r' % (y, i, old_token.string)
+                del self.tokens[y][i]
+                continue
+            elif old_token.x >= new_token.end_x():
+                print 'INSERT %d[%d]: %r' % (y, i, new_token.string)
+                self.tokens[y].insert(i, new_token)
+                i += 1
+                getnext = True
+                continue
+            else:
+                raise Exception, "what what?"
+
diff --git a/lex2.py b/lex2.py
index 8641e63..714cdeb 100755
--- a/lex2.py
+++ b/lex2.py
@@ -6,16 +6,22 @@ reserved_names = ['start', 'middle', 'end', 'null']
 class Token(object):
     def __init__(self, name, rule, y, x, s, **vargs):
         self.name = name
+        self.rule = rule
         self.y = y
         self.x = x
         self.string = s
         self.vargs = vargs
+    def copy(self):
+        return Token(self.name, None, self.y, self.x, self.string, **self.vargs)
     def add_to_string(self, s):
         self.string += s
+    def end_x(self):
+        return self.x + len(self.string)
     def __eq__(self, other):
         return (self.y == other.y and
                 self.x == other.x and
                 self.string == other.string and
+                self.name == other.name and
                 self.vargs == other.vargs)
     def __repr__(self):
         if len(self.string) < 10:
diff --git a/lex2_python.py b/lex2_python.py
new file mode 100755
index 0000000..074275f
--- /dev/null
+++ b/lex2_python.py
@@ -0,0 +1,113 @@
+from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
+
+class StringGrammar(Grammar):
+    rules = [
+        PatternRule(
+            name=r'octal',
+            pattern=r'\\[0-7]{3}',
+        ),
+        PatternRule(
+            name=r'escaped',
+            pattern=r'\\.',
+        ),
+        PatternRule(
+            name=r'format',
+            pattern=r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]',
+        ),
+    ]
+
+class PythonGrammar(Grammar):
+    rules = [
+        PatternRule(
+            name=r'import',
+            pattern=r'(?:^|(?<= ))import(?= |$)',
+        ),
+        PatternRule(
+            name=r'methodname',
+            pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*',
+        ),
+        PatternRule(
+            name=r'classname',
+            pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*',
+        ),
+        PatternRule(
+            name=r'reserved',
+            pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])',
+        ),
+        PatternRule(
+            name=r'keyword',
+            pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])',
+        ),
+        PatternRule(
+            name=r"builtin_method",
+            pattern=r'(?>=|<<=|\*\*=',
+        ),
+        PatternRule(
+            name=r"operator",
+            pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%",
+        ),
+        PatternRule(
+            name=r"integer",
+            pattern=r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
+        ),
+        PatternRule(
+            name=r"float",
+            pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+",
+        ),
+        PatternRule(
+            name=r"imaginary",
+            pattern=r"[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]",
+        ),
+
+        RegionRule(
+            name=r'docstring',
+            start=r'^ *(?P<tag>"""|\'\'\')',
+            grammar=Grammar(),
+            end=r'%(tag)s',
+        ),
+        RegionRule(
+            name=r'tq_string',
+            start=r'(?P<tag>"""|\'\'\')',
+            grammar=Grammar(),
+            end=r'%(tag)s',
+        ),
+        RegionRule(
+            name=r'string',
+            start=r'(?P<tag>"|\')',
+            grammar=StringGrammar(),
+            end=r'%(tag)s',
+        ),
+
+        PatternRule(
+            name=r'comment',
+            pattern=r'#.*$',
+        ),
+        PatternRule(
+            name=r'continuation',
+            pattern=r'\\$',
+        ),
+    ]
diff --git a/test3.py b/test3.py
index fc290f8..b21656a 100644
--- a/test3.py
+++ b/test3.py
@@ -141,28 +141,44 @@ grammars = {
     'python': lex2_python.PythonGrammar,
 }

-#t = 'perl'
-t = 'python'
+import optparse

-m = True
-#m = False
+parser = optparse.OptionParser()
+parser.add_option('-d', '--dump', dest='dump', action='store_true', default=False)
+parser.add_option('-g', '--grammar', dest='grammar', action='store', default='python')
+parser.add_option('-n', '--normal', dest='normal', action='store_true', default=False)

-paths = sys.argv[1:]
-for path in paths:
+(opts, args) = parser.parse_args()
+
+for path in args:
     f = open(path, 'r')
     data = f.read()
     f.close()
     lines = data.split('\n')

-    lexer = lex2.Lexer('lexer', grammars[t]())
+    lexer = lex2.Lexer('lexer', grammars[opts.grammar]())

-    if m:
-        h = highlight2.Highlighter(lexer)
-        h.highlight(lines)
-        h.display(token_colors[t])
+    h = highlight2.Highlighter(lexer)
+    h.highlight(lines)
+
+    if opts.normal:
+        if opts.dump:
+            h.dump()
+        else:
+            h.display(token_colors[opts.grammar])
     else:
-        lexer.lex(lines)
-        for token in lexer:
-            print '%-30s| %-6s | %r' % (token.name,
-                                         '(%d,%d)' % (token.x, token.y),
-                                         token.string)
+        (y1, x1) = (5, 9)
+        (y2, x2) = (7, 14)
+        #(y2, x2) = (82, 2)
+        for i in range(y1 + 1, y2):
+            del lines[y1 + 1]
+        lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
+        del lines[y1 + 1]
+
+        h.relex_del(lines, y1, x1, y2, x2)
+        #h.update_del(lines, y1, x1, y2, x2)
+        #h.highlight(lines)
+        if opts.dump:
+            h.dump()
+        else:
+            h.display(token_colors[opts.grammar])
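
Not part of the patch itself: the following is a minimal usage sketch of the new
deletion-aware relexing path, mirroring the branch added to test3.py above. The
point of relex_del over a full highlight() is that only rows from the edit
onward are relexed and compared against the cached tokens, while tokens above
the edit are reused unchanged. The input file 'example.py' and the
(y1, x1)/(y2, x2) coordinates are illustrative assumptions; lex2, lex2_python
and highlight2 are the modules touched by this change.

    import lex2
    import lex2_python
    import highlight2

    # lex and highlight the whole buffer once up front
    lines = open('example.py', 'r').read().split('\n')   # hypothetical input file
    lexer = lex2.Lexer('lexer', lex2_python.PythonGrammar())
    h = highlight2.Highlighter(lexer)
    h.highlight(lines)

    # simulate deleting the region between (y1, x1) and (y2, x2), the same way
    # test3.py does: drop the fully-deleted middle rows, then join the first
    # and last affected rows
    (y1, x1) = (5, 9)
    (y2, x2) = (7, 14)
    for i in range(y1 + 1, y2):
        del lines[y1 + 1]
    lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
    del lines[y1 + 1]

    # shift/trim the cached tokens and relex only from row y1 onward,
    # instead of re-highlighting the whole buffer
    h.relex_del(lines, y1, x1, y2, x2)
    h.dump()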