--HG--
branch : pmacs2
moculus 2007-05-02 04:17:12 +00:00
parent 759ceeb805
commit 9e1a1711ab
4 changed files with 264 additions and 57 deletions

View File

@@ -20,6 +20,12 @@ class Highlighter:
         self.lexer = lexer
         self.tokens = []
+    def dump(self, fmt='(%3s, %2s) | %s'):
+        print fmt % ('y', 'x', 'string')
+        for group in self.tokens:
+            for token in group:
+                print fmt % (token.y, token.x, token.string)
     def display(self, token_colors={}, debug=False):
         for group in self.tokens:
             for token in group:
@@ -46,50 +52,116 @@ class Highlighter:
         for token in self.lexer:
             self.tokens[token.y].append(token)
-    def update(self, lines, y1=0, x1=0, y2=-1, x2=-1):
-        # basically, we are syncing up our cached internal state with the lexer
-        # so... we need to keep track of where we are in our internal structure.
-        insertion_index = None
-        line_index = None
-        x = x1
-        y = y1
-        # so figure out where exactly the document has changed and how this
-        # necessarily affects our internal structure
-        for i in range(0, len(self.tokens[y1])):
-            t = self.tokens[y1][i]
-            if t.x < x1 and t.x + len(t.string) > x1:
-                # this token spans our region, so invalidate it and start our
-                # update from its start instead
-                x1 = t.x
-                insertion_index = i
-                line_index = i
-                del self.tokens[i]
-                break
-            elif t.x == x1:
-                # ok, so it looks like the change starts on a token
-                line_index = i
-        assert line_index is not None
-        self.lexer.lex(lines, y1, x1)
-        for lt in self.lexer.lex:
-            if y != lt.y:
-                y = lt.y
-                if insertion_index:
-                    # ok, so we have a "gap" that we have to fill, so just insert
-                    # the token in our structure, and then see if it overlaps
-                    # something else that has to go
-                    self.tokens[y].insert(insertion_index, lt)
-                    line_index = insertion_index + 1
-                    insertion_index = None
-                for i in range(line_index, len(self.tokens[y])):
-                    if self.tokens[y][i].start < None: #GJIE
-                        pass #GGGJGEI
-                insertion_index = None
-        if y2 > 0:
-            for i in range(y1, y2):
-                self.tokens
+    def update_del(self, lines, y1, x1, y2, x2):
+        assert y1 >= 0
+        assert y1 <= y2
+        assert y2 < len(lines)
+        xdelta = x2 - x1
+        ydelta = y2 - y1
+        newtokens = [[] for x in range(0, len(self.tokens) - ydelta)]
+        for y in range(0, y1):
+            newtokens[y] = self.tokens[y]
+        for y in range(y1, len(lines)):
+            while self.tokens[y]:
+                token = self.tokens[y].pop(0)
+                tx1 = token.x
+                tx2 = token.x + len(token.string)
+                if (y, tx2) <= (y1, x1):
+                    # *| |
+                    newtokens[y].append(token)
+                elif (y, tx1) >= (y2, x2):
+                    # | |*
+                    token.y -= ydelta
+                    if y == y2:
+                        token.x -= xdelta
+                    newtokens[token.y].append(token)
+                elif (y, tx1) < (y1, x1):
+                    token2 = token.copy()
+                    if (y, tx2) <= (y2, x2):
+                        # *|*|
+                        s = token2.string[:x1 - tx1]
+                    else:
+                        # *|*|*
+                        s = token2.string[:x1 - tx1] + token2.string[x2 - tx1:]
+                    token2.string = s
+                    newtokens[y].append(token2)
+                elif (y, tx1) < (y2, x2):
+                    if (y, tx2) <= (y2, x2):
+                        # |*|
+                        pass
+                    else:
+                        # |*|*
+                        token2 = token.copy()
+                        token2.x = x1
+                        token2.y = token2.y - ydelta
+                        token2.string = token2.string[x2 - tx1:]
+                        newtokens[token2.y].append(token2)
+        self.tokens = newtokens
+    def relex_del(self, lines, y1, x1, y2, x2):
+        self.update_del(lines, y1, x1, y2, x2)
+        self.lexer.lex(lines, y1, 0)
+        y = y1
+        i = 0
+        getnext = True
+        while True:
+            if y >= len(lines):
+                break
+            if getnext:
+                try:
+                    new_token = self.lexer.next()
+                    getnext = False
+                except StopIteration:
+                    for j in range(y, len(lines)):
+                        print 'DELETE END ROW %d[%d:]: %r' % (j, i, [x.string for x in self.tokens[j][i:]])
+                        del self.tokens[j][i:]
+                        i = 0
+                    break
+            # if our next token is on a future line, we need to just get rid of
+            # all our old tokens until we get there
+            while new_token.y > y:
+                print 'DELETE MID ROW %d[%d:]: %r' % (y, i, [x.string for x in self.tokens[y][i:]])
+                del self.tokens[y][i:]
+                i = 0
+                y += 1
+            if i < len(self.tokens[y]):
+                old_token = self.tokens[y][i]
+                assert old_token.y == y
+            else:
+                old_token = None
+            if old_token is None:
+                print 'DEFAULT INSERT %d[%d]: %r' % (y, i, new_token.string)
+                self.tokens[y].insert(i, new_token)
+                i += 1
+                getnext = True
+                continue
+            elif old_token == new_token:
+                print 'MATCH %d[%d]: %r == %r' % (y, i, old_token.string, new_token.string)
+                i += 1
+                getnext = True
+                if new_token.y >= y2 and new_token.end_x() >= x2:
+                    break
+                else:
+                    continue
+            elif old_token.x < new_token.end_x():
+                print 'DELETE BEFORE %d[%d]: %r' % (y, i, old_token.string)
+                del self.tokens[y][i]
+                continue
+            elif old_token.x >= new_token.end_x():
+                print 'INSERT %d[%d]: %r' % (y, i, new_token.string)
+                self.tokens[y].insert(i, new_token)
+                i += 1
+                getnext = True
+                continue
+            else:
+                raise Exception, "what what?"
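Note: the "*| |", "| |*", "*|*|", "*|*|*", "|*|" and "|*|*" glyphs in update_del's comments describe where a token's span falls relative to the two boundaries of the deleted region. A minimal standalone sketch of that classification (a hypothetical helper, not part of the commit), with a few checks against a deletion of columns 4-9 on row 1:

# Hypothetical helper mirroring update_del's case analysis; '*' marks where
# the token's text lies relative to the deleted region's boundaries '|'.
def classify_token(y, tx1, tx2, y1, x1, y2, x2):
    if (y, tx2) <= (y1, x1):
        return '*| |'    # token ends before the region: keep unchanged
    elif (y, tx1) >= (y2, x2):
        return '| |*'    # token starts after the region: shift up/left
    elif (y, tx1) < (y1, x1):
        if (y, tx2) <= (y2, x2):
            return '*|*|'   # straddles the start: keep the left piece
        else:
            return '*|*|*'  # spans the whole region: keep both ends
    elif (y, tx1) < (y2, x2):
        if (y, tx2) <= (y2, x2):
            return '|*|'    # entirely inside the region: drop it
        else:
            return '|*|*'   # straddles the end: keep the right piece

assert classify_token(1,  0,  3, 1, 4, 1, 9) == '*| |'
assert classify_token(1,  2,  6, 1, 4, 1, 9) == '*|*|'
assert classify_token(1,  2, 11, 1, 4, 1, 9) == '*|*|*'
assert classify_token(1,  5,  8, 1, 4, 1, 9) == '|*|'
assert classify_token(1,  6, 12, 1, 4, 1, 9) == '|*|*'
assert classify_token(1, 10, 14, 1, 4, 1, 9) == '| |*'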

View File

@@ -6,16 +6,22 @@ reserved_names = ['start', 'middle', 'end', 'null']
 class Token(object):
     def __init__(self, name, rule, y, x, s, **vargs):
         self.name = name
+        self.rule = rule
         self.y = y
         self.x = x
         self.string = s
         self.vargs = vargs
+    def copy(self):
+        return Token(self.name, None, self.y, self.x, self.string, **self.vargs)
     def add_to_string(self, s):
         self.string += s
+    def end_x(self):
+        return self.x + len(self.string)
     def __eq__(self, other):
         return (self.y == other.y and
                 self.x == other.x and
                 self.string == other.string and
+                self.name == other.name and
                 self.vargs == other.vargs)
     def __repr__(self):
         if len(self.string) < 10:
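Note: a small sketch of how the new Token helpers behave. It assumes the Token class above lives in the lex2 module (the hunk does not name its file, so the import path is a guess):

from lex2 import Token

old = Token('identifier', None, 5, 4, 'foo')
new = Token('identifier', None, 5, 4, 'foo')
print old.end_x()        # 7: the column just past the token's last character
print old == new         # True: __eq__ compares position, string, name and vargs
print old.copy() == old  # True: copy() drops the rule but keeps everything __eq__ checks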

lex2_python.py (new executable file, 113 lines)
View File

@@ -0,0 +1,113 @@
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule

class StringGrammar(Grammar):
    rules = [
        PatternRule(
            name=r'octal',
            pattern=r'\\[0-7]{3}',
        ),
        PatternRule(
            name=r'escaped',
            pattern=r'\\.',
        ),
        PatternRule(
            name=r'format',
            pattern=r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]',
        ),
    ]

class PythonGrammar(Grammar):
    rules = [
        PatternRule(
            name=r'import',
            pattern=r'(?:^|(?<= ))import(?= |$)',
        ),
        PatternRule(
            name=r'methodname',
            pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'classname',
            pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'reserved',
            pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r'keyword',
            pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r"builtin_method",
            pattern=r'(?<!\.)(?:zip|xrange|vars|unicode|unichr|type|tuple|super|sum|str|staticmethod|sorted|slice|setattr|set|round|repr|reduce|raw_input|range|property|pow|ord|open|oct|object|max|min|map|long|locals|list|len|iter|issubclass|isinstance|int|input|id|hex|hash|hasattr|globals|getattr|frozenset|float|filter|file|execfile|eval|enumerate|divmod|dir|dict|delattr|complex|compile|coerce|cmp|classmethod|chr|callable|bool)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r'bound_method',
            pattern=r'(?<=\. )[a-zA-Z_][a-zA-Z0-9_]*(?= *\()',
        ),
        PatternRule(
            name=r'system_identifier',
            pattern=r'__[a-zA-Z0-9_]+__',
        ),
        PatternRule(
            name=r'private_identifier',
            pattern=r'__[a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'hidden_identifier',
            pattern=r'_[a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'identifier',
            pattern=r'[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'delimiter',
            pattern=r'\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=',
        ),
        PatternRule(
            name=r"operator",
            pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%",
        ),
        PatternRule(
            name=r"integer",
            pattern=r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
        ),
        PatternRule(
            name=r"float",
            pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+",
        ),
        PatternRule(
            name=r"imaginary",
            pattern=r"[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]",
        ),
        RegionRule(
            name=r'docstring',
            start=r'^ *(?P<tag>"""|\'\'\')',
            grammar=Grammar(),
            end=r'%(tag)s',
        ),
        RegionRule(
            name=r'tq_string',
            start=r'(?P<tag>"""|\'\'\')',
            grammar=Grammar(),
            end=r'%(tag)s',
        ),
        RegionRule(
            name=r'string',
            start=r'(?P<tag>"|\')',
            grammar=StringGrammar(),
            end=r'%(tag)s',
        ),
        PatternRule(
            name=r'comment',
            pattern=r'#.*$',
        ),
        PatternRule(
            name=r'continuation',
            pattern=r'\\$',
        ),
    ]
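Note: a minimal usage sketch for these grammars, following the lex2.Lexer calls made by the test script in the next file (Lexer(name, grammar), lex(lines), then iterate the tokens); the two-line sample buffer is made up:

import lex2
import lex2_python

lines = [
    'def greet(name):',
    '    return "hello %s" % name',
]
lexer = lex2.Lexer('lexer', lex2_python.PythonGrammar())
lexer.lex(lines)
for token in lexer:
    # each token knows its row, column, rule name and matched text
    print '%-18s (%d,%d) %r' % (token.name, token.y, token.x, token.string)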

View File

@@ -141,28 +141,44 @@ grammars = {
     'python': lex2_python.PythonGrammar,
 }
-#t = 'perl'
-t = 'python'
-m = True
-#m = False
+import optparse
+parser = optparse.OptionParser()
+parser.add_option('-d', '--dump', dest='dump', action='store_true', default=False)
+parser.add_option('-g', '--grammar', dest='grammar', action='store', default='python')
+parser.add_option('-n', '--normal', dest='normal', action='store_true', default=False)
+(opts, args) = parser.parse_args()
-paths = sys.argv[1:]
-for path in paths:
+for path in args:
     f = open(path, 'r')
     data = f.read()
     f.close()
     lines = data.split('\n')
-    lexer = lex2.Lexer('lexer', grammars[t]())
-    if m:
-        h = highlight2.Highlighter(lexer)
-        h.highlight(lines)
-        h.display(token_colors[t])
-    else:
-        lexer.lex(lines)
-        for token in lexer:
-            print '%-30s| %-6s | %r' % (token.name,
-                                        '(%d,%d)' % (token.x, token.y),
-                                        token.string)
+    lexer = lex2.Lexer('lexer', grammars[opts.grammar]())
+    h = highlight2.Highlighter(lexer)
+    h.highlight(lines)
+    if opts.normal:
+        if opts.dump:
+            h.dump()
+        else:
+            h.display(token_colors[opts.grammar])
+    else:
+        (y1, x1) = (5, 9)
+        (y2, x2) = (7, 14)
+        #(y2, x2) = (82, 2)
+        for i in range(y1 + 1, y2):
+            del lines[y1 + 1]
+        lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
+        del lines[y1 + 1]
+        h.relex_del(lines, y1, x1, y2, x2)
+        #h.update_del(lines, y1, x1, y2, x2)
+        #h.highlight(lines)
+        if opts.dump:
+            h.dump()
+        else:
+            h.display(token_colors[opts.grammar])
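Note: a tiny worked example (made-up four-row buffer) of the deletion splice the script performs before calling relex_del: everything between (y1, x1) and (y2, x2) is removed, the rows strictly in between are dropped, and the two boundary rows are joined:

lines = ['aaaaaa', 'bbbbbb', 'cccccc', 'dddddd']
(y1, x1) = (0, 2)
(y2, x2) = (2, 3)
for i in range(y1 + 1, y2):   # drop the rows strictly between y1 and y2
    del lines[y1 + 1]
lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]   # join the two boundary rows
del lines[y1 + 1]
print lines   # ['aaccc', 'dddddd']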