parent 759ceeb805
commit 9e1a1711ab

highlight2.py (154 lines changed)
@@ -20,6 +20,12 @@ class Highlighter:
        self.lexer = lexer
        self.tokens = []

    def dump(self, fmt='(%3s, %2s) | %s'):
        print fmt % ('y', 'x', 'string')
        for group in self.tokens:
            for token in group:
                print fmt % (token.y, token.x, token.string)

    def display(self, token_colors={}, debug=False):
        for group in self.tokens:
            for token in group:
@@ -46,50 +52,116 @@ class Highlighter:
        for token in self.lexer:
            self.tokens[token.y].append(token)

    def update(self, lines, y1=0, x1=0, y2=-1, x2=-1):
        # basically, we are syncing up our cached internal state with the lexer
        # so... we need to keep track of where we are in our internal structure.
        insertion_index = None
        line_index = None
        x = x1
    def update_del(self, lines, y1, x1, y2, x2):
        assert y1 >= 0
        assert y1 <= y2
        assert y2 < len(lines)

        xdelta = x2 - x1
        ydelta = y2 - y1

        newtokens = [[] for x in range(0, len(self.tokens) - ydelta)]
        for y in range(0, y1):
            newtokens[y] = self.tokens[y]

        for y in range(y1, len(lines)):
            while self.tokens[y]:
                token = self.tokens[y].pop(0)
                tx1 = token.x
                tx2 = token.x + len(token.string)
                if (y, tx2) <= (y1, x1):
                    # *| |
                    newtokens[y].append(token)
                elif (y, tx1) >= (y2, x2):
                    # | |*
                    token.y -= ydelta
                    if y == y2:
                        token.x -= xdelta
                    newtokens[token.y].append(token)
                elif (y, tx1) < (y1, x1):
                    token2 = token.copy()
                    if (y, tx2) <= (y2, x2):
                        # *|*|
                        s = token2.string[:x1 - tx1]
                    else:
                        # *|*|*
                        s = token2.string[:x1 - tx1] + token2.string[x2 - tx1:]
                    token2.string = s
                    newtokens[y].append(token2)
                elif (y, tx1) < (y2, x2):
                    if (y, tx2) <= (y2, x2):
                        # |*|
                        pass
                    else:
                        # |*|*
                        token2 = token.copy()
                        token2.x = x1
                        token2.y = token2.y - ydelta
                        token2.string = token2.string[x2 - tx1:]
                        newtokens[token2.y].append(token2)
        self.tokens = newtokens

    def relex_del(self, lines, y1, x1, y2, x2):
        self.update_del(lines, y1, x1, y2, x2)
        self.lexer.lex(lines, y1, 0)

        y = y1
        i = 0
        getnext = True

        # so figure out where exactly the document has changed and how this
        # necessarily affects our internal structure
        for i in range(0, len(self.tokens[y1])):
            t = self.tokens[y1][i]
            if t.x < x1 and t.x + len(t.string) > x1:
                # this token spans our region, so invalidate it and start our
                # update from its start instead
                x1 = t.x
                insertion_index = i
                line_index = i
                del self.tokens[i]
        while True:
            if y >= len(lines):
                break
            elif t.x == x1:
                # ok, so it looks like the change starts on a token
                line_index = i

            assert line_index is not None
            if getnext:
                try:
                    new_token = self.lexer.next()
                    getnext = False
                except StopIteration:
                    for j in range(y, len(lines)):
                        print 'DELETE END ROW %d[%d:]: %r' % (j, i, [x.string for x in self.tokens[j][i:]])
                        del self.tokens[j][i:]
                        i = 0
                    break

            self.lexer.lex(lines, y1, x1)
            for lt in self.lexer.lex:
                if y != lt.y:
                    y = lt.y
                if insertion_index:
                    # ok, so we have a "gap" that we have to fill, so just insert
                    # the token in our structure, and then see if it overlaps
                    # something else that has to go
                    self.tokens[y].insert(insertion_index, lt)
                    line_index = insertion_index + 1
                    insertion_index = None
                for i in range(line_index, len(self.tokens[y])):
                    if self.tokens[y][i].start < None: #GJIE
                        pass #GGGJGEI
                insertion_index = None

            # if our next token is on a future line, we need to just get rid of
            # all our old tokens until we get there
            while new_token.y > y:
                print 'DELETE MID ROW %d[%d:]: %r' % (y, i, [x.string for x in self.tokens[y][i:]])
                del self.tokens[y][i:]
                i = 0
                y += 1

            if y2 > 0:
                for i in range(y1, y2):
                    self.tokens
            if i < len(self.tokens[y]):
                old_token = self.tokens[y][i]
                assert old_token.y == y
            else:
                old_token = None

            if old_token is None:
                print 'DEFAULT INSERT %d[%d]: %r' % (y, i, new_token.string)
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
                continue
            elif old_token == new_token:
                print 'MATCH %d[%d]: %r == %r' % (y, i, old_token.string, new_token.string)
                i += 1
                getnext = True
                if new_token.y >= y2 and new_token.end_x() >= x2:
                    break
                else:
                    continue
            elif old_token.x < new_token.end_x():
                print 'DELETE BEFORE %d[%d]: %r' % (y, i, old_token.string)
                del self.tokens[y][i]
                continue
            elif old_token.x >= new_token.end_x():
                print 'INSERT %d[%d]: %r' % (y, i, new_token.string)
                self.tokens[y].insert(i, new_token)
                i += 1
                getnext = True
                continue
            else:
                raise Exception, "what what?"
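Not part of the commit: a minimal standalone sketch of the coordinate shift that update_del applies to tokens that survive a deletion of the region (y1, x1)..(y2, x2). The Tok class and the sample values below are hypothetical; only the tuple comparison and the xdelta/ydelta arithmetic mirror the hunk above.

# hypothetical illustration of update_del's shift rules, not repository code
class Tok(object):
    def __init__(self, y, x, s):
        self.y, self.x, self.string = y, x, s
    def __repr__(self):
        return '(%d, %d) %r' % (self.y, self.x, self.string)

def shift_after_delete(tok, y1, x1, y2, x2):
    """Return the token's new position after deleting (y1, x1)..(y2, x2)."""
    ydelta = y2 - y1
    xdelta = x2 - x1
    if (tok.y, tok.x) >= (y2, x2):   # token starts at or after the deleted region
        if tok.y == y2:              # on the last deleted row it also slides left
            tok.x -= xdelta
        tok.y -= ydelta              # every later token moves up
    return tok

# Deleting rows 5..7 (x1=9, x2=14): a token at (7, 20) ends up at (5, 15).
print(shift_after_delete(Tok(7, 20, 'foo'), 5, 9, 7, 14))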
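Likewise not in the commit: a rough single-row model of the reconciliation loop at the end of relex_del. Cached tokens that match the fresh lexer output are kept (MATCH), cached tokens overlapped by a new token are dropped (DELETE BEFORE), and new tokens are inserted where a gap opens (INSERT). Tokens are reduced to hypothetical (x, string) pairs here, so this is only a sketch of the control flow, not the method itself.

# hypothetical single-row reconciliation of cached vs. freshly lexed tokens
def merge_row(old, new):
    out = list(old)
    i = 0
    for nt in new:
        # drop cached tokens the new token overlaps and does not equal
        while i < len(out) and out[i] != nt and out[i][0] < nt[0] + len(nt[1]):
            del out[i]
        if i < len(out) and out[i] == nt:
            i += 1                 # MATCH: reuse the cached token
        else:
            out.insert(i, nt)      # INSERT: adopt the freshly lexed token
            i += 1
    return out

old = [(0, 'foo'), (4, 'barbaz'), (11, '()')]
new = [(0, 'foo'), (4, 'bar'), (8, 'qux')]
print(merge_row(old, new))   # [(0, 'foo'), (4, 'bar'), (8, 'qux'), (11, '()')]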

lex2.py (6 lines changed)

@@ -6,16 +6,22 @@ reserved_names = ['start', 'middle', 'end', 'null']
class Token(object):
    def __init__(self, name, rule, y, x, s, **vargs):
        self.name = name
        self.rule = rule
        self.y = y
        self.x = x
        self.string = s
        self.vargs = vargs
    def copy(self):
        return Token(self.name, None, self.y, self.x, self.string, **self.vargs)
    def add_to_string(self, s):
        self.string += s
    def end_x(self):
        return self.x + len(self.string)
    def __eq__(self, other):
        return (self.y == other.y and
                self.x == other.x and
                self.string == other.string and
                self.name == other.name and
                self.vargs == other.vargs)
    def __repr__(self):
        if len(self.string) < 10:
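A short usage sketch, assuming the Token class above is importable from lex2: update_del splits tokens via copy(), which drops the rule reference, and relex_del relies on __eq__ comparing tokens by value, so a freshly lexed token can match a cached one.

from lex2 import Token

a = Token('identifier', None, 3, 4, 'foo')
b = a.copy()
print(a == b)        # True: equality is by name, position, string and vargs
print(a.end_x())     # 7, i.e. x (4) plus len('foo')
b.add_to_string('bar')
print(a == b)        # False: the strings now differ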

@@ -0,0 +1,113 @@
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule

class StringGrammar(Grammar):
    rules = [
        PatternRule(
            name=r'octal',
            pattern=r'\\[0-7]{3}',
        ),
        PatternRule(
            name=r'escaped',
            pattern=r'\\.',
        ),
        PatternRule(
            name=r'format',
            pattern=r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]',
        ),
    ]

class PythonGrammar(Grammar):
    rules = [
        PatternRule(
            name=r'import',
            pattern=r'(?:^|(?<= ))import(?= |$)',
        ),
        PatternRule(
            name=r'methodname',
            pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'classname',
            pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'reserved',
            pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r'keyword',
            pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r"builtin_method",
            pattern=r'(?<!\.)(?:zip|xrange|vars|unicode|unichr|type|tuple|super|sum|str|staticmethod|sorted|slice|setattr|set|round|repr|reduce|raw_input|range|property|pow|ord|open|oct|object|max|min|map|long|locals|list|len|iter|issubclass|isinstance|int|input|id|hex|hash|hasattr|globals|getattr|frozenset|float|filter|file|execfile|eval|enumerate|divmod|dir|dict|delattr|complex|compile|coerce|cmp|classmethod|chr|callable|bool)(?![a-zA-Z0-9_])',
        ),
        PatternRule(
            name=r'bound_method',
            pattern=r'(?<=\. )[a-zA-Z_][a-zA-Z0-9_]*(?= *\()',
        ),
        PatternRule(
            name=r'system_identifier',
            pattern=r'__[a-zA-Z0-9_]+__',
        ),
        PatternRule(
            name=r'private_identifier',
            pattern=r'__[a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'hidden_identifier',
            pattern=r'_[a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'identifier',
            pattern=r'[a-zA-Z_][a-zA-Z0-9_]*',
        ),
        PatternRule(
            name=r'delimiter',
            pattern=r'\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=',
        ),
        PatternRule(
            name=r"operator",
            pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%",
        ),
        PatternRule(
            name=r"integer",
            pattern=r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
        ),
        PatternRule(
            name=r"float",
            pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+",
        ),
        PatternRule(
            name=r"imaginary",
            pattern=r"[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]",
        ),

        RegionRule(
            name=r'docstring',
            start=r'^ *(?P<tag>"""|\'\'\')',
            grammar=Grammar(),
            end=r'%(tag)s',
        ),
        RegionRule(
            name=r'tq_string',
            start=r'(?P<tag>"""|\'\'\')',
            grammar=Grammar(),
            end=r'%(tag)s',
        ),
        RegionRule(
            name=r'string',
            start=r'(?P<tag>"|\')',
            grammar=StringGrammar(),
            end=r'%(tag)s',
        ),

        PatternRule(
            name=r'comment',
            pattern=r'#.*$',
        ),
        PatternRule(
            name=r'continuation',
            pattern=r'\\$',
        ),
    ]
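A quick check, not in the commit, of two of the patterns above using the standard re module. The rule classes themselves come from lex2 and are not exercised here; only the raw regexes are, against made-up sample strings.

import re

# 'methodname' only matches an identifier preceded by 'def '.
methodname = re.compile(r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*')
print(methodname.findall('def update_del(self, lines):'))   # ['update_del']

# StringGrammar's 'format' rule picks out printf-style specifiers.
fmt = re.compile(r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]')
print(fmt.findall('DELETE MID ROW %d[%d:]: %r'))             # ['%d', '%d', '%r']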

test3.py (48 lines changed)

@@ -141,28 +141,44 @@ grammars = {
    'python': lex2_python.PythonGrammar,
}

#t = 'perl'
t = 'python'
import optparse

m = True
#m = False
parser = optparse.OptionParser()
parser.add_option('-d', '--dump', dest='dump', action='store_true', default=False)
parser.add_option('-g', '--grammar', dest='grammar', action='store', default='python')
parser.add_option('-n', '--normal', dest='normal', action='store_true', default=False)

paths = sys.argv[1:]
for path in paths:
(opts, args) = parser.parse_args()

for path in args:
    f = open(path, 'r')
    data = f.read()
    f.close()

    lines = data.split('\n')
    lexer = lex2.Lexer('lexer', grammars[t]())
    lexer = lex2.Lexer('lexer', grammars[opts.grammar]())

    if m:
        h = highlight2.Highlighter(lexer)
        h.highlight(lines)
        h.display(token_colors[t])
    h = highlight2.Highlighter(lexer)
    h.highlight(lines)

    if opts.normal:
        if opts.dump:
            h.dump()
        else:
            h.display(token_colors[opts.grammar])
    else:
        lexer.lex(lines)
        for token in lexer:
            print '%-30s| %-6s | %r' % (token.name,
                                        '(%d,%d)' % (token.x, token.y),
                                        token.string)
        (y1, x1) = (5, 9)
        (y2, x2) = (7, 14)
        #(y2, x2) = (82, 2)
        for i in range(y1 + 1, y2):
            del lines[y1 + 1]
        lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
        del lines[y1 + 1]

        h.relex_del(lines, y1, x1, y2, x2)
        #h.update_del(lines, y1, x1, y2, x2)
        #h.highlight(lines)
        if opts.dump:
            h.dump()
        else:
            h.display(token_colors[opts.grammar])
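The deletion that test3.py simulates before calling relex_del can be read as one splice over the line buffer. Below is a minimal standalone version of that splice, mirroring the index arithmetic in the hunk above; the sample strings and the delete_region name are hypothetical, purely for illustration.

def delete_region(lines, y1, x1, y2, x2):
    """Remove the text between (y1, x1) and (y2, x2), joining the end rows."""
    for i in range(y1 + 1, y2):
        del lines[y1 + 1]          # drop the fully deleted middle rows
    lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
    del lines[y1 + 1]              # the tail of row y2 was merged into row y1
    return lines

sample = ['row0', 'row1 keep|cut', 'cut all', 'cut all too', 'cut..|tail', 'row5']
print(delete_region(sample, 1, 9, 4, 5))
# ['row0', 'row1 keep|tail', 'row5']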