parent
759ceeb805
commit
9e1a1711ab
150
highlight2.py
150
highlight2.py
|
@ -20,6 +20,12 @@ class Highlighter:
|
||||||
self.lexer = lexer
|
self.lexer = lexer
|
||||||
self.tokens = []
|
self.tokens = []
|
||||||
|
|
||||||
|
def dump(self, fmt='(%3s, %2s) | %s'):
|
||||||
|
print fmt % ('y', 'x', 'string')
|
||||||
|
for group in self.tokens:
|
||||||
|
for token in group:
|
||||||
|
print fmt % (token.y, token.x, token.string)
|
||||||
|
|
||||||
def display(self, token_colors={}, debug=False):
|
def display(self, token_colors={}, debug=False):
|
||||||
for group in self.tokens:
|
for group in self.tokens:
|
||||||
for token in group:
|
for token in group:
|
||||||
|
@ -46,50 +52,116 @@ class Highlighter:
|
||||||
for token in self.lexer:
|
for token in self.lexer:
|
||||||
self.tokens[token.y].append(token)
|
self.tokens[token.y].append(token)
|
||||||
|
|
||||||
def update(self, lines, y1=0, x1=0, y2=-1, x2=-1):
|
def update_del(self, lines, y1, x1, y2, x2):
|
||||||
# basically, we are syncing up our cached internal state with the lexer
|
assert y1 >= 0
|
||||||
# so... we need to keep track of where we are in our internal structure.
|
assert y1 <= y2
|
||||||
insertion_index = None
|
assert y2 < len(lines)
|
||||||
line_index = None
|
|
||||||
x = x1
|
xdelta = x2 - x1
|
||||||
|
ydelta = y2 - y1
|
||||||
|
|
||||||
|
newtokens = [[] for x in range(0, len(self.tokens) - ydelta)]
|
||||||
|
for y in range(0, y1):
|
||||||
|
newtokens[y] = self.tokens[y]
|
||||||
|
|
||||||
|
for y in range(y1, len(lines)):
|
||||||
|
while self.tokens[y]:
|
||||||
|
token = self.tokens[y].pop(0)
|
||||||
|
tx1 = token.x
|
||||||
|
tx2 = token.x + len(token.string)
|
||||||
|
if (y, tx2) <= (y1, x1):
|
||||||
|
# *| |
|
||||||
|
newtokens[y].append(token)
|
||||||
|
elif (y, tx1) >= (y2, x2):
|
||||||
|
# | |*
|
||||||
|
token.y -= ydelta
|
||||||
|
if y == y2:
|
||||||
|
token.x -= xdelta
|
||||||
|
newtokens[token.y].append(token)
|
||||||
|
elif (y, tx1) < (y1, x1):
|
||||||
|
token2 = token.copy()
|
||||||
|
if (y, tx2) <= (y2, x2):
|
||||||
|
# *|*|
|
||||||
|
s = token2.string[:x1 - tx1]
|
||||||
|
else:
|
||||||
|
# *|*|*
|
||||||
|
s = token2.string[:x1 - tx1] + token2.string[x2 - tx1:]
|
||||||
|
token2.string = s
|
||||||
|
newtokens[y].append(token2)
|
||||||
|
elif (y, tx1) < (y2, x2):
|
||||||
|
if (y, tx2) <= (y2, x2):
|
||||||
|
# |*|
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# |*|*
|
||||||
|
token2 = token.copy()
|
||||||
|
token2.x = x1
|
||||||
|
token2.y = token2.y - ydelta
|
||||||
|
token2.string = token2.string[x2 - tx1:]
|
||||||
|
newtokens[token2.y].append(token2)
|
||||||
|
self.tokens = newtokens
|
||||||
|
|
||||||
|
def relex_del(self, lines, y1, x1, y2, x2):
|
||||||
|
self.update_del(lines, y1, x1, y2, x2)
|
||||||
|
self.lexer.lex(lines, y1, 0)
|
||||||
|
|
||||||
y = y1
|
y = y1
|
||||||
|
i = 0
|
||||||
|
getnext = True
|
||||||
|
|
||||||
# so figure out where exactly the document has changed and how this
|
while True:
|
||||||
# necessarily affects our internal structure
|
if y >= len(lines):
|
||||||
for i in range(0, len(self.tokens[y1])):
|
|
||||||
t = self.tokens[y1][i]
|
|
||||||
if t.x < x1 and t.x + len(t.string) > x1:
|
|
||||||
# this token spans our region, so invalidate it and start our
|
|
||||||
# update from its start instead
|
|
||||||
x1 = t.x
|
|
||||||
insertion_index = i
|
|
||||||
line_index = i
|
|
||||||
del self.tokens[i]
|
|
||||||
break
|
break
|
||||||
elif t.x == x1:
|
|
||||||
# ok, so it looks like the change starts on a token
|
|
||||||
line_index = i
|
|
||||||
|
|
||||||
assert line_index is not None:
|
if getnext:
|
||||||
|
try:
|
||||||
|
new_token = self.lexer.next()
|
||||||
|
getnext = False
|
||||||
|
except StopIteration:
|
||||||
|
for j in range(y, len(lines)):
|
||||||
|
print 'DELETE END ROW %d[%d:]: %r' % (j, i, [x.string for x in self.tokens[j][i:]])
|
||||||
|
del self.tokens[j][i:]
|
||||||
|
i = 0
|
||||||
|
break
|
||||||
|
|
||||||
self.lexer.lex(lines, y1, x1)
|
# if our next token is on a future line, we need to just get rid of
|
||||||
for lt in self.lexer.lex:
|
# all our old tokens until we get there
|
||||||
if y != lt.y:
|
while new_token.y > y:
|
||||||
y = lt.y
|
print 'DELETE MID ROW %d[%d:]: %r' % (y, i, [x.string for x in self.tokens[y][i:]])
|
||||||
if insertion_index:
|
del self.tokens[y][i:]
|
||||||
# ok, so we have a "gap" that we have to fill, so just insert
|
i = 0
|
||||||
# the token in our structure, and then see if it overlaps
|
y += 1
|
||||||
# something else that has to go
|
|
||||||
self.tokens[y].insert(insertion_index, lt)
|
|
||||||
line_index = insertion_index + 1
|
|
||||||
insertion_index = None
|
|
||||||
for i in range(line_index, len(self.tokens[y])):
|
|
||||||
if self.tokens[y][i].start < None: #GJIE
|
|
||||||
pass #GGGJGEI
|
|
||||||
insertion_index = None
|
|
||||||
|
|
||||||
|
if i < len(self.tokens[y]):
|
||||||
|
old_token = self.tokens[y][i]
|
||||||
|
assert old_token.y == y
|
||||||
|
else:
|
||||||
|
old_token = None
|
||||||
|
|
||||||
|
if old_token is None:
|
||||||
|
print 'DEFAULT INSERT %d[%d]: %r' % (y, i, new_token.string)
|
||||||
|
self.tokens[y].insert(i, new_token)
|
||||||
|
i += 1
|
||||||
|
getnext = True
|
||||||
|
continue
|
||||||
|
elif old_token == new_token:
|
||||||
|
print 'MATCH %d[%d]: %r == %r' % (y, i, old_token.string, new_token.string)
|
||||||
|
i += 1
|
||||||
|
getnext = True
|
||||||
|
if new_token.y >= y2 and new_token.end_x() >= x2:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
elif old_token.x < new_token.end_x():
|
||||||
|
print 'DELETE BEFORE %d[%d]: %r' % (y, i, old_token.string)
|
||||||
|
del self.tokens[y][i]
|
||||||
|
continue
|
||||||
|
elif old_token.x >= new_token.end_x():
|
||||||
|
print 'INSERT %d[%d]: %r' % (y, i, new_token.string)
|
||||||
|
self.tokens[y].insert(i, new_token)
|
||||||
|
i += 1
|
||||||
|
getnext = True
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise Exception, "what what?"
|
||||||
|
|
||||||
if y2 > 0:
|
|
||||||
for i in range(y1, y2):
|
|
||||||
self.tokens
|
|
||||||
|
|
6
lex2.py
6
lex2.py
|
@ -6,16 +6,22 @@ reserved_names = ['start', 'middle', 'end', 'null']
|
||||||
class Token(object):
|
class Token(object):
|
||||||
def __init__(self, name, rule, y, x, s, **vargs):
|
def __init__(self, name, rule, y, x, s, **vargs):
|
||||||
self.name = name
|
self.name = name
|
||||||
|
self.rule = rule
|
||||||
self.y = y
|
self.y = y
|
||||||
self.x = x
|
self.x = x
|
||||||
self.string = s
|
self.string = s
|
||||||
self.vargs = vargs
|
self.vargs = vargs
|
||||||
|
def copy(self):
|
||||||
|
return Token(self.name, None, self.y, self.x, self.string, **self.vargs)
|
||||||
def add_to_string(self, s):
|
def add_to_string(self, s):
|
||||||
self.string += s
|
self.string += s
|
||||||
|
def end_x(self):
|
||||||
|
return self.x + len(self.string)
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return (self.y == other.y and
|
return (self.y == other.y and
|
||||||
self.x == other.x and
|
self.x == other.x and
|
||||||
self.string == other.string and
|
self.string == other.string and
|
||||||
|
self.name == other.name and
|
||||||
self.vargs == other.vargs)
|
self.vargs == other.vargs)
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if len(self.string) < 10:
|
if len(self.string) < 10:
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
|
||||||
|
|
||||||
|
class StringGrammar(Grammar):
|
||||||
|
rules = [
|
||||||
|
PatternRule(
|
||||||
|
name=r'octal',
|
||||||
|
pattern=r'\\[0-7]{3}',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'escaped',
|
||||||
|
pattern=r'\\.',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'format',
|
||||||
|
pattern=r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
class PythonGrammar(Grammar):
|
||||||
|
rules = [
|
||||||
|
PatternRule(
|
||||||
|
name=r'import',
|
||||||
|
pattern=r'(?:^|(?<= ))import(?= |$)',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'methodname',
|
||||||
|
pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'classname',
|
||||||
|
pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'reserved',
|
||||||
|
pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'keyword',
|
||||||
|
pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r"builtin_method",
|
||||||
|
pattern=r'(?<!\.)(?:zip|xrange|vars|unicode|unichr|type|tuple|super|sum|str|staticmethod|sorted|slice|setattr|set|round|repr|reduce|raw_input|range|property|pow|ord|open|oct|object|max|min|map|long|locals|list|len|iter|issubclass|isinstance|int|input|id|hex|hash|hasattr|globals|getattr|frozenset|float|filter|file|execfile|eval|enumerate|divmod|dir|dict|delattr|complex|compile|coerce|cmp|classmethod|chr|callable|bool)(?![a-zA-Z0-9_])',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'bound_method',
|
||||||
|
pattern=r'(?<=\. )[a-zA-Z_][a-zA-Z0-9_]*(?= *\()',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'system_identifier',
|
||||||
|
pattern=r'__[a-zA-Z0-9_]+__',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'private_identifier',
|
||||||
|
pattern=r'__[a-zA-Z0-9_]*',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'hidden_identifier',
|
||||||
|
pattern=r'_[a-zA-Z0-9_]*',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'identifier',
|
||||||
|
pattern=r'[a-zA-Z_][a-zA-Z0-9_]*',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'delimiter',
|
||||||
|
pattern=r'\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r"operator",
|
||||||
|
pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%",
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r"integer",
|
||||||
|
pattern=r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r"float",
|
||||||
|
pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+",
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r"imaginary",
|
||||||
|
pattern=r"[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]",
|
||||||
|
),
|
||||||
|
|
||||||
|
RegionRule(
|
||||||
|
name=r'docstring',
|
||||||
|
start=r'^ *(?P<tag>"""|\'\'\')',
|
||||||
|
grammar=Grammar(),
|
||||||
|
end=r'%(tag)s',
|
||||||
|
),
|
||||||
|
RegionRule(
|
||||||
|
name=r'tq_string',
|
||||||
|
start=r'(?P<tag>"""|\'\'\')',
|
||||||
|
grammar=Grammar(),
|
||||||
|
end=r'%(tag)s',
|
||||||
|
),
|
||||||
|
RegionRule(
|
||||||
|
name=r'string',
|
||||||
|
start=r'(?P<tag>"|\')',
|
||||||
|
grammar=StringGrammar(),
|
||||||
|
end=r'%(tag)s',
|
||||||
|
),
|
||||||
|
|
||||||
|
PatternRule(
|
||||||
|
name=r'comment',
|
||||||
|
pattern=r'#.*$',
|
||||||
|
),
|
||||||
|
PatternRule(
|
||||||
|
name=r'continuation',
|
||||||
|
pattern=r'\\$',
|
||||||
|
),
|
||||||
|
]
|
48
test3.py
48
test3.py
|
@ -141,28 +141,44 @@ grammars = {
|
||||||
'python': lex2_python.PythonGrammar,
|
'python': lex2_python.PythonGrammar,
|
||||||
}
|
}
|
||||||
|
|
||||||
#t = 'perl'
|
import optparse
|
||||||
t = 'python'
|
|
||||||
|
|
||||||
m = True
|
parser = optparse.OptionParser()
|
||||||
#m = False
|
parser.add_option('-d', '--dump', dest='dump', action='store_true', default=False)
|
||||||
|
parser.add_option('-g', '--grammar', dest='grammar', action='store', default='python')
|
||||||
|
parser.add_option('-n', '--normal', dest='normal', action='store_true', default=False)
|
||||||
|
|
||||||
paths = sys.argv[1:]
|
(opts, args) = parser.parse_args()
|
||||||
for path in paths:
|
|
||||||
|
for path in args:
|
||||||
f = open(path, 'r')
|
f = open(path, 'r')
|
||||||
data = f.read()
|
data = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
lines = data.split('\n')
|
lines = data.split('\n')
|
||||||
lexer = lex2.Lexer('lexer', grammars[t]())
|
lexer = lex2.Lexer('lexer', grammars[opts.grammar]())
|
||||||
|
|
||||||
if m:
|
h = highlight2.Highlighter(lexer)
|
||||||
h = highlight2.Highlighter(lexer)
|
h.highlight(lines)
|
||||||
h.highlight(lines)
|
|
||||||
h.display(token_colors[t])
|
if opts.normal:
|
||||||
|
if opts.dump:
|
||||||
|
h.dump()
|
||||||
|
else:
|
||||||
|
h.display(token_colors[opts.grammar])
|
||||||
else:
|
else:
|
||||||
lexer.lex(lines)
|
(y1, x1) = (5, 9)
|
||||||
for token in lexer:
|
(y2, x2) = (7, 14)
|
||||||
print '%-30s| %-6s | %r' % (token.name,
|
#(y2, x2) = (82, 2)
|
||||||
'(%d,%d)' % (token.x, token.y),
|
for i in range(y1 + 1, y2):
|
||||||
token.string)
|
del lines[y1 + 1]
|
||||||
|
lines[y1] = lines[y1][0:x1] + lines[y1 + 1][x2:]
|
||||||
|
del lines[y1 + 1]
|
||||||
|
|
||||||
|
h.relex_del(lines, y1, x1, y2, x2)
|
||||||
|
#h.update_del(lines, y1, x1, y2, x2)
|
||||||
|
#h.highlight(lines)
|
||||||
|
if opts.dump:
|
||||||
|
h.dump()
|
||||||
|
else:
|
||||||
|
h.display(token_colors[opts.grammar])
|
||||||
|
|
Loading…
Reference in New Issue