big lexing improvements

--HG--
branch : pmacs2
This commit is contained in:
moculus 2007-07-11 19:36:52 +00:00
parent eb37a919b7
commit 1fbc9d4ceb
5 changed files with 55 additions and 31 deletions

View File

@ -606,6 +606,11 @@ class Application(object):
if y < len(w.buffer.lines): if y < len(w.buffer.lines):
while j < len(highlighter.tokens[y]): while j < len(highlighter.tokens[y]):
token = highlighter.tokens[y][j] token = highlighter.tokens[y][j]
if token.string.endswith('\n'):
tstring = token.string[:-1]
else:
tstring = token.string
assert token.y == y, '%d == %d' % (token.y, y) assert token.y == y, '%d == %d' % (token.y, y)
s_offset = max(x - token.x, 0) s_offset = max(x - token.x, 0)
@ -613,7 +618,8 @@ class Application(object):
assert x_offset <= slot.width, '%d <= %d' % (x_offset, slot.width) assert x_offset <= slot.width, '%d <= %d' % (x_offset, slot.width)
c = self._get_token_color(w, token) c = self._get_token_color(w, token)
s = token.string[s_offset:] #s = token.string[s_offset:]
s = tstring[s_offset:]
token_done = x_offset + len(s) <= slot.width token_done = x_offset + len(s) <= slot.width
token_wrap = x_offset + len(s) > slot.width token_wrap = x_offset + len(s) > slot.width
self.win.addstr(slot.offset + count, x_offset, s[:slot.width - x_offset], c) self.win.addstr(slot.offset + count, x_offset, s[:slot.width - x_offset], c)

View File

@ -6,11 +6,16 @@ from method import Argument, Method
class PodGrammar(Grammar): class PodGrammar(Grammar):
rules = [ rules = [
PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'), RegionRule(r'entry', r'(?<=^=head[1-4]) +.*$', Grammar, '^\n$'),
PatternRule(r'entry', r'(?<=^=over) +.*$'), RegionRule(r'entry', r'(?<=^=over) +.*$', Grammar, '^\n$'),
PatternRule(r'entry', r'(?<=^=item) +.*$'), RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'), RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
PatternRule(r'entry', r'(?<=^=encoding) +.*$'), RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
#PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
#PatternRule(r'entry', r'(?<=^=over) +.*$'),
#PatternRule(r'entry', r'(?<=^=item) +.*$'),
#PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
#PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
] ]
class StringGrammar(Grammar): class StringGrammar(Grammar):
@ -92,7 +97,9 @@ class PerlGrammar(Grammar):
PatternRule(r'operator', r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*="), PatternRule(r'operator', r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*="),
PatternRule(r'operator', r"\+\+|\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|\*|&&|&|\|\||\||/|\^|==|//|~|=~|!~|!=|%|!|\.|x(?![a-zA-Z_])"), PatternRule(r'operator', r"\+\+|\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|\*|&&|&|\|\||\||/|\^|==|//|~|=~|!~|!=|%|!|\.|x(?![a-zA-Z_])"),
PatternRule(r'operator2', r"(?:xor|or|not|ne|lt|le|gt|ge|eq|cmp|and)(?![a-zA-Z_])"), PatternRule(r'operator2', r"(?:xor|or|not|ne|lt|le|gt|ge|eq|cmp|and)(?![a-zA-Z_])"),
PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*') PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*'),
] ]
class PerlTabber(tab2.StackTabber): class PerlTabber(tab2.StackTabber):
@ -229,7 +236,9 @@ class Perl(mode2.Fundamental):
# pod # pod
'pod.start': color.build('red', 'default'), 'pod.start': color.build('red', 'default'),
'pod.null': color.build('red', 'default'), 'pod.null': color.build('red', 'default'),
'pod.entry': color.build('magenta', 'default'), 'pod.entry.start': color.build('magenta', 'default'),
'pod.entry.null': color.build('magenta', 'default'),
'pod.entry.end': color.build('magenta', 'default'),
'pod.end': color.build('red', 'default'), 'pod.end': color.build('red', 'default'),
# "" strings # "" strings
@ -247,8 +256,8 @@ class Perl(mode2.Fundamental):
# `` strings # `` strings
'evalstring.start': color.build('cyan', 'default'), 'evalstring.start': color.build('cyan', 'default'),
'evalstring.null': color.build('cyan', 'default'), 'evalstring.null': color.build('cyan', 'default'),
'string1.escaped': color.build('magenta', 'default'), 'evalstring.escaped': color.build('magenta', 'default'),
'string1.deref': color.build('yellow', 'default'), 'evalstring.deref': color.build('yellow', 'default'),
'evalstring.end': color.build('cyan', 'default'), 'evalstring.end': color.build('cyan', 'default'),
# quoted region # quoted region

View File

@ -49,10 +49,11 @@ class PythonTabber(tab2.StackTabber):
# we always know that line 0 is indented at the 0 level # we always know that line 0 is indented at the 0 level
return True return True
tokens = self.get_tokens(y) tokens = self.get_tokens(y)
if not tokens: #if not tokens:
# if a line has no tokens, we don't know much about its indentation # # if a line has no tokens, we don't know much about its indentation
return False # return False
elif tokens[0].name in self.startlevel_names: #elif tokens[0].name in self.startlevel_names:
if tokens[0].name in self.startlevel_names:
# if a line has no whitespace and begins with something like # if a line has no whitespace and begins with something like
# 'while','class','def','if',etc. then we can start at it # 'while','class','def','if',etc. then we can start at it
return True return True
@ -86,8 +87,8 @@ class PythonTabber(tab2.StackTabber):
self._pop() self._pop()
# if we haven't reached the target-line yet, we can detect how many # if we haven't reached the target-line yet, we can detect how many
# levels of unindentation, if any, the user chose on previous lines # levels of unindentation, if any, the user chose on previous lines
if y < target and tokens: if y < target and len(tokens) > 2:
if self.token_is_whitespace(y, 0): if self.token_is_space(y, 0):
l = len(tokens[0].string) l = len(tokens[0].string)
else: else:
l = 0 l = 0

View File

@ -9,7 +9,8 @@ shell_command = re.compile(r'^[^ ]+')
# whitespace regexes # whitespace regexes
leading_whitespace = re.compile('^ *') leading_whitespace = re.compile('^ *')
trailing_whitespace = re.compile(' *$') trailing_whitespace = re.compile(' *$')
whitespace = re.compile('^ *$') whitespace = re.compile('^[ \n]*$')
space = re.compile('^ *$')
# word regexes # word regexes
word = re.compile('^[A-Za-z0-9_]+$') word = re.compile('^[A-Za-z0-9_]+$')

11
tab2.py
View File

@ -10,7 +10,9 @@ class Marker:
class Tabber: class Tabber:
wsre = regex.whitespace wsre = regex.whitespace
wst = 'null' wst = ('null', 'eol',)
sre = regex.space
st = ('null',)
def __init__(self, m): def __init__(self, m):
self.mode = m self.mode = m
self.lines = {} self.lines = {}
@ -21,9 +23,14 @@ class Tabber:
return self.mode.window.buffer.highlights[self.mode.name()].tokens[y] return self.mode.window.buffer.highlights[self.mode.name()].tokens[y]
def get_token(self, y, i): def get_token(self, y, i):
return self.mode.window.buffer.highlights[self.mode.name()].tokens[y][i] return self.mode.window.buffer.highlights[self.mode.name()].tokens[y][i]
def token_is_whitespace(self, y, i): def token_is_whitespace(self, y, i):
token = self.get_token(y, i) token = self.get_token(y, i)
return token.name == self.wst and self.wsre.match(token.string) return token.name in self.wst and self.wsre.match(token.string)
def token_is_space(self, y, i):
token = self.get_token(y, i)
return token.name in self.st and self.sre.match(token.string)
def get_next_left_token(self, y, i): def get_next_left_token(self, y, i):
tokens = self.get_tokens(y) tokens = self.get_tokens(y)
assert i >= 0 and i < len(tokens) assert i >= 0 and i < len(tokens)