big lexing improvements

--HG--
branch : pmacs2
moculus 2007-07-11 19:36:52 +00:00
parent eb37a919b7
commit 1fbc9d4ceb
5 changed files with 55 additions and 31 deletions

View File

@@ -606,6 +606,11 @@ class Application(object):
if y < len(w.buffer.lines):
while j < len(highlighter.tokens[y]):
token = highlighter.tokens[y][j]
if token.string.endswith('\n'):
tstring = token.string[:-1]
else:
tstring = token.string
assert token.y == y, '%d == %d' % (token.y, y)
s_offset = max(x - token.x, 0)
@@ -613,7 +618,8 @@
assert x_offset <= slot.width, '%d <= %d' % (x_offset, slot.width)
c = self._get_token_color(w, token)
s = token.string[s_offset:]
#s = token.string[s_offset:]
s = tstring[s_offset:]
token_done = x_offset + len(s) <= slot.width
token_wrap = x_offset + len(s) > slot.width
self.win.addstr(slot.offset + count, x_offset, s[:slot.width - x_offset], c)
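The hunk above strips a trailing newline from a token's string before it is drawn, so the '\n' never reaches the curses window. A minimal standalone sketch of the same idea, using a hypothetical Token stand-in rather than the editor's real token class:

```python
# Sketch only: Token is a stand-in for illustration, not the editor's
# actual token class.
class Token:
    def __init__(self, y, x, name, string):
        self.y, self.x, self.name, self.string = y, x, name, string

def visible_string(token):
    # Drop a trailing newline so it is never passed to curses.
    if token.string.endswith('\n'):
        return token.string[:-1]
    return token.string

assert visible_string(Token(0, 0, 'null', 'hello\n')) == 'hello'
assert visible_string(Token(0, 0, 'null', 'hello')) == 'hello'
```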

View File

@@ -6,11 +6,16 @@ from method import Argument, Method
class PodGrammar(Grammar):
rules = [
PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
PatternRule(r'entry', r'(?<=^=over) +.*$'),
PatternRule(r'entry', r'(?<=^=item) +.*$'),
PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
RegionRule(r'entry', r'(?<=^=head[1-4]) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?<=^=over) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
#PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
#PatternRule(r'entry', r'(?<=^=over) +.*$'),
#PatternRule(r'entry', r'(?<=^=item) +.*$'),
#PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
#PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
]
class StringGrammar(Grammar):
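In the hunk above, the single-line PatternRule entries are replaced by RegionRules, so a POD directive like '=head1 NAME' now opens a region that keeps being lexed until a blank line, instead of only matching the directive's own line. A rough sketch of the difference using plain regexes; the patterns come from the diff, but the per-line matching shown here is an assumption about how the editor's lexer applies them:

```python
import re

entry_start = re.compile(r'(?<=^=head[1-4]) +.*$')
entry_end = re.compile('^\n$')

lines = ["=head1 NAME\n", "\n", "Foo::Bar - frobnicates widgets\n", "\n"]

# PatternRule only ever produced a token on the directive line itself;
# RegionRule uses the same start pattern to open a region and keeps
# lexing following lines until the end pattern (a bare newline) matches.
print(bool(entry_start.search(lines[0])))   # True  -> region opens
print(bool(entry_end.match(lines[1])))      # True  -> region closes
```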
@@ -92,7 +97,9 @@ class PerlGrammar(Grammar):
PatternRule(r'operator', r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*="),
PatternRule(r'operator', r"\+\+|\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|\*|&&|&|\|\||\||/|\^|==|//|~|=~|!~|!=|%|!|\.|x(?![a-zA-Z_])"),
PatternRule(r'operator2', r"(?:xor|or|not|ne|lt|le|gt|ge|eq|cmp|and)(?![a-zA-Z_])"),
PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*')
PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*'),
]
class PerlTabber(tab2.StackTabber):
@@ -229,7 +236,9 @@ class Perl(mode2.Fundamental):
# pod
'pod.start': color.build('red', 'default'),
'pod.null': color.build('red', 'default'),
'pod.entry': color.build('magenta', 'default'),
'pod.entry.start': color.build('magenta', 'default'),
'pod.entry.null': color.build('magenta', 'default'),
'pod.entry.end': color.build('magenta', 'default'),
'pod.end': color.build('red', 'default'),
# "" strings
@@ -247,8 +256,8 @@ class Perl(mode2.Fundamental):
# `` strings
'evalstring.start': color.build('cyan', 'default'),
'evalstring.null': color.build('cyan', 'default'),
'string1.escaped': color.build('magenta', 'default'),
'string1.deref': color.build('yellow', 'default'),
'evalstring.escaped': color.build('magenta', 'default'),
'evalstring.deref': color.build('yellow', 'default'),
'evalstring.end': color.build('cyan', 'default'),
# quoted region
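Since 'entry' is now a region, its tokens come back with qualified names (pod.entry.start, pod.entry.null, pod.entry.end) and the mode's color map needs one entry per name; the backtick-string colors are likewise keyed under evalstring instead of string1. A tiny sketch of that naming scheme, with color.build stubbed out (the stub is an assumption, not the editor's real function):

```python
# Stub for color.build(); the real function returns a curses attribute.
def build(fg, bg):
    return (fg, bg)

colors = {
    'pod.entry.start':    build('magenta', 'default'),
    'pod.entry.null':     build('magenta', 'default'),
    'pod.entry.end':      build('magenta', 'default'),
    'evalstring.escaped': build('magenta', 'default'),
    'evalstring.deref':   build('yellow', 'default'),
}

# Every qualified token name produced by the region rules needs to be
# present for that token to get its intended color.
for name in ('pod.entry.start', 'pod.entry.null', 'pod.entry.end'):
    assert name in colors
```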

View File

@@ -49,10 +49,11 @@ class PythonTabber(tab2.StackTabber):
# we always know that line 0 is indented at the 0 level
return True
tokens = self.get_tokens(y)
if not tokens:
# if a line has no tokens, we don't know much about its indentation
return False
elif tokens[0].name in self.startlevel_names:
#if not tokens:
# # if a line has no tokens, we don't know much about its indentation
# return False
#elif tokens[0].name in self.startlevel_names:
if tokens[0].name in self.startlevel_names:
# if a line has no whitespace and begins with something like
# 'while','class','def','if',etc. then we can start at it
return True
@@ -86,8 +87,8 @@ class PythonTabber(tab2.StackTabber):
self._pop()
# if we haven't reached the target-line yet, we can detect how many
# levels of unindentation, if any, the user chose on previous lines
if y < target and tokens:
if self.token_is_whitespace(y, 0):
if y < target and len(tokens) > 2:
if self.token_is_space(y, 0):
l = len(tokens[0].string)
else:
l = 0
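The tabber now measures a previous line's indentation only from a pure-space leading token (token_is_space) rather than from any whitespace token, so a token holding just a newline no longer counts as indentation. A small sketch of that measurement, reusing the space regex from the regex module below:

```python
import re

space = re.compile('^ *$')   # regex.space: spaces only, no newline

def leading_indent(first_token_string):
    # Only a pure-space leading token tells us a line's indent width;
    # a bare-newline ('eol'-style) token reports zero.
    if space.match(first_token_string):
        return len(first_token_string)
    return 0

assert leading_indent('        ') == 8
assert leading_indent('\n') == 0
```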

View File

@@ -9,7 +9,8 @@ shell_command = re.compile(r'^[^ ]+')
# whitespace regexes
leading_whitespace = re.compile('^ *')
trailing_whitespace = re.compile(' *$')
whitespace = re.compile('^ *$')
whitespace = re.compile('^[ \n]*$')
space = re.compile('^ *$')
# word regexes
word = re.compile('^[A-Za-z0-9_]+$')
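With the change above, whitespace now also accepts strings that are nothing but newlines, while the new space regex keeps the old spaces-only behaviour. A quick check of the four patterns (the sample strings are made up for illustration):

```python
import re

leading_whitespace = re.compile('^ *')
trailing_whitespace = re.compile(' *$')
whitespace = re.compile('^[ \n]*$')
space = re.compile('^ *$')

print(repr(leading_whitespace.match('    x = 1').group()))    # '    '
print(repr(trailing_whitespace.search('x = 1   ').group()))   # '   '
print(bool(whitespace.match('\n')))   # True: newline-only strings now match
print(bool(space.match('\n')))        # False: space is spaces-only
print(bool(space.match('   ')))       # True
```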

tab2.py
View File

@@ -10,7 +10,9 @@ class Marker:
class Tabber:
wsre = regex.whitespace
wst = 'null'
wst = ('null', 'eol',)
sre = regex.space
st = ('null',)
def __init__(self, m):
self.mode = m
self.lines = {}
@@ -21,9 +23,14 @@ class Tabber:
return self.mode.window.buffer.highlights[self.mode.name()].tokens[y]
def get_token(self, y, i):
return self.mode.window.buffer.highlights[self.mode.name()].tokens[y][i]
def token_is_whitespace(self, y, i):
token = self.get_token(y, i)
return token.name == self.wst and self.wsre.match(token.string)
return token.name in self.wst and self.wsre.match(token.string)
def token_is_space(self, y, i):
token = self.get_token(y, i)
return token.name in self.st and self.sre.match(token.string)
def get_next_left_token(self, y, i):
tokens = self.get_tokens(y)
assert i >= 0 and i < len(tokens)
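token_is_whitespace() now checks against a tuple of token names (so 'eol' tokens count) and tolerates newlines, while the new token_is_space() only accepts space-only 'null' tokens. A self-contained sketch of the two predicates; the Token namedtuple and the sample token names are assumptions for illustration, not the editor's exact API:

```python
import re
from collections import namedtuple

# Minimal stand-in token; the editor's real token carries more state.
Token = namedtuple('Token', 'name string')

wsre = re.compile('^[ \n]*$')   # regex.whitespace
sre = re.compile('^ *$')        # regex.space
wst = ('null', 'eol')
st = ('null',)

def token_is_whitespace(token):
    # Whitespace: a 'null' or 'eol' token made of spaces and/or newlines.
    return token.name in wst and bool(wsre.match(token.string))

def token_is_space(token):
    # Space: a 'null' token made only of spaces (no newline).
    return token.name in st and bool(sre.match(token.string))

assert token_is_whitespace(Token('eol', '\n'))
assert not token_is_space(Token('eol', '\n'))
assert token_is_space(Token('null', '    '))
```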