From 1fbc9d4ceb267c7f119cfa35d1918ed2ffd5c9e4 Mon Sep 17 00:00:00 2001 From: moculus Date: Wed, 11 Jul 2007 19:36:52 +0000 Subject: [PATCH] big lexing improvements --HG-- branch : pmacs2 --- application.py | 8 +++++++- mode_perl.py | 51 +++++++++++++++++++++++++++++--------------------- mode_python.py | 13 +++++++------ regex.py | 3 ++- tab2.py | 11 +++++++++-- 5 files changed, 55 insertions(+), 31 deletions(-) diff --git a/application.py b/application.py index 09ec245..1a15b13 100755 --- a/application.py +++ b/application.py @@ -606,6 +606,11 @@ class Application(object): if y < len(w.buffer.lines): while j < len(highlighter.tokens[y]): token = highlighter.tokens[y][j] + if token.string.endswith('\n'): + tstring = token.string[:-1] + else: + tstring = token.string + assert token.y == y, '%d == %d' % (token.y, y) s_offset = max(x - token.x, 0) @@ -613,7 +618,8 @@ class Application(object): assert x_offset <= slot.width, '%d <= %d' % (x_offset, slot.width) c = self._get_token_color(w, token) - s = token.string[s_offset:] + #s = token.string[s_offset:] + s = tstring[s_offset:] token_done = x_offset + len(s) <= slot.width token_wrap = x_offset + len(s) > slot.width self.win.addstr(slot.offset + count, x_offset, s[:slot.width - x_offset], c) diff --git a/mode_perl.py b/mode_perl.py index e415168..ad65805 100644 --- a/mode_perl.py +++ b/mode_perl.py @@ -6,11 +6,16 @@ from method import Argument, Method class PodGrammar(Grammar): rules = [ - PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'), - PatternRule(r'entry', r'(?<=^=over) +.*$'), - PatternRule(r'entry', r'(?<=^=item) +.*$'), - PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'), - PatternRule(r'entry', r'(?<=^=encoding) +.*$'), + RegionRule(r'entry', r'(?<=^=head[1-4]) +.*$', Grammar, '^\n$'), + RegionRule(r'entry', r'(?<=^=over) +.*$', Grammar, '^\n$'), + RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'), + RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'), + RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'), + #PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'), + #PatternRule(r'entry', r'(?<=^=over) +.*$'), + #PatternRule(r'entry', r'(?<=^=item) +.*$'), + #PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'), + #PatternRule(r'entry', r'(?<=^=encoding) +.*$'), ] class StringGrammar(Grammar): @@ -92,7 +97,9 @@ class PerlGrammar(Grammar): PatternRule(r'operator', r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*="), PatternRule(r'operator', r"\+\+|\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|\*|&&|&|\|\||\||/|\^|==|//|~|=~|!~|!=|%|!|\.|x(?![a-zA-Z_])"), PatternRule(r'operator2', r"(?:xor|or|not|ne|lt|le|gt|ge|eq|cmp|and)(?![a-zA-Z_])"), - PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*') + PatternRule(r'bareword', r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*'), + + ] class PerlTabber(tab2.StackTabber): @@ -216,21 +223,23 @@ class Perl(mode2.Fundamental): 'method': color.build('cyan', 'default'), # heredoc - 'heredoc1.start': color.build('green', 'default'), - 'heredoc1.null': color.build('green', 'default'), - 'heredoc1.end': color.build('green', 'default'), - 'heredoc2.start': color.build('green', 'default'), - 'heredoc2.null': color.build('green', 'default'), - 'heredoc2.end': color.build('green', 'default'), + 'heredoc1.start': color.build('green', 'default'), + 'heredoc1.null': color.build('green', 'default'), + 'heredoc1.end': color.build('green', 'default'), + 'heredoc2.start': color.build('green', 'default'), + 'heredoc2.null': color.build('green', 'default'), + 'heredoc2.end': color.build('green', 'default'), 'eval_heredoc.start': color.build('cyan', 'default'), 'eval_heredoc.null': color.build('cyan', 'default'), 'eval_heredoc.end': color.build('cyan', 'default'), # pod - 'pod.start': color.build('red', 'default'), - 'pod.null': color.build('red', 'default'), - 'pod.entry': color.build('magenta', 'default'), - 'pod.end': color.build('red', 'default'), + 'pod.start': color.build('red', 'default'), + 'pod.null': color.build('red', 'default'), + 'pod.entry.start': color.build('magenta', 'default'), + 'pod.entry.null': color.build('magenta', 'default'), + 'pod.entry.end': color.build('magenta', 'default'), + 'pod.end': color.build('red', 'default'), # "" strings 'string1.start': color.build('green', 'default'), @@ -245,11 +254,11 @@ class Perl(mode2.Fundamental): 'string2.end': color.build('green', 'default'), # `` strings - 'evalstring.start': color.build('cyan', 'default'), - 'evalstring.null': color.build('cyan', 'default'), - 'string1.escaped': color.build('magenta', 'default'), - 'string1.deref': color.build('yellow', 'default'), - 'evalstring.end': color.build('cyan', 'default'), + 'evalstring.start': color.build('cyan', 'default'), + 'evalstring.null': color.build('cyan', 'default'), + 'evalstring.escaped': color.build('magenta', 'default'), + 'evalstring.deref': color.build('yellow', 'default'), + 'evalstring.end': color.build('cyan', 'default'), # quoted region 'quoted': color.build('cyan', 'default'), diff --git a/mode_python.py b/mode_python.py index 85feee4..d1b96b5 100644 --- a/mode_python.py +++ b/mode_python.py @@ -49,10 +49,11 @@ class PythonTabber(tab2.StackTabber): # we always know that line 0 is indented at the 0 level return True tokens = self.get_tokens(y) - if not tokens: - # if a line has no tokens, we don't know much about its indentation - return False - elif tokens[0].name in self.startlevel_names: + #if not tokens: + # # if a line has no tokens, we don't know much about its indentation + # return False + #elif tokens[0].name in self.startlevel_names: + if tokens[0].name in self.startlevel_names: # if a line has no whitespace and beings with something like # 'while','class','def','if',etc. then we can start at it return True @@ -86,8 +87,8 @@ class PythonTabber(tab2.StackTabber): self._pop() # if we haven't reached the target-line yet, we can detect how many # levels of unindention, if any, the user chose on previous lines - if y < target and tokens: - if self.token_is_whitespace(y, 0): + if y < target and len(tokens) > 2: + if self.token_is_space(y, 0): l = len(tokens[0].string) else: l = 0 diff --git a/regex.py b/regex.py index c8977e8..b560021 100644 --- a/regex.py +++ b/regex.py @@ -9,7 +9,8 @@ shell_command = re.compile(r'^[^ ]+') # whitespace regexes leading_whitespace = re.compile('^ *') trailing_whitespace = re.compile(' *$') -whitespace = re.compile('^ *$') +whitespace = re.compile('^[ \n]*$') +space = re.compile('^ *$') # word regexes word = re.compile('^[A-Za-z0-9_]+$') diff --git a/tab2.py b/tab2.py index 850bb98..058d17a 100644 --- a/tab2.py +++ b/tab2.py @@ -10,7 +10,9 @@ class Marker: class Tabber: wsre = regex.whitespace - wst = 'null' + wst = ('null', 'eol',) + sre = regex.space + st = ('null',) def __init__(self, m): self.mode = m self.lines = {} @@ -21,9 +23,14 @@ class Tabber: return self.mode.window.buffer.highlights[self.mode.name()].tokens[y] def get_token(self, y, i): return self.mode.window.buffer.highlights[self.mode.name()].tokens[y][i] + def token_is_whitespace(self, y, i): token = self.get_token(y, i) - return token.name == self.wst and self.wsre.match(token.string) + return token.name in self.wst and self.wsre.match(token.string) + def token_is_space(self, y, i): + token = self.get_token(y, i) + return token.name in self.st and self.sre.match(token.string) + def get_next_left_token(self, y, i): tokens = self.get_tokens(y) assert i >= 0 and i < len(tokens)