From eb37a919b7129a6642c2fa4c1da1f5075e22dc5a Mon Sep 17 00:00:00 2001 From: moculus Date: Wed, 11 Jul 2007 16:20:33 +0000 Subject: [PATCH] experimental lexing stuff! --HG-- branch : pmacs2 --- lex2.py | 83 ++++++++++++++++++++++++++++++------------------ mode_python.py | 8 ++--- tab_c.py | 86 -------------------------------------------------- 3 files changed, 57 insertions(+), 120 deletions(-) delete mode 100644 tab_c.py diff --git a/lex2.py b/lex2.py index 3bccf6e..22f3b60 100755 --- a/lex2.py +++ b/lex2.py @@ -63,6 +63,8 @@ class Rule: raise Exception, "%s rule cannot match!" % self.name def make_token(self, lexer, s, name, parent=None, matchd={}): return Token(name, self, lexer.y, lexer.x, s, parent, matchd) + def _get_line(self, lexer): + return lexer.lines[lexer.y] + '\n' def _set_group(self, group): if group is None: self.group = self.name @@ -78,7 +80,8 @@ class ConstantRule(Rule): self.length = len(self.constant) self._set_group(group) def match(self, lexer, parent): - if lexer.lines[lexer.y][lexer.x:].startswith(self.constant): + line = self._get_line(lexer) + if line[lexer.x:].startswith(self.constant): token = self.make_token(lexer, self.constant, self.name, parent) lexer.add_token(token) lexer.x += self.length @@ -102,7 +105,8 @@ class PatternRule(Rule): lexer.add_token(token) lexer.x += len(s) def match(self, lexer, parent): - m = self.re.match(lexer.lines[lexer.y], lexer.x) + line = self._get_line(lexer) + m = self.re.match(line, lexer.x) if m: self._match(lexer, parent, m) return True @@ -127,7 +131,8 @@ class ContextPatternRule(PatternRule): r = re.compile(self.pattern % parent.matchd) except KeyError: r = self.fallback_re - m = r.match(lexer.lines[lexer.y], lexer.x) + line = self._get_line(lexer) + m = r.match(line, lexer.x) if m: self._match(lexer, parent, m) return True @@ -157,7 +162,8 @@ class RegionRule(Rule): return True def match(self, lexer, parent): - m = self.start_re.match(lexer.lines[lexer.y], lexer.x) + line = self._get_line(lexer) + m = self.start_re.match(line, lexer.x) if m: self._match(lexer, parent, m, []) return True @@ -208,15 +214,16 @@ class RegionRule(Rule): # if this line is empty, then we skip it, but here we insert # an empty null token just so we have something - if not reenter and len(lexer.lines[lexer.y]) == 0: - null_t = Token('null', None, lexer.y, lexer.x, '', parent) - lexer.add_token(null_t) - null_t = None + #if not reenter and len(lexer.lines[lexer.y]) == 0: + # null_t = Token('null', None, lexer.y, lexer.x, '', parent) + # lexer.add_token(null_t) + # null_t = None # ok, as long as we haven't found the end token, and have more # data on the current line to read, we will process tokens - while (not done and lexer.y == old_y and - lexer.x < len(lexer.lines[lexer.y])): + #while (not done and lexer.y == old_y and + # lexer.x < len(lexer.lines[lexer.y])): + while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1: # if we are reentering mid-parse, then that takes precedence if reenter: reenter = False @@ -225,14 +232,17 @@ class RegionRule(Rule): null_t = None if lexer.y >= len(lexer.lines): return True - elif lexer.x >= len(lexer.lines[lexer.y]): + #elif lexer.x >= len(lexer.lines[lexer.y]): + elif lexer.x >= len(lexer.lines[lexer.y]) + 1: lexer.y += 1 lexer.x = 0 + line = self._get_line(lexer) + # if we are looking for an end token, then see if we've # found it. if so, then we are done! 
if self.end: - m = end_re.match(lexer.lines[lexer.y], lexer.x) + m = end_re.match(line, lexer.x) if m: self._add_from_regex('end', lexer, parent, m, {}) done = True @@ -247,6 +257,7 @@ class RegionRule(Rule): null_t = None break + # if we never found a token, then we need to add another # character to the current null token (which we should # create if it isn't set). @@ -254,8 +265,10 @@ class RegionRule(Rule): if null_t is None: null_t = Token('null', None, lexer.y, lexer.x, '', parent) lexer.add_token(null_t) - if len(lexer.lines[lexer.y]) > lexer.x: - null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) + #if len(lexer.lines[lexer.y]) > lexer.x: + if lexer.x < len(line): + #null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) + null_t.add_to_string(line[lexer.x]) lexer.x += 1 # ok, since we're soon going to be on a different line (or @@ -311,7 +324,8 @@ class DualRegionRule(Rule): return True def match(self, lexer, parent): # see if we can match our start token - m = self.start_re.match(lexer.lines[lexer.y], lexer.x) + line = self._get_line(lexer) + m = self.start_re.match(line, lexer.x) if m: t1 = self._add_from_regex('start', lexer, parent, m, m.groupdict()) t2 = self._match_first(lexer, t1, []) @@ -341,14 +355,15 @@ class DualRegionRule(Rule): # if this line is empty, then we will skip it, but here we insert # an empty null token just so we have something - if len(lexer.lines[lexer.y]) == 0: - null_t = Token('null', None, lexer.y, lexer.x, '', parent) - lexer.add_token(null_t) - null_t = None + #if len(lexer.lines[lexer.y]) == 0: + # null_t = Token('null', None, lexer.y, lexer.x, '', parent) + # lexer.add_token(null_t) + # null_t = None # ok, as long as we haven't found the end token, and have more # data on the current line to read, we will process tokens - while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): + #while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): + while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1: # if we are reentering mid-parse, then that takes precedence if reenter: raise Exception, "aw damn1" @@ -361,9 +376,11 @@ class DualRegionRule(Rule): #null_t = None #break + line = self._get_line(lexer) + # see if we have found the middle token. 
if so, we can then # proceed to "stage 2" - m2 = middle_re.match(lexer.lines[lexer.y], lexer.x) + m2 = middle_re.match(line, lexer.x) if m2: d2 = dict(d1.items() + m2.groupdict().items()) t2 = self._add_from_regex('middle', lexer, parent, m2, d2) @@ -386,7 +403,8 @@ class DualRegionRule(Rule): if null_t is None: null_t = Token('null', None, lexer.y, lexer.x, '', parent) lexer.add_token(null_t) - null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) + #null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) + null_t.add_to_string(line[lexer.x]) lexer.x += 1 # ok, since we're soon going to be on a different line (or @@ -433,17 +451,18 @@ class DualRegionRule(Rule): # if this line is empty, then we will skip it, but here weinsert # an empty null token just so we have something - if len(lexer.lines[lexer.y]) == 0: - null_t = Token('null', None, lexer.y, lexer.x, '', parent) - lexer.add_token(null_t) - null_t = None + #if len(lexer.lines[lexer.y]) == 0: + # null_t = Token('null', None, lexer.y, lexer.x, '', parent) + # lexer.add_token(null_t) + # null_t = None # ok, as long as we haven't found the end token, and have more # data on the current line to read, we will process tokens - while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): + while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1: # see if we have found the middle token. if so, we can then # proceed to "stage 2" - m3 = end_re.match(lexer.lines[lexer.y], lexer.x) + line = self._get_line(lexer) + m3 = end_re.match(line, lexer.x) if m3: t3 = self._add_from_regex('end', lexer, parent, m3, {}) done = True @@ -465,7 +484,8 @@ class DualRegionRule(Rule): if null_t is None: null_t = Token('null', None, lexer.y, lexer.x, '', parent) lexer.add_token(null_t) - null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) + #null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) + null_t.add_to_string(line[lexer.x]) lexer.x += 1 # ok, since we're soon going to be on a different line (or @@ -556,7 +576,8 @@ class Lexer: return self.tokens.pop(0) while self.y < len(self.lines): - line = self.lines[self.y] + #line = self.lines[self.y] + line = self.lines[self.y] + '\n' while self.x < len(line): curr_t = None for rule in self.grammar.rules: @@ -566,6 +587,8 @@ class Lexer: if null_t is None: null_t = Token('null', None, self.y, self.x, '') self.add_token(null_t) + #assert line[self.x] != '\n', "DAMN" + #assert line[self.x] != '$', "DAMN" null_t.add_to_string(line[self.x]) self.x += 1 null_t = None diff --git a/mode_python.py b/mode_python.py index af79e37..85feee4 100644 --- a/mode_python.py +++ b/mode_python.py @@ -2,7 +2,7 @@ import commands, os.path, sets, string import color, completer, default, mode2, lex2, method, regex, tab2 import ctag_python from point2 import Point -from lex2 import Grammar, PatternRule, RegionRule +from lex2 import Grammar, PatternRule, RegionRule, ConstantRule class StringGrammar(Grammar): rules = [ @@ -33,7 +33,8 @@ class PythonGrammar(Grammar): RegionRule(r'string', r'"', StringGrammar, r'"'), RegionRule(r'string', r"'", StringGrammar, r"'"), PatternRule(r'comment', r'#.*$'), - PatternRule(r'continuation', r'\\$'), + PatternRule(r'continuation', r'\\\n$'), + PatternRule(r'eol', r'\n$'), ] class PythonTabber(tab2.StackTabber): @@ -125,7 +126,7 @@ class PythonTabber(tab2.StackTabber): # since we're done with the string, resume our indentation level self._opt_pop('string') elif fqname == 'delimiter': - # we only reall care about a colon as part of a one-line statement, + # we only really care 
about a colon as part of a one-line statement, # i.e. "while ok: foo()" or "if True: print 3" if token.string == ':': if self.markers and self.markers[-1].name in ('[', '{'): @@ -183,7 +184,6 @@ class Python(mode2.Fundamental): # highlighting self.colors = { 'keyword': color.build('cyan', 'default'), - #'reserved': color.build('cyan', 'default'), 'reserved': color.build('magenta', 'default'), 'builtin': color.build('cyan', 'default'), 'functionname': color.build('blue', 'default'), diff --git a/tab_c.py b/tab_c.py deleted file mode 100644 index 7b6fcfd..0000000 --- a/tab_c.py +++ /dev/null @@ -1,86 +0,0 @@ -import tab, point - -class CTabber(tab.TokenStackTabber): - close_tags = {')': '(', - ']': '[', - '}': '{'} - - def stack_append_const(self, c): - self.stack_append((c, self.tab_stack[-1][1] + 4)) - def stack_append_unique_const(self, c): - if self.tab_stack[-1][0] != c: - self.stack_append((c, self.tab_stack[-1][1] + 4)) - def stack_pop_const(self, *c_args): - if self.tab_stack[-1][0] in c_args: - self.stack_pop() - def stack_pop_all_const(self, *c_args): - while self.tab_stack[-1][0] in c_args: - self.stack_pop() - - def handle_token(self, prev_token, token, next_token, y=None): - buffer = self.mode.window.buffer - name = token.name - s = token.string - - if name == "c comment": - if self.tab_stack[-1][0] != "c comment": - self.stack_append(("c comment", self.tab_stack[-1][1])) - else: - self.line_depth += 1 - p = point.Point(len(buffer.lines[self.y]), self.y) - offset = buffer.get_point_offset(p) - if token.end <= offset or next_token is not None: - self.stack_pop() - elif name == "macro": - self.line_depth -= 4 - elif name == "operator" and next_token is None: - self.stack_append_const_unique("cont") - elif name == "label": - self.line_depth -= 4 - #self.line_depth = 0 - elif name == "keyword": - if (s == "do" or - s == "else" or - s == "for" or - s == "if" or - s == "while"): - self.stack_append_const("block") - elif s == "case": - if prev_token is None: - self.line_depth -= 4 - elif name == "delimiter": - if s == "{" or s == "(" or s == "[": - if s == "{": - if prev_token is None and self.tab_stack[-1][0] == "block": - self.line_depth -= 4 - self.stack_pop_const("block") - #self.stack_pop_const("block", "cont") - else: - self.stack_pop_const("cont") - if next_token is None: - self.stack_append((s, self.tab_stack[-1][1] + 4)) - else: - p = buffer.get_offset_point(next_token.start) - self.stack_append((s, p.x)) - elif s == "}" or s == ")" or s == "]": - if s == "}": - self.stack_pop_all_const("block", "cont") - else: - self.stack_pop_all_const("cont") - if self.tab_stack[-1][0] == self.close_tags[s]: - self.stack_pop() - if prev_token is None: - self.line_depth = self.tab_stack[-1][1] - elif self.errors is False: - err = "tag mismatch, line %d: expected %r, got %r" % \ - (self.y, self.tab_stack[-1][0], s) - self.mode.window.application.set_error(err) - self.errors = True - if s == "}": - self.stack_pop_all_const("block", "cont") - elif (s == "=" or s == "?") and next_token is None: - self.stack_append_const_unique("cont") - elif s == ',': - self.stack_pop_all_const("cont") - elif s == ';': - self.stack_pop_all_const("block", "cont")
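
Note on the approach, with a minimal sketch (not part of the patch itself): the core change is that the new _get_line() helper appends a sentinel '\n' to each stored line and the scan loops run to len(line) + 1, so end-of-line becomes a real character that rules can consume. That is what lets mode_python.py replace the old continuation pattern r'\\$' with r'\\\n$' and add an explicit 'eol' rule. The rule table and scan_line() helper below are hypothetical stand-ins for illustration only, not the lex2.py classes.

    import re

    # Illustrative only: a tiny rule table in priority order. The buffer stores
    # lines without trailing newlines, so we append a sentinel '\n' before
    # matching, mirroring what _get_line() does in the patch.
    RULES = [
        ('continuation', re.compile(r'\\\n')),  # backslash at the very end of a line
        ('word',         re.compile(r'\w+')),
        ('eol',          re.compile(r'\n')),    # explicit end-of-line token
    ]

    def scan_line(line):
        """Tokenize one buffer line (stored without its newline)."""
        line = line + '\n'                      # the sentinel newline
        x, tokens = 0, []
        while x < len(line):
            for name, rx in RULES:
                m = rx.match(line, x)
                if m:
                    tokens.append((name, m.group(0)))
                    x = m.end()
                    break
            else:
                # no rule matched: emit a one-character 'null' token, roughly
                # as the lexer's fallback path does
                tokens.append(('null', line[x]))
                x += 1
        return tokens

    print(scan_line('foo bar \\'))  # ends with a ('continuation', '\\' + '\n') token
    print(scan_line('foo bar'))     # ends with an ('eol', '\n') token

Without the sentinel, the continuation rule has nothing after the backslash to match and cannot be distinguished from a backslash in the middle of a line; with it, both the continuation and the end-of-line show up as ordinary tokens.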