From 30dcf827aa9f287eff36e57991cfb541bfd147b1 Mon Sep 17 00:00:00 2001 From: moculus Date: Mon, 11 Jun 2007 21:40:21 +0000 Subject: [PATCH] --HG-- branch : pmacs2 --- application.py | 10 +- highlight2.py | 25 ++- lex2.py | 472 +++++++++++++++++-------------------------------- 3 files changed, 186 insertions(+), 321 deletions(-) diff --git a/application.py b/application.py index 24c91c5..ea16a16 100755 --- a/application.py +++ b/application.py @@ -485,14 +485,8 @@ class Application: elif token.x >= x + slot.width: break - c = w.mode.colors.get(token.name, w.mode.default_color) -# c = w.mode.default_color -# name_parts = token.name.split('.') -# for i in range(0, len(name_parts)): -# name = '.'.join(name_parts[i:]) -# if name in w.mode.colors: -# c = w.mode.colors[name] -# break + name = token.fqname() + c = w.mode.colors.get(name, w.mode.default_color) if DARK_BACKGROUND: c |= curses.A_BOLD diff --git a/highlight2.py b/highlight2.py index 37d5181..07bd78d 100644 --- a/highlight2.py +++ b/highlight2.py @@ -20,7 +20,6 @@ class Highlighter: def __init__(self, lexer): self.lexer = lexer self.tokens = [] - self.line_contexts = {} def dump(self, fmt='(%3s, %2s) | %s'): print fmt % ('y', 'x', 'string') @@ -50,20 +49,36 @@ class Highlighter: sys.stdout.write(token.string) sys.stdout.write('\n') + def delete_token(self, y, i): + assert y < len(self.tokens), "%d < %d" % (y, len(self.tokens)) + assert i < len(self.tokens[y]), "%d < %d" % (i, len(self.tokens[i])) + deleted = [] + deleted.append(self.tokens[y].pop(i)) + while y < len(self.tokens): + while i < len(self.tokens[y]): + while deleted and self.tokens[y][i].parent is not deleted[-1]: + del deleted[-1] + if not deleted: + return + elif self.tokens[y][i].parent is deleted[-1]: + deleted.append(self.tokens[y].pop(i)) + else: + raise Exception, "huh?? %r %r" % (self.tokens[y][i].parent, + deleted) + i = 0 + y += 1 def highlight(self, lines): self.tokens = [[] for l in lines] self.lexer.lex(lines, y=0, x=0) for token in self.lexer: self.tokens[token.y].append(token) - self.line_contexts = dict(self.lexer.line_contexts) # relexing # ====================== def relex(self, lines, y1, x1, y2, x2): # start the relexing process - #self.lexer.lex(lines, y1, 0) - rulecontexts = self.line_contexts[y1] - self.lexer.resume(lines, y1, 0, rulecontexts) + token = self.tokens[y1][0] + self.lexer.resume(lines, y1, 0, token) # these keep track of the current y coordinate, the current token index # on line[y], and the current "new token", respectively. 
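The application.py hunk above now keys colors off token.fqname(), and the highlight2.py relex path resumes from a stored token instead of a saved RuleContext list. Both rely on the reworked, parent-linked Token class introduced in the lex2.py diff below. A minimal sketch of that name lookup, assuming the patched lex2 module is importable; the stand-in rule object is hypothetical, since only rule.name takes part in the dotted name:

    # Sketch only -- not part of this patch.  Shows how a parent chain
    # turns into the dotted name that application.py looks up in
    # w.mode.colors (falling back to w.mode.default_color).
    from lex2 import Token

    class FakeRegionRule(object):
        # hypothetical stand-in for a region rule; only .name is used here
        name = 'string'

    start = Token('start', FakeRegionRule(), 0, 0, '"')
    word = Token('word', None, 0, 1, 'foo', parent=start)

    print(start.fqname())   # expected to be something like 'string.start'
    print(word.fqname())    # expected to be something like 'string.word'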
diff --git a/lex2.py b/lex2.py index 2ad5f1b..e858aaa 100755 --- a/lex2.py +++ b/lex2.py @@ -1,58 +1,68 @@ import re -valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$') +valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$') reserved_names = ['start', 'middle', 'end', 'null'] -class RuleContext: - # to be clear: - # x, y: where the rule processing began - # rule: the rule which began - # flag: a signal to be used to resume the rule correctly - # context: the previous rule namespace(s) - # matchd: the dictionary returned by the rule's matching - def __init__(self, y, x, rule, flag, context, matchd): +class Token(object): + def __init__(self, name, rule=None, y=0, x=0, s="", parent=None, matchd={}): + self.name = name + self.rule = rule self.y = y self.x = x - self.rule = rule - self.flag = flag - self.context = context + self.string = s + self.parent = parent self.matchd = matchd - -class Token(object): - def __init__(self, name, rule, y, x, s, **vargs): - self.name = name - self.rule = rule - self.y = y - self.x = x - self.string = s - self.vargs = vargs + def parents(self): + if self.parent is not None: + parents = self.parent.parents() + parents.append(self.parent) + return parents + else: + return [] + def domain(self): + names = [] + if self.parent is not None: + names.extend(self.parent.domain()) + names.append(self.rule.name) + return names + def fqlist(self): + names = [] + if self.parent is not None: + names.extend(self.parent.domain()) + names.append(self.name) + return names + def fqname(self): + if self.name == 'start': + names = self.domain() + names.append(self.name) + else: + names = self.fqlist() + return '.'.join(names) def copy(self): - return Token(self.name, None, self.y, self.x, self.string, **self.vargs) + return Token(self.name, self.rule, self.y, self.x, self.string, + self.parent, self.matchd) def add_to_string(self, s): self.string += s def end_x(self): return self.x + len(self.string) def __eq__(self, other): - return (self.y == other.y and - self.x == other.x and - self.string == other.string and - self.name == other.name and - self.vargs == other.vargs) + return (self.y == other.y and self.x == other.x + and self.name == other.name and self.parent is other.parent and + self.string == other.string) def __repr__(self): if len(self.string) < 10: s = self.string else: s = self.string[:10] + '...' - return "" % (self.name, self.rule, self.y, self.x, s) - def render(self): - return (self,) + fields = (self.fqname(), self.rule, self.y, self.x, s) + return "" % fields class Rule: name = 'abstract' - def match(self, lexer, context=[], d={}): + def match(self, lexer, parent): raise Exception, "%s rule cannot match!" 
% self.name - def make_token(self, lexer, s, name, **vargs): - return Token(name, self, lexer.y, lexer.x, s, **vargs) + def make_token(self, lexer, s, name, parent=None, matchd={}): + return Token(name, self, lexer.y, lexer.x, s, parent, matchd) class ConstantRule(Rule): def __init__(self, name, constant): @@ -60,11 +70,12 @@ class ConstantRule(Rule): assert name not in reserved_names, "reserved rule name: %r" % name self.name = name self.constant = constant - def match(self, lexer, context=[], d={}): + self.length = len(self.constant) + def match(self, lexer, parent): if lexer.lines[lexer.y][lexer.x:].startswith(self.constant): - name = '.'.join(context + [self.name]) - lexer.add_token(self.make_token(lexer, self.constant, name, grammar=lexer.grammar)) - lexer.x += len(self.constant) + token = self.make_token(lexer, self.constant, self.name, parent) + lexer.add_token(token) + lexer.x += self.length return True else: return False @@ -76,17 +87,20 @@ class PatternRule(Rule): self.name = name self.pattern = pattern self.re = re.compile(pattern) - def match(self, lexer, context=[], d={}): + def _match(self, lexer, parent, m): + s = m.group(0) + token = self.make_token(lexer, s, self.name, parent) + lexer.add_token(token) + lexer.x += len(s) + def match(self, lexer, parent): m = self.re.match(lexer.lines[lexer.y], lexer.x) if m: - name = '.'.join(context + [self.name]) - lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar)) - lexer.x += len(m.group(0)) + self._match(lexer, parent, m) return True else: return False -class ContextPatternRule(Rule): +class ContextPatternRule(PatternRule): def __init__(self, name, pattern, fallback): assert valid_name_re.match(name), 'invalid name %r' % name assert name not in reserved_names, "reserved rule name: %r" % name @@ -94,16 +108,14 @@ self.pattern = pattern self.fallback = fallback self.fallback_re = re.compile(fallback) - def match(self, lexer, context=[], d={}): + def match(self, lexer, parent): try: - r = re.compile(self.pattern % d) + r = re.compile(self.pattern % parent.matchd) except KeyError: r = self.fallback_re m = r.match(lexer.lines[lexer.y], lexer.x) if m: - name = '.'.join(context + [self.name]) - lexer.add_token(self.make_token(lexer, m.group(0), name, grammar=lexer.grammar)) - lexer.x += len(m.group(0)) + self._match(lexer, parent, m) return True else: return False @@ -117,42 +129,50 @@ class RegionRule(Rule): self.grammar = grammar self.end = end self.start_re = re.compile(start) - def _add_from_regex(self, context, name, lexer, m, grammar): - t_name = '.'.join(context + [self.name, name]) - t = self.make_token(lexer, m.group(0), t_name, grammar=grammar) - lexer.add_token(t) - lexer.x += len(m.group(0)) - def resume(self, lexer, context, flag, d, rulecontexts): - assert rulecontexts, "can't resume without rulecontexts!" - self._match(lexer, context, d, None, rulecontexts) + def resume(self, lexer, toresume): + assert toresume, "can't resume without tokens to resume!" + self._match(lexer, None, None, toresume) return True - def match(self, lexer, context=[], d={}): - # see if we can match our start token + def match(self, lexer, parent): m = self.start_re.match(lexer.lines[lexer.y], lexer.x) if m: - # region was match, so let's do this - return self._match(lexer, context, m.groupdict(), m, []) + self._match(lexer, parent, m, []) + return True else: - # region was not matched; we never started.
so return false return False - def _match(self, lexer, context, d, m, rulecontext=[]): - # if we have been given rulecontext, then we are going to "resume" a - # parse that can already be assumed to have started - reenter = len(rulecontext) > 0 - assert m or reenter + def _add_from_regex(self, name, lexer, parent, m, matchd={}): + s = m.group(0) + token = self.make_token(lexer, s, name, parent, matchd) + lexer.add_token(token) + lexer.x += len(s) + return token - # first let's do some bookkeeping - lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start', - list(context), dict(d))) - if m is not None: - self._add_from_regex(context, 'start', lexer, m, lexer.grammar) - null_t_name = '.'.join(context + [self.name, 'null']) + def _match(self, lexer, parent, m, toresume=[]): + # we either need a match object, or a token to resume + assert m or len(toresume) > 0 + + if m: + # if we had a match, then it becomes the parent, and we save its + # subgroup dict + d = m.groupdict() + parent = self._add_from_regex('start', lexer, parent, m, d) + else: + # otherwise, we should be resuming the start token, so let's pull + # the relevant info out of the token + parent = toresume[0] + d = parent.matchd + assert parent.name == 'start' null_t = None - # if we have an end token, then build it here. notice that it can + # this determines whether we are still reentering. if len(toresume) == 1 + # then it means that we have been reentering but will not continue, so + # reenter will be false. + reenter = len(toresume) > 1 + + # if we have an end regex, then build it here. notice that it can # reference named groups from the start token. if we have no end, # well, then, we're never getting out of here alive! if self.end: @@ -167,21 +187,19 @@ class RegionRule(Rule): # if this line is empty, then we skip it, but here we insert # an empty null token just so we have something if not reenter and len(lexer.lines[lexer.y]) == 0: - null_t = Token(null_t_name, None, lexer.y, lexer.x, '') + null_t = Token('null', None, lexer.y, lexer.x, '', parent) lexer.add_token(null_t) null_t = None # ok, as long as we haven't found the end token, and have more # data on the current line to read, we will process tokens - while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): + while (not done and lexer.y == old_y and + lexer.x < len(lexer.lines[lexer.y])): # if we are reentering mid-parse, then that takes precedence if reenter: reenter = False - rule2 = rulecontext[0].rule - context2 = rulecontext[0].context - d2 = rulecontext[0].matchd - assert rule2.resume(lexer, context2, d2, rulecontext[1:]), \ - "%r %r %r %r" % (lexer, context2, d2, rulecontext[1:]) + rule2 = toresume[1].rule + rule2.resume(lexer, toresume[1:]) found = True null_t = None break @@ -191,7 +209,7 @@ class RegionRule(Rule): if self.end: m = end_re.match(lexer.lines[lexer.y], lexer.x) if m: - self._add_from_regex(context, 'end', lexer, m, None) + self._add_from_regex('end', lexer, parent, m, {}) done = True break @@ -199,7 +217,7 @@ class RegionRule(Rule): # find a token, note that we found one and exit the loop found = False for rule in self.grammar.rules: - if rule.match(lexer, context + [self.name], d): + if rule.match(lexer, parent): found = True null_t = None break @@ -209,7 +227,7 @@ class RegionRule(Rule): # create if it isn't set). 
if not found: if null_t is None: - null_t = Token(null_t_name, None, lexer.y, lexer.x, '') + null_t = Token('null', None, lexer.y, lexer.x, '', parent) lexer.add_token(null_t) null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) lexer.x += 1 @@ -223,12 +241,9 @@ class RegionRule(Rule): # then that means we're finished with the line and should move # on to the next one here if not done and old_y == lexer.y: - lexer.save_context() lexer.y += 1 lexer.x = 0 - # alright, we're finally done procesing the region, so return true - lexer.context.pop(-1) return True class DualRegionRule(Rule): @@ -242,52 +257,49 @@ class DualRegionRule(Rule): self.grammar2 = grammar2 self.end = end self.start_re = re.compile(start) - def _add_from_regex(self, context, name, lexer, m, grammar=None): - t_name = '.'.join(context + [self.name, name]) - t = self.make_token(lexer, m.group(0), t_name, grammar=grammar) - lexer.add_token(t) - lexer.x += len(m.group(0)) - - def resume(self, lexer, context, flag, d, rulecontexts): - if flag == 'start': - d2 = self._match_first(lexer, context, d, None, rulecontexts) - d3 = dict(d.items() + d2.items()) - self._match_second(lexer, context, d3, None, rulecontexts) - return True - elif flag == 'middle': - self._match_second(lexer, context, flag, d, None, rulecontexts) - return True + def _add_from_regex(self, name, lexer, parent, m, matchd={}): + s = m.group(0) + token = self.make_token(lexer, s, name, parent, matchd) + lexer.add_token(token) + lexer.x += len(s) + return token + def resume(self, lexer, toresume): + assert toresume + token = toresume[0] + d = token.matchd + if token.name == 'start': + stoken = toresume[0] + mtoken = self._match_first(lexer, stoken, None, toresume) + self._match_second(lexer, mtoken, []) + elif token.name == 'middle': + d3 = token.matchd + self._match_second(lexer, token.parent, d3, toresume) else: raise Exception, "invalid flag %r" % flag - - def match(self, lexer, context=[], d={}): + return True + def match(self, lexer, parent): # see if we can match our start token m = self.start_re.match(lexer.lines[lexer.y], lexer.x) if m: # region was match, so let's do this d1 = m.groupdict() - d2 = self._match_first(lexer, context, d1, m, []) + d2 = self._match_first(lexer, parent, m, []) d3 = dict(d1.items() + d2.items()) - self._match_second(lexer, context, d3, None, []) + self._match_second(lexer, parent, d3, None, []) return True else: # region was not matched; we never started. 
so return false return False - def _match_first(self, lexer, context, d1, m1, rulecontext=[]): + def _match_first(self, lexer, context, d1, m1, toresume=[]): # if we have been given rulecontext, then we are going to "resume" a # parse that can already be assumed to have started - reenter = len(rulecontext) > 0 + reenter = len(toresume) > 1 assert m1 or reenter - # first let's do some bookkeeping - lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start', - list(context), dict(d1))) - # ok, so create our start token, and get ready to start reading data if m1 is not None: - self._add_from_regex(context, 'start', lexer, m1, lexer.grammar) - null_t_name = '.'.join(context + [self.name, 'null']) + self._add_from_regex('start', lexer, parent, m1, m1.groupdict()) null_t = None middle_re = re.compile(self.middle % d1) @@ -302,7 +314,7 @@ class DualRegionRule(Rule): # if this line is empty, then we will skip it, but here weinsert # an empty null token just so we have something if len(lexer.lines[lexer.y]) == 0: - null_t = Token(null_t_name, None, lexer.y, lexer.x, '') + null_t = Token('null', None, lexer.y, lexer.x, '') lexer.add_token(null_t) null_t = None @@ -311,22 +323,22 @@ class DualRegionRule(Rule): while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): # if we are reentering mid-parse, then that takes precedence if reenter: - reenter = False - xrule = rulecontext[0].rule - xcontext = rulecontext[0].context - xd = rulecontext[0].matchd - assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \ - "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:]) - found = True - null_t = None - break + raise Exception, "aw damn" + #reenter = False + #xrule = rulecontext[0].rule + #xd = rulecontext[0].matchd + #assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \ + # "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:]) + #found = True + #null_t = None + #break # see if we have found the middle token. if so, we can then # proceed to "stage 2" m2 = middle_re.match(lexer.lines[lexer.y], lexer.x) if m2: d2 = m2.groupdict() - self._add_from_regex(context, 'middle', lexer, m2, None) + self._add_from_regex('middle', lexer, parent, m2, {}) done = True break @@ -334,7 +346,7 @@ class DualRegionRule(Rule): # find a token, note that we found one and exit the loop found = False for rule in self.grammar1.rules: - if rule.match(lexer, context + [self.name], d1): + if rule.match(lexer, parent): found = True null_t = None break @@ -344,7 +356,7 @@ class DualRegionRule(Rule): # create if it isn't set). 
if not found: if null_t is None: - null_t = Token(null_t_name, None, lexer.y, lexer.x, '') + null_t = Token('null', None, lexer.y, lexer.x, '') lexer.add_token(null_t) null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) lexer.x += 1 @@ -361,21 +373,17 @@ class DualRegionRule(Rule): lexer.save_context() lexer.y += 1 lexer.x = 0 - lexer.context.pop(-1) return d2 - def _match_second(self, lexer, context, d3, m, rulecontext=[]): + def _match_second(self, lexer, context, d3, m, toresume=[]): # if we have been given rulecontext, then we are going to "resume" a # parse that can already be assumed to have started - reenter = len(rulecontext) > 0 + reenter = len(toresume) > 1 # ok stage 2 is like stage 1, only we are looking for end tokens # instead of middle tokens - null_t_name = '.'.join(context + [self.name, 'null']) null_t = None end_re = re.compile(self.end % d3) - lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle', - list(context), dict(d3))) # ok, so as long as we aren't done (we haven't found an end token), # keep reading input @@ -385,20 +393,20 @@ class DualRegionRule(Rule): # if we are reentering mid-parse, then that takes precedence if reenter: - reenter = False - xrule = rulecontext[0].rule - xcontext = rulecontext[0].context - xd = rulecontext[0].matchd - assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \ - "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:]) - found = True - null_t = None - break + raise Exception, "aw damn" + #reenter = False + #xrule = rulecontext[0].rule + #xd = rulecontext[0].matchd + #assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \ + # "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:]) + #found = True + #null_t = None + #break # if this line is empty, then we will skip it, but here weinsert # an empty null token just so we have something if len(lexer.lines[lexer.y]) == 0: - null_t = Token(null_t_name, None, lexer.y, lexer.x, '') + null_t = Token('null', None, lexer.y, lexer.x, '') lexer.add_token(null_t) null_t = None @@ -409,7 +417,7 @@ class DualRegionRule(Rule): # proceed to "stage 2" m3 = end_re.match(lexer.lines[lexer.y], lexer.x) if m3: - self._add_from_regex(context, 'end', lexer, m3, None) + self._add_from_regex('end', lexer, parent, m3, {}) done = True break @@ -417,7 +425,7 @@ class DualRegionRule(Rule): # find a token, note that we found one and exit the loop found = False for rule in self.grammar2.rules: - if rule.match(lexer, context + [self.name], d3): + if rule.match(lexer, parent): found = True null_t = None break @@ -427,7 +435,7 @@ class DualRegionRule(Rule): # create if it isn't set). 
if not found: if null_t is None: - null_t = Token(null_t_name, None, lexer.y, lexer.x, '') + null_t = Token('null', None, lexer.y, lexer.x, '') lexer.add_token(null_t) null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) lexer.x += 1 @@ -446,150 +454,8 @@ class DualRegionRule(Rule): lexer.x = 0 # alright, we're finally done processing; return true - lexer.context.pop(-1) return True -# def matchOLD(self, lexer, context=[], d={}): -# m1 = self.start_re.match(lexer.lines[lexer.y], lexer.x) -# # see if we can match out start token -# if m1: -# # ok, so create our start token, and get ready to start reading data -# self._add_from_regex(context, 'start', lexer, m1, lexer.grammar) -# null_t_name = '.'.join(context + [self.name, 'null']) -# null_t = None -# -# d1 = m1.groupdict() -# lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'start', -# list(context), dict(d1))) -# d2 = {} -# middle_re = re.compile(self.middle % d1) -# -# # ok, so as long as we aren't done (we haven't found an end token), -# # keep reading input -# done = False -# while not done and lexer.y < len(lexer.lines): -# old_y = lexer.y -# # if this line is empty, then we will skip it, but here weinsert -# # an empty null token just so we have something -# if len(lexer.lines[lexer.y]) == 0: -# null_t = Token(null_t_name, None, lexer.y, lexer.x, '') -# lexer.add_token(null_t) -# null_t = None -# -# # ok, as long as we haven't found the end token, and have more -# # data on the current line to read, we will process tokens -# while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): -# # see if we have found the middle token. if so, we can then -# # proceed to "stage 2" -# m2 = middle_re.match(lexer.lines[lexer.y], lexer.x) -# if m2: -# d2 = m2.groupdict() -# self._add_from_regex(context, 'middle', lexer, m2, None) -# done = True -# break -# -# # ok, we need to check all our rules now, in order. if we -# # find a token, note that we found one and exit the loop -# found = False -# for rule in self.grammar1.rules: -# if rule.match(lexer, context + [self.name], d1): -# found = True -# null_t = None -# break -# -# # if we never found a token, then we need to add another -# # character to the current null token (which we should -# # create if it isn't set). -# if not found: -# if null_t is None: -# null_t = Token(null_t_name, None, lexer.y, lexer.x, '') -# lexer.add_token(null_t) -# null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) -# lexer.x += 1 -# -# # ok, since we're soon going to be on a different line (or -# # already are), we want a new null token. so forget about the -# # current one. 
-# null_t = None -# -# # if we're still on the same line at this point (and not done) -# # then that means we're finished with the line and should move -# # on to the next one here -# if not done and old_y == lexer.y: -# lexer.save_context() -# lexer.y += 1 -# lexer.x = 0 -# -# # ok stage 2 is like stage 1, only we are looking for end tokens -# # instead of middle tokens -# d3 = dict(d1.items() + d2.items()) -# end_re = re.compile(self.end % d3) -# lexer.context.pop(-1) -# lexer.context.append(RuleContext(lexer.y, lexer.x, self, 'middle', -# list(context), dict(d3))) -# -# # ok, so as long as we aren't done (we haven't found an end token), -# # keep reading input -# done = False -# while not done and lexer.y < len(lexer.lines): -# old_y = lexer.y -# # if this line is empty, then we will skip it, but here weinsert -# # an empty null token just so we have something -# if len(lexer.lines[lexer.y]) == 0: -# null_t = Token(null_t_name, None, lexer.y, lexer.x, '') -# lexer.add_token(null_t) -# null_t = None -# -# # ok, as long as we haven't found the end token, and have more -# # data on the current line to read, we will process tokens -# while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): -# # see if we have found the middle token. if so, we can then -# # proceed to "stage 2" -# m3 = end_re.match(lexer.lines[lexer.y], lexer.x) -# if m3: -# self._add_from_regex(context, 'end', lexer, m3, None) -# done = True -# break -# -# # ok, we need to check all our rules now, in order. if we -# # find a token, note that we found one and exit the loop -# found = False -# for rule in self.grammar2.rules: -# if rule.match(lexer, context + [self.name], d3): -# found = True -# null_t = None -# break -# -# # if we never found a token, then we need to add another -# # character to the current null token (which we should -# # create if it isn't set). -# if not found: -# if null_t is None: -# null_t = Token(null_t_name, None, lexer.y, lexer.x, '') -# lexer.add_token(null_t) -# null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) -# lexer.x += 1 -# -# # ok, since we're soon going to be on a different line (or -# # already are), we want a new null token. so forget about the -# # current one. -# null_t = None -# -# # if we're still on the same line at this point (and not done) -# # then that means we're finished with the line and should move -# # on to the next one here -# if not done and old_y == lexer.y: -# lexer.save_context() -# lexer.y += 1 -# lexer.x = 0 -# -# # alright, we're finally done processing; return true -# lexer.context.pop(-1) -# return True -# else: -# # dual region was not matched; we never started. 
so return false -# return False - class Grammar: rules = [] def __init__(self): @@ -606,9 +472,6 @@ class Lexer: self.lines = None self.tokens = [] - self.context = [] - self.line_contexts = {} - def add_token(self, t): self.tokens.append(t) @@ -618,30 +481,24 @@ class Lexer: self.lines = lines self.tokens = [] - self.context = [] - self.line_contexts = {} + def resume(self, lines, y, x, token): + self.y = y + self.x = x + #self.x = 0 + self.lines = lines + self.tokens = [] - def resume(self, lines, y=0, x=0, rulecontexts=[]): - if len(rulecontexts) == 0: - self.lex(lines, y, x) - else: - self.y = y - self.x = x - self.lines = lines - self.tokens = [] - rc = rulecontexts[0] - rc.rule.resume(self, rc.context, rc.flag, rc.matchd, rulecontexts[1:]) + if token: + toresume = token.parents() + if toresume: + raise Exception, "aw damn" def __iter__(self): if self.lines is None: raise Exception, "no lines to lex" return self - def save_context(self): - self.line_contexts[self.y] = list(self.context) - def next(self): - null_t_name = 'null' null_t = None if self.tokens: @@ -652,15 +509,14 @@ class Lexer: while self.x < len(line): curr_t = None for rule in self.grammar.rules: - if rule.match(self): + if rule.match(self, None): assert self.tokens, "match rendered no tokens?" return self.tokens.pop(0) if null_t is None: - null_t = Token(null_t_name, None, self.y, self.x, '') + null_t = Token('null', None, self.y, self.x, '') self.add_token(null_t) null_t.add_to_string(line[self.x]) self.x += 1 - self.save_context() null_t = None self.y += 1 self.x = 0
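For reference, a rough end-to-end sketch of the reworked interface: rules now receive a parent token rather than a context list and match dictionary, and consumers read dotted names through fqname(). The constructor signatures used below (PatternRule(name, pattern), RegionRule(name, start, grammar, end), Lexer(name, grammar)) are assumptions based on how the rest of pmacs2 drives lex2, not something this patch spells out in full:

    # Sketch only -- not part of this patch; constructor argument orders are assumed.
    from lex2 import Grammar, PatternRule, RegionRule, Lexer

    class WordGrammar(Grammar):
        # grammar used inside the braced region
        rules = [PatternRule('word', r'[a-zA-Z_]+')]

    class ExampleGrammar(Grammar):
        rules = [
            RegionRule('braced', r'\{', WordGrammar(), r'\}'),  # assumed: name, start, grammar, end
            PatternRule('number', r'[0-9]+'),
        ]

    lexer = Lexer('example', ExampleGrammar())  # assumed Lexer(name, grammar)
    lexer.lex(['12 {foo bar} 34'])
    for token in lexer:
        # top-level tokens print plain names such as 'number'; tokens inside
        # the region pick up the region rule's name, e.g. 'braced.start',
        # 'braced.word', 'braced.null'
        print('%-14s %r' % (token.fqname(), token.string))

One caveat when running this against exactly this revision: resuming a lex mid-buffer is only partially wired up here, since Lexer.resume and the reenter branches of DualRegionRule still raise placeholder exceptions, and DualRegionRule._match_first/_match_second still take a context parameter while their bodies refer to parent.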