still working on it

--HG--
branch : pmacs2
This commit is contained in:
moculus 2007-07-14 23:02:42 +00:00
parent 6749e4c1c8
commit a780d9d61a
1 changed files with 40 additions and 35 deletions

75
lex3.py
View File

@ -116,26 +116,27 @@ class RegionRule(Rule):
self.grammar = grammar
self.end = end
self.start_re = re.compile(start, self.reflags)
def resume(self, lexer, toresume):
assert toresume, "can't resume without tokens to resume!"
self._lex(lexer, None, None, toresume)
return True
if not toresume:
raise Exception, "can't resume without tokens to resume!"
for t in self._lex(lexer, None, None, toresume):
yield t
raise StopIteration
def match(self, lexer, parent):
    """Try to match this rule's start pattern at the lexer's current column.

    Fetches the current line through ``self.get_line(lexer)`` and applies
    ``self.start_re`` to it, beginning at offset ``lexer.x``.  ``parent`` is
    accepted for interface compatibility but is not used here.

    Returns the regex match object, or None when the pattern does not match
    at that position.
    """
    current_line = self.get_line(lexer)
    start_column = lexer.x
    return self.start_re.match(current_line, start_column)
def lex(self, lexer, parent, m):
self._lex(lexer, parent, m, [])
def _add_from_regex(self, name, lexer, parent, m, matchd={}):
s = m.group(0)
token = self.make_token(lexer, s, name, parent, matchd)
lexer.add_token(token)
lexer.x += len(s)
return token
for t in self._lex(lexer, parent, m, []):
yield t
raise StopIteration
def _lex(self, lexer, parent, m, toresume=[]):
# this determines whether we are still reentering. if len(toresume) == 1
# then it means that we have been reentering but will not continue, so
# reenter will be false.
reenter = len(toresume) > 1
# we either need a match object, or a token to resume
assert m or len(toresume) > 0
assert m or reenter, "we need a current match, or a previous match"
if m:
# if we had a match, then it becomes the parent, and we save its
@ -148,12 +149,11 @@ class RegionRule(Rule):
parent = toresume[0]
d = parent.matchd
assert parent.name == 'start'
null_t = None
# this determines whether we are still reentering. if len(toresume) == 1
# then it means that we have been reentering but will not continue, so
# reenter will be false.
reenter = len(toresume) > 1
# this token, when set, will store unmatched characters which will be
# combined into a single "null" token when the end of the document, or
# a named-token, is reached.
null_t = None
# if we have an end regex, then build it here. notice that it can
# reference named groups from the start token. if we have no end,
@ -166,22 +166,24 @@ class RegionRule(Rule):
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
line = self.get_line(lexer)
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
while not done and lexer.y == old_y and lexer.x < len(line):
# if we are reentering mid-parse, then that takes precedence.
# afterwards, we need to clean-up and get our new state in order
if reenter:
reenter = False
for t in toresume[1].rule.resume(lexer, toresume[1:]):
yield t
if lexer.y >= len(lexer.lines):
raise StopIteration
elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
elif lexer.x >= len(line):
lexer.y += 1
lexer.x = 0
line = self.get_line(lexer)
# if we are looking for an end token, then see if we've
# found it. if so, then we are done!
if self.end:
@ -194,8 +196,8 @@ class RegionRule(Rule):
done = True
break
# ok, we need to check all our rules now, in order. if we
# find a token, note that we found one and exit the loop
# ok, we need to check all our rules now, in order. if we find a
# token, note that we found one and exit the loop
found = False
for rule in self.grammar.rules:
m = rule.match(lexer, parent)
@ -205,7 +207,7 @@ class RegionRule(Rule):
null_t = None
for t in rule.lex(lexer, parent, m):
yield t
found = True
found = True
break
# if we never found a token, then we need to add another
@ -233,7 +235,6 @@ class RegionRule(Rule):
lexer.x = 0
raise StopIteration
class NocaseRegionRule(RegionRule):
reflags = re.IGNORECASE
@ -426,10 +427,18 @@ class DualRegionRule(Rule):
# alright, we're finally done processing; return true
return t3
class NocaseDualRegionRule(DualRegionRule):
    """DualRegionRule variant whose patterns are compiled with re.IGNORECASE.

    Only the three pattern-compilation hooks are overridden; everything else
    comes from DualRegionRule.
    """

    def _compile_start(self):
        """Return ``self.start`` compiled as a case-insensitive regex."""
        pattern = self.start
        return re.compile(pattern, re.IGNORECASE)

    def _compile_middle(self, d):
        """Return ``self.middle``, %-formatted with ``d``, compiled case-insensitively.

        NOTE(review): ``d`` is presumably the dict of named groups captured by
        the start match -- confirm against DualRegionRule.
        """
        pattern = self.middle % d
        return re.compile(pattern, re.IGNORECASE)

    def _compile_end(self, d):
        """Return ``self.end``, %-formatted with ``d``, compiled case-insensitively."""
        pattern = self.end % d
        return re.compile(pattern, re.IGNORECASE)
class Grammar:
rules = []
def __init__(self):
# XYZ maybe this is unnecessary
for rule in self.rules:
if hasattr(rule, 'grammar') and rule.grammar is None:
rule.grammar = self
@ -473,7 +482,11 @@ class Lexer:
i += 1
if toresume:
toresume[0].rule.resume(self, toresume)
for t in toresume[0].rule.resume(self, toresume):
yield t
for t in self._lex():
yield t
raise StopIteration
def __iter__(self):
if self.lines is None:
@ -504,11 +517,3 @@ class Lexer:
return self.tokens.pop(0)
else:
raise StopIteration
class NocaseDualRegionRule(DualRegionRule):
    """DualRegionRule variant whose patterns are compiled with re.IGNORECASE.

    Only the three pattern-compilation hooks are overridden; everything else
    comes from DualRegionRule.
    """

    def _compile_start(self):
        """Return ``self.start`` compiled as a case-insensitive regex."""
        pattern = self.start
        return re.compile(pattern, re.IGNORECASE)

    def _compile_middle(self, d):
        """Return ``self.middle``, %-formatted with ``d``, compiled case-insensitively.

        NOTE(review): ``d`` is presumably the dict of named groups captured by
        the start match -- confirm against DualRegionRule.
        """
        pattern = self.middle % d
        return re.compile(pattern, re.IGNORECASE)

    def _compile_end(self, d):
        """Return ``self.end``, %-formatted with ``d``, compiled case-insensitively."""
        pattern = self.end % d
        return re.compile(pattern, re.IGNORECASE)