major awesomeness for lex3

--HG--
branch : pmacs2
moculus 2007-07-15 19:07:36 +00:00
parent 8a16c4083d
commit 7e23af56e3
14 changed files with 249 additions and 792 deletions

View File

@@ -4,7 +4,7 @@ my $bar =~ s/foob/blag/g;
sub foo { sub foo {
bar() bar()
unless 9; unless 9 && 3;
} }
my $foo = { my $foo = {

552
lex2.py
View File

@@ -1,552 +0,0 @@
import re
import util
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
full_name_re = re.compile('^([a-zA-Z_]+)([0-9]*)$')
reserved_names = ['start', 'middle', 'end', 'null']
class Token(object):
def __init__(self, name, rule=None, y=0, x=0, s="", parent=None, matchd={}):
self.name = name
self.rule = rule
self.y = y
self.x = x
self.string = s
self.parent = parent
self.matchd = matchd
def parents(self):
if self.parent is not None:
parents = self.parent.parents()
parents.append(self.parent)
return parents
else:
return []
def domain(self):
if self.parent is not None:
names = self.parent.domain()
else:
names = []
if self.name != 'middle':
names.append(self.rule.name)
return names
def fqlist(self):
if self.parent is not None:
names = self.parent.domain()
else:
names = []
if self.name == 'start':
names.append(self.rule.name)
names.append(self.name)
return names
def fqname(self):
names = self.fqlist()
return '.'.join(names)
def copy(self):
return Token(self.name, self.rule, self.y, self.x, self.string,
self.parent, self.matchd)
def add_to_string(self, s):
self.string += s
def end_x(self):
return self.x + len(self.string)
def __eq__(self, other):
return (self.y == other.y and self.x == other.x
and self.name == other.name and self.parent is other.parent and
self.string == other.string)
def __repr__(self):
if len(self.string) < 10:
s = self.string
else:
s = self.string[:10] + '...'
fields = (self.fqname(), self.rule, self.y, self.x, s)
return "<Token(%r, %r, %d, %d, %r)>" % fields
class Rule:
def __init__(self, name):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
def match(self, lexer, parent):
raise Exception, "%s rule cannot match!" % self.name
def make_token(self, lexer, s, name, parent=None, matchd={}):
return Token(name, self, lexer.y, lexer.x, s, parent, matchd)
def _get_line(self, lexer):
return lexer.lines[lexer.y] + '\n'
def _set_group(self, group):
if group is None:
self.group = self.name
else:
self.group = group
class ConstantRule(Rule):
def __init__(self, name, constant, group=None):
Rule.__init__(self, name)
self.constant = constant
self.length = len(self.constant)
self._set_group(group)
def match(self, lexer, parent):
line = self._get_line(lexer)
if line[lexer.x:].startswith(self.constant):
token = self.make_token(lexer, self.constant, self.name, parent)
lexer.add_token(token)
lexer.x += self.length
return True
else:
return False
class PatternRule(Rule):
def __init__(self, name, pattern, group=None):
Rule.__init__(self, name)
self.pattern = pattern
self._compile()
self._set_group(group)
def _compile(self):
self.re = re.compile(self.pattern)
def _match(self, lexer, parent, m):
s = m.group(0)
token = self.make_token(lexer, s, self.name, parent)
lexer.add_token(token)
lexer.x += len(s)
def match(self, lexer, parent):
line = self._get_line(lexer)
m = self.re.match(line, lexer.x)
if m:
self._match(lexer, parent, m)
return True
else:
return False
class NocasePatternRule(PatternRule):
def _compile(self):
self.re = re.compile(self.pattern, re.IGNORECASE)
class ContextPatternRule(PatternRule):
def __init__(self, name, pattern, fallback, group=None):
Rule.__init__(self, name)
self.pattern = pattern
self.fallback = fallback
self.fallback_re = re.compile(fallback)
self._set_group(group)
def match(self, lexer, parent):
try:
r = re.compile(self.pattern % parent.matchd)
except KeyError:
r = self.fallback_re
line = self._get_line(lexer)
m = r.match(line, lexer.x)
if m:
self._match(lexer, parent, m)
return True
else:
return False
class RegionRule(Rule):
def __init__(self, name, start, grammar, end, group=None):
Rule.__init__(self, name)
self.start = start
self.grammar = grammar
self.end = end
self.start_re = self._compile_start()
self._set_group(group)
def _compile_start(self):
return re.compile(self.start)
def _compile_end(self, d):
return re.compile(self.end % d)
def resume(self, lexer, toresume):
assert toresume, "can't resume without tokens to resume!"
self._match(lexer, None, None, toresume)
return True
def match(self, lexer, parent):
line = self._get_line(lexer)
m = self.start_re.match(line, lexer.x)
if m:
self._match(lexer, parent, m, [])
return True
else:
return False
def _add_from_regex(self, name, lexer, parent, m, matchd={}):
s = m.group(0)
token = self.make_token(lexer, s, name, parent, matchd)
lexer.add_token(token)
lexer.x += len(s)
return token
def _match(self, lexer, parent, m, toresume=[]):
# we either need a match object, or a token to resume
assert m or len(toresume) > 0
if m:
# if we had a match, then it becomes the parent, and we save its
# subgroup dict
d = m.groupdict()
parent = self._add_from_regex('start', lexer, parent, m, d)
else:
# otherwise, we should be resuming the start token, so let's pull
# the relevant info out of the token
parent = toresume[0]
d = parent.matchd
assert parent.name == 'start'
null_t = None
# this determines whether we are still reentering. if len(toresume) == 1
# then it means that we have been reentering but will not continue, so
# reenter will be false.
reenter = len(toresume) > 1
# if we have an end regex, then build it here. notice that it can
# reference named groups from the start token. if we have no end,
# well, then, we're never getting out of here alive!
if self.end:
end_re = self._compile_end(d)
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
if reenter:
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
if lexer.y >= len(lexer.lines):
return True
elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
lexer.y += 1
lexer.x = 0
line = self._get_line(lexer)
# if we are looking for an end token, then see if we've
# found it. if so, then we are done!
if self.end:
m = end_re.match(line, lexer.x)
if m:
self._add_from_regex('end', lexer, parent, m, {})
done = True
break
# ok, we need to check all our rules now, in order. if we
# find a token, note that we found one and exit the loop
found = False
for rule in self.grammar.rules:
if rule.match(lexer, parent):
found = True
null_t = None
break
# if we never found a token, then we need to add another
# character to the current null token (which we should
# create if it isn't set).
if not found:
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
if lexer.x < len(line):
null_t.add_to_string(line[lexer.x])
lexer.x += 1
# ok, since we're soon going to be on a different line (or
# already are), we want a new null token. so forget about the
# current one (i.e. stop adding to it).
null_t = None
# if we're still on the same line at this point (and not done)
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.y += 1
lexer.x = 0
return True
class NocaseRegionRule(RegionRule):
def _compile_start(self):
return re.compile(self.start, re.IGNORECASE)
def _compile_end(self, d):
return re.compile(self.end % d, re.IGNORECASE)
class DualRegionRule(Rule):
def __init__(self, name, start, grammar1, middle, grammar2, end, group=None):
Rule.__init__(self, name)
self.start = start
self.grammar1 = grammar1
self.middle = middle
self.grammar2 = grammar2
self.end = end
self.start_re = self._compile_start()
self._set_group(group)
def _compile_start(self):
return re.compile(self.start)
def _compile_middle(self, d):
return re.compile(self.middle % d)
def _compile_end(self, d):
return re.compile(self.end % d)
def _add_from_regex(self, name, lexer, parent, m, matchd={}):
s = m.group(0)
token = self.make_token(lexer, s, name, parent, matchd)
lexer.add_token(token)
lexer.x += len(s)
return token
def resume(self, lexer, toresume):
assert toresume, "can't resume without tokens to resume!"
token = toresume[0]
if token.name == 'start':
t2 = self._match_first(lexer, token, toresume)
if t2 is not None:
t3 = self._match_second(lexer, t2, [])
return True
elif token.name == 'middle':
t3 = self._match_second(lexer, token, toresume)
else:
raise Exception, "invalid flag %r" % flag
return True
def match(self, lexer, parent):
# see if we can match our start token
line = self._get_line(lexer)
m = self.start_re.match(line, lexer.x)
if m:
t1 = self._add_from_regex('start', lexer, parent, m, m.groupdict())
t2 = self._match_first(lexer, t1, [])
if t2 is not None:
t3 = self._match_second(lexer, t2, [])
return True
else:
# region was not matched; we never started. so return false
return False
def _match_first(self, lexer, parent, toresume=[]):
reenter = len(toresume) > 1
if reenter:
assert parent is toresume[0]
d1 = parent.matchd
assert parent.name == 'start'
null_t = None
middle_re = self._compile_middle(d1)
d2 = {}
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
t2 = None
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
if reenter:
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
line = self._get_line(lexer)
# see if we have found the middle token. if so, we can then
# proceed to "stage 2"
m2 = middle_re.match(line, lexer.x)
if m2:
d2 = dict(d1.items() + m2.groupdict().items())
t2 = self._add_from_regex('middle', lexer, parent, m2, d2)
done = True
break
# ok, we need to check all our rules now, in order. if we
# find a token, note that we found one and exit the loop
found = False
for rule in self.grammar1.rules:
if rule.match(lexer, parent):
found = True
null_t = None
break
# if we never found a token, then we need to add another
# character to the current null token (which we should
# create if it isn't set).
if not found:
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
null_t.add_to_string(line[lexer.x])
lexer.x += 1
# ok, since we're soon going to be on a different line (or
# already are), we want a new null token. so forget about the
# current one.
null_t = None
# if we're still on the same line at this point (and not done)
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.y += 1
lexer.x = 0
return t2
def _match_second(self, lexer, parent, toresume=[]):
reenter = len(toresume) > 1
if reenter:
assert parent is toresume[0]
assert parent.name == 'middle'
d3 = parent.matchd
null_t = None
end_re = self._compile_end(d3)
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
t3 = None
done = False
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if we are reentering mid-parse, then that takes precedence
if reenter:
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# see if we have found the middle token. if so, we can then
# proceed to "stage 2"
line = self._get_line(lexer)
m3 = end_re.match(line, lexer.x)
if m3:
t3 = self._add_from_regex('end', lexer, parent, m3, {})
done = True
break
# ok, we need to check all our rules now, in order. if we
# find a token, note that we found one and exit the loop
found = False
for rule in self.grammar2.rules:
if rule.match(lexer, parent):
found = True
null_t = None
break
# if we never found a token, then we need to add another
# character to the current null token (which we should
# create if it isn't set).
if not found:
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
null_t.add_to_string(line[lexer.x])
lexer.x += 1
# ok, since we're soon going to be on a different line (or
# already are), we want a new null token. so forget about the
# current one.
null_t = None
# if we're still on the same line at this point (and not done)
# then that means we're finished with the line and should move
# on to the next one here
if not done and old_y == lexer.y:
lexer.y += 1
lexer.x = 0
# alright, we're finally done processing; return true
return t3
class Grammar:
rules = []
def __init__(self):
for rule in self.rules:
if hasattr(rule, 'grammar') and rule.grammar is None:
rule.grammar = self
if hasattr(rule, 'grammar1') and rule.grammar is None:
rule.grammar = self
if hasattr(rule, 'grammar2') and rule.grammar is None:
rule.grammar = self
grammar = Grammar()
class Lexer:
def __init__(self, name, grammar):
self.name = name
self.grammar = grammar
self.y = 0
self.x = 0
self.lines = None
self.tokens = []
def add_token(self, t):
self.tokens.append(t)
def lex(self, lines, y=0, x=0):
self.y = y
self.x = x
self.lines = lines
self.tokens = []
def resume(self, lines, y, x, token):
self.y = y
self.x = x
self.lines = lines
self.tokens = []
toresume = token.parents()
# this is a special case for the "middle" rule of a dual region rule
i = 0
while i < len(toresume):
if i > 0 and toresume[i].name == 'middle' and toresume[i-1].name == 'start':
del toresume[i-1]
else:
i += 1
if toresume:
toresume[0].rule.resume(self, toresume)
def __iter__(self):
if self.lines is None:
raise Exception, "no lines to lex"
return self
def next(self):
null_t = None
if self.tokens:
return self.tokens.pop(0)
while self.y < len(self.lines):
line = self.lines[self.y] + '\n'
while self.x < len(line):
curr_t = None
for rule in self.grammar.rules:
if rule.match(self, None):
assert self.tokens, "match rendered no tokens?"
return self.tokens.pop(0)
if null_t is None:
null_t = Token('null', None, self.y, self.x, '')
self.add_token(null_t)
null_t.add_to_string(line[self.x])
self.x += 1
null_t = None
self.y += 1
self.x = 0
if self.tokens:
return self.tokens.pop(0)
else:
raise StopIteration
class NocaseDualRegionRule(DualRegionRule):
def _compile_start(self):
return re.compile(self.start, re.IGNORECASE)
def _compile_middle(self, d):
return re.compile(self.middle % d, re.IGNORECASE)
def _compile_end(self, d):
return re.compile(self.end % d, re.IGNORECASE)
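
For reference, a minimal sketch (not part of the commit; the grammar and buffer contents are invented for illustration) of how the deleted lex2 API above was driven: subclass Grammar with a list of rule instances, hand one to a Lexer, call lex() with the buffer's lines, and iterate to pull Token objects, with unmatched input coming back as 'null' tokens.

# illustrative only -- assumes lex2.py is importable as shown above
from lex2 import Grammar, PatternRule, Lexer

class CommentGrammar(Grammar):
    rules = [PatternRule(name=r'comment', pattern=r'#.*$')]

lexer = Lexer('example', CommentGrammar())
lexer.lex(['x = 1 # set x', 'y = 2'])
for token in lexer:
    # e.g. null 'x = 1 ', comment '# set x', null '\n', ...
    print token.fqname(), repr(token.string)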

266
lex3.py
View File

@@ -1,12 +1,8 @@
import re import re
import util import regex, util
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
full_name_re = re.compile('^([a-zA-Z_]+)([0-9]*)$')
reserved_names = ['start', 'middle', 'end', 'null']
class Token(object): class Token(object):
def __init__(self, name, rule=None, y=0, x=0, s="", parent=None, matchd={}): def __init__(self, name, rule, y, x, s, parent=None, matchd={}, link=None):
self.name = name self.name = name
self.rule = rule self.rule = rule
self.y = y self.y = y
@@ -14,6 +10,7 @@ class Token(object):
self.string = s self.string = s
self.parent = parent self.parent = parent
self.matchd = matchd self.matchd = matchd
self.link = link
assert parent is None or hasattr(parent, 'name'), 'oh no %r' % parent assert parent is None or hasattr(parent, 'name'), 'oh no %r' % parent
def parents(self): def parents(self):
if self.parent is not None: if self.parent is not None:
@@ -27,7 +24,7 @@ class Token(object):
names = self.parent.domain() names = self.parent.domain()
else: else:
names = [] names = []
if self.name != 'middle': if self.link and not self.link.startswith('middle'):
names.append(self.rule.name) names.append(self.rule.name)
return names return names
def fqlist(self): def fqlist(self):
@@ -35,7 +32,7 @@ class Token(object):
names = self.parent.domain() names = self.parent.domain()
else: else:
names = [] names = []
if self.name == 'start': if self.link == 'start':
names.append(self.rule.name) names.append(self.rule.name)
names.append(self.name) names.append(self.name)
return names return names
@@ -63,27 +60,24 @@ class Token(object):
class Rule: class Rule:
reflags = 0 reflags = 0
def __init__(self, name, group=None): def __init__(self, name):
assert valid_name_re.match(name), 'invalid name %r' % name assert regex.valid_token_name.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name assert not regex.reserved_token_names.match(name), \
"rule name %r is reserved and thus illegal" % name
self.name = name self.name = name
if group is None:
self.group = name
else:
self.group = group
def match(self, lexer, parent): def match(self, lexer, parent):
raise Exception, "not implemented" raise Exception, "not implemented"
def lex(self, lexer, parent, match): def lex(self, lexer, parent, match):
raise Exception, "not implemented" raise Exception, "not implemented"
def make_token(self, lexer, s, name, parent=None, matchd={}): def make_token(self, lexer, s, name, parent=None, matchd={}, link=None):
t = Token(name, self, lexer.y, lexer.x, s, parent, matchd) t = Token(name, self, lexer.y, lexer.x, s, parent, matchd, link)
lexer.x += len(s) lexer.x += len(s)
return t return t
def get_line(self, lexer): def get_line(self, lexer):
return lexer.lines[lexer.y] + '\n' return lexer.lines[lexer.y] + '\n'
class PatternRule(Rule): class PatternRule(Rule):
def __init__(self, name, pattern, group=None): def __init__(self, name, pattern):
Rule.__init__(self, name) Rule.__init__(self, name)
self.pattern = pattern self.pattern = pattern
self.re = re.compile(self.pattern, self.reflags) self.re = re.compile(self.pattern, self.reflags)
@@ -98,7 +92,7 @@ class NocasePatternRule(PatternRule):
reflags = re.IGNORECASE reflags = re.IGNORECASE
class ContextPatternRule(PatternRule): class ContextPatternRule(PatternRule):
def __init__(self, name, pattern, fallback, group=None): def __init__(self, name, pattern, fallback):
Rule.__init__(self, name) Rule.__init__(self, name)
self.pattern = pattern self.pattern = pattern
self.fallback_re = re.compile(fallback, self.reflags) self.fallback_re = re.compile(fallback, self.reflags)
@@ -111,85 +105,187 @@ class ContextPatternRule(PatternRule):
class NocaseContextPatternRule(ContextPatternRule): class NocaseContextPatternRule(ContextPatternRule):
reflags = re.IGNORECASE reflags = re.IGNORECASE
class RegionRule(Rule): class PatternGroupRule(PatternRule):
def __init__(self, name, start, grammar, end, group=None): def __init__(self, name, *args):
assert args and len(args) % 2 == 0
i = 0
pairs = []
while i < len(args):
tokname, pattern = args[i], args[i+1]
pairs.append((name, re.compile(pattern, self.reflags)))
i += 2
Rule.__init__(self, name) Rule.__init__(self, name)
self.grammar = grammar self.pairs = tuple(pairs)
self.end = end def match(self, lexer, parent):
self.start_re = re.compile(start, self.reflags) x = lexer.x
matches = []
line = self.get_line(lexer)
for (tokname, tokre) in self.pairs:
m = tokre.match(line, x)
if m:
x += len(m.group(0))
matches.append((tokname, m))
else:
return []
assert len(matches) == len(self.pairs)
return matches
def lex(self, lexer, parent, matches):
if matches:
for (tokname, m) in matches:
yield self.make_token(lexer, m.group(0), tokname, parent, m.groupdict())
raise StopIteration
class RegionRule(Rule):
def __init__(self, name, *args):
Rule.__init__(self, name)
assert len(args) > 1
args = list(args)
self.pairs = []
self.start_re = re.compile(args.pop(0), self.reflags)
while len(args) > 1:
grammar = args.pop(0)
pattern = args.pop(0)
assert hasattr(grammar, 'rules'), repr(grammar)
assert type(pattern) == type(''), repr(pattern)
self.pairs.append((grammar, pattern))
if len(args) == 1:
self.pairs.append((grammar, None))
def match(self, lexer, parent): def match(self, lexer, parent):
return self.start_re.match(self.get_line(lexer), lexer.x) return self.start_re.match(self.get_line(lexer), lexer.x)
def lex(self, lexer, parent, m): def lex(self, lexer, parent, m):
t1 = self.make_token(lexer, m.group(0), 'start', parent, m.groupdict()) assert m
yield t1 # ok, so since we had a match, we need to create our start token, who
if self.end: # will be the ancestor to all other tokens matched in this region
stopre = re.compile(self.end % t1.matchd, self.reflags) matchd = m.groupdict()
else: parent = self.make_token(lexer, m.group(0), 'start', parent, matchd, 'start')
stopre = None yield parent
for t2 in self._lex(lexer, [t1], 'end', stopre, self.grammar):
yield t2 # now we will loop over the different pairs of grammars/stop-patterns in
# this region, and return the resulting token; we start at 0
for tok in self._lex_loop(lexer, [parent], matchd, 0):
yield tok
raise StopIteration raise StopIteration
def resume(self, lexer, toresume): def resume(self, lexer, toresume):
assert toresume assert toresume, "can't resume without tokens to resume!"
t1 = toresume[0] # ok, so we need to figure out in which of the grammars of our region
assert t1.name # we are resuming. to do this we calculate i, a position in our list
if self.end: # of grammar/stop-pattern pairs
stopre = re.compile(self.end % t1.matchd, self.reflags) if toresume[0].link == 'start':
i = 0
else:
m = regex.middle_token_name.match(toresume[0].link)
assert m
i = int(m.group(1)) + 1
assert i > 0 and i < len(self.pairs)
# now we will loop over the different pairs of grammars/stop-patterns in
# this region, and return the resulting token; we start at i
for tok in self._lex_loop(lexer, toresume, toresume[0].matchd, i):
yield tok
raise StopIteration
def _lex_loop(self, lexer, toresume, matchd, i):
# we need to loop over our grammar/stop-pattern pairs
while i < len(self.pairs):
# for each one, we will compile our stop-regex, and figure out the
# name of the stop token to be created if this stop-regex matches.
grammar = self.pairs[i][0]
if self.pairs[i][1]:
stopre = re.compile(self.pairs[i][1] % matchd, self.reflags)
else: else:
stopre = None stopre = None
for t2 in self._lex(lexer, [t1], 'end', stopre, self.grammar): if i == len(self.pairs) - 1:
yield t2 tokname = 'end'
raise StopIteration else:
tokname = 'middle%d' % i
# ok, so now loop over all the tokens in the current grammar, until
# the stop-token (if any) is found, and return each result as we get
# it.
for tok in self._lex(lexer, toresume, tokname, stopre, grammar):
yield tok
# ok, so now either we found the stop-token, and have a new parent
# for future tokens (if any), or we are done.
if tok.name == tokname:
toresume = [tok]
matchd.update(tok.matchd)
else:
raise StopIteration
i += 1
# assuming we make it through all our grammars, and find the end-token,
# then we need to signal that we are done.
raise StopIteration
def _lex(self, lexer, toresume, stopname, stopre, grammar): def _lex(self, lexer, toresume, stopname, stopre, grammar):
assert toresume assert toresume
parent = toresume[0] parent = toresume[0]
reenter = len(toresume) > 1 reenter = len(toresume) > 1
null_t = None null_t = None
# ok, so there are only two way we want to exit this loop: either we
# lex the whole document, or we encounter the stop-token.
done = False done = False
while not done and lexer.y < len(lexer.lines): while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
line = self.get_line(lexer) line = self.get_line(lexer)
old_y = lexer.y
while not done and lexer.y == old_y and lexer.x < len(line): while not done and lexer.y == old_y and lexer.x < len(line):
# ok, so reenter gets priority, since the current input might be
# intended for nested grammar. so handle it here
if reenter: if reenter:
reenter = False reenter = False
for t in toresume[1].rule.resume(lexer, toresume[1:]): for t in toresume[1].rule.resume(lexer, toresume[1:]):
yield t yield t
# since we might have changed our x/y coordinates, we need to
# do some checks here, and maybe finish or change our coordintes
if lexer.y >= len(lexer.lines): if lexer.y >= len(lexer.lines):
raise StopIteration raise StopIteration
elif lexer.x >= len(line): elif lexer.x >= len(line):
lexer.y += 1 lexer.y += 1
lexer.x = 0 lexer.x = 0
# ok, so get the *now* current line
line = self.get_line(lexer) line = self.get_line(lexer)
if stopre: if stopre:
# if we are looking for a stop-token, do that check now
m = stopre.match(line, lexer.x) m = stopre.match(line, lexer.x)
if m: if m:
if null_t: if null_t:
# if we have a null token waiting, return it first.
yield null_t yield null_t
null_t = None null_t = None
yield self.make_token(lexer, m.group(0), stopname, parent, m.groupdict()) # ok, now return the stop-token, and signal that we are
# done and no more input is to be consumed
yield self.make_token(lexer, m.group(0), stopname,
parent, m.groupdict(), stopname)
done = True done = True
break break
m = None m = None
# start checking our rules to see if we can match the input
for rule in grammar.rules: for rule in grammar.rules:
m = rule.match(lexer, parent) m = rule.match(lexer, parent)
if m: if m:
# ok great, we have a match
if null_t: if null_t:
# if we have a null token waiting, return it first.
yield null_t yield null_t
null_t = None null_t = None
# ok, now for every token this rules has created, we
# return them, one by one.
for t in rule.lex(lexer, parent, m): for t in rule.lex(lexer, parent, m):
yield t yield t
break break
if not m: if not m:
if lexer.x < len(line): # we didn't find a match on a rule, so add this character to
# the current null token (creating a new one if necessary);
if not null_t: if not null_t:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) null_t = Token('null', None, lexer.y, lexer.x, '', parent)
null_t.add_to_string(line[lexer.x]) null_t.add_to_string(line[lexer.x])
lexer.x += 1 lexer.x += 1
# ok, we are at the end of a line of input. so, if we have a null
# token waiting, now is the time to return it
if null_t: if null_t:
yield null_t yield null_t
null_t = None null_t = None
@@ -200,86 +296,8 @@ class RegionRule(Rule):
class NocaseRegionRule(RegionRule): class NocaseRegionRule(RegionRule):
reflags = re.IGNORECASE reflags = re.IGNORECASE
class DualRegionRule(RegionRule):
def __init__(self, name, start, grammar1, middle, grammar2, end, group=None):
Rule.__init__(self, name)
self.start_re = re.compile(start, self.reflags)
self.grammar1 = grammar1
self.middle = middle
self.grammar2 = grammar2
self.end = end
def match(self, lexer, parent):
return self.start_re.match(self.get_line(lexer), lexer.x)
def lex(self, lexer, parent, m):
assert m
t1 = self.make_token(lexer, m.group(0), 'start', parent, m.groupdict())
yield t1
t2 = None
if self.middle:
stopre = re.compile(self.middle % t1.matchd, self.reflags)
else:
stopre = None
for t2 in self._lex(lexer, [t1], 'middle', stopre, self.grammar1):
yield t2
if t2 is not None and t2.name == 'middle':
if self.end:
d = dict(t2.matchd)
if t1:
d.update(t1.matchd)
stopre = re.compile(self.end % d, self.reflags)
else:
stopre = None
for t3 in self._lex(lexer, [t2], 'end', stopre, self.grammar2):
yield t3
raise StopIteration
def resume(self, lexer, toresume):
assert toresume, "can't resume without tokens to resume!"
t1 = t2 = None
if toresume[0].name == 'start':
t1 = toresume[0]
assert t1.name
elif toresume[0].name == 'middle':
t2 = toresume[0]
assert t2.name
else:
raise Exception, "invalid name %r" % toresume[0].name
if t1 is not None:
#assert t1.name == 'start'
if self.middle:
stopre = re.compile(self.middle, self.reflags)
else:
stopre = None
for t2 in self._lex_first(lexer, toresume, 'middle', stopre):
yield t2
toresume = [t2]
if t2 is not None:
assert t2.name == 'middle'
if self.end:
stopre = re.compile(self.end, self.reflags)
else:
stopre = None
for t3 in self._lex_second(lexer, toresume, 'end', stopre):
yield t3
#toresume = [t3]
raise StopIteration
class NocaseDualRegionRule(DualRegionRule):
reflags = re.IGNORECASE
class Grammar: class Grammar:
rules = [] rules = []
def __init__(self):
# XYZ maybe this is unnecessary
for rule in self.rules:
if hasattr(rule, 'grammar') and rule.grammar is None:
rule.grammar = self
if hasattr(rule, 'grammar1') and rule.grammar is None:
rule.grammar = self
if hasattr(rule, 'grammar2') and rule.grammar is None:
rule.grammar = self
grammar = Grammar() grammar = Grammar()
class Lexer: class Lexer:
@@ -307,10 +325,9 @@ class Lexer:
self.tokens = [] self.tokens = []
toresume = token.parents() toresume = token.parents()
# this is a special case for the "middle" rule of a dual region rule
i = 1 i = 1
while i < len(toresume): while i < len(toresume):
if toresume[i].name == 'middle' and toresume[i-1].name == 'start': if toresume[i].link and toresume[i].link != 'start':
del toresume[i-1] del toresume[i-1]
else: else:
i += 1 i += 1
@@ -339,6 +356,8 @@ class Lexer:
yield t yield t
break break
if self.y >= len(self.lines):
break
line = self.get_line() line = self.get_line()
if not m: if not m:
if self.x < len(line): if self.x < len(line):
@@ -351,3 +370,4 @@ class Lexer:
self.y += 1 self.y += 1
self.x = 0 self.x = 0
raise StopIteration raise StopIteration
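
The net effect of the lex3 rewrite, sketched briefly here (illustrative only, not part of the commit): DualRegionRule is gone, and RegionRule now takes a start pattern followed by any number of grammar/stop-pattern pairs. Intermediate stop tokens are linked as 'middle0', 'middle1', ... and the final one as 'end', which is why the mode files below drop DualRegionRule and why mode_perl.py's color keys change from 'replace.middle' to 'replace.middle0'.

# illustrative only -- the real rule in mode_perl.py uses its StringGrammar;
# a bare Grammar is used here just to keep the sketch self-contained
from lex3 import Grammar, RegionRule

# lex2-era form (removed above):
#   DualRegionRule(r'replace', r's *(?P<delim>[^ a-zA-Z0-9_])',
#                  StringGrammar, r'%(delim)s', StringGrammar, r'%(delim)s[a-z]*')
# lex3 equivalent; note that grammars are now passed as classes, not instances:
replace = RegionRule(r'replace',
                     r's *(?P<delim>[^ a-zA-Z0-9_])',  # start pattern
                     Grammar, r'%(delim)s',            # stop-pattern -> 'middle0'
                     Grammar, r'%(delim)s[a-z]*')      # stop-pattern -> 'end'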

View File

@@ -1,32 +1,21 @@
import color, mode2 import color, mode2
from point2 import Point from point2 import Point
from lex3 import Grammar, PatternRule, RegionRule, DualRegionRule from lex3 import Grammar, PatternRule, RegionRule
class MetadataGrammar(Grammar): class MetadataGrammar(Grammar):
rules = [ rules = [
PatternRule( PatternRule(r'username', r'[a-zA-Z0-9_]+'),
name=r'username',
pattern='[a-zA-Z0-9_]+',
),
] ]
class BlameGrammar(Grammar): class BlameGrammar(Grammar):
rules = [ rules = [
RegionRule( RegionRule(r'metadata', r'^[0-9\.]+', MetadataGrammar, r'[0-9]{4}-[0-9]{2}-[0-9]{2}'),
name=r'metadata', PatternRule(r'data', r'.+$'),
start=r'^[0-9\.]+',
grammar=MetadataGrammar(),
end=r'[0-9]{4}-[0-9]{2}-[0-9]{2}',
),
PatternRule(
name=r'data',
pattern=r'.+$',
),
] ]
class Blame(mode2.Fundamental): class Blame(mode2.Fundamental):
grammar = BlameGrammar() grammar = BlameGrammar
def __init__(self, w): def __init__(self, w):
mode2.Fundamental.__init__(self, w) mode2.Fundamental.__init__(self, w)
self.colors = { self.colors = {

View File

@@ -4,45 +4,45 @@ from point2 import Point
class StringGrammar(Grammar): class StringGrammar(Grammar):
rules = [ rules = [
PatternRule(name=r'octal', pattern=r'\\[0-7]{3}'), PatternRule(r'octal', r'\\[0-7]{3}'),
PatternRule(name=r'escaped', pattern=r'\\.'), PatternRule(r'escaped', r'\\.'),
] ]
class KeywordGrammar(Grammar): class KeywordGrammar(Grammar):
rules = [ rules = [
PatternRule(name=r'octal', pattern=r'\\[0-7]{3}'), PatternRule(r'octal', r'\\[0-7]{3}'),
PatternRule(name=r'escaped', pattern=r'\\.'), PatternRule(r'escaped', r'\\.'),
RegionRule(name='string', start="'", grammar=StringGrammar(), end="'"), RegionRule('string', "'", StringGrammar, r"'"),
RegionRule(name='string', start='"', grammar=StringGrammar(), end='"'), RegionRule('string', '"', StringGrammar, r'"'),
] ]
class CSSGrammar(Grammar): class CSSGrammar(Grammar):
rules = [ rules = [
RegionRule(name=r'comment', start='/\*', grammar=Grammar(), end='\*/'), RegionRule(r'comment', '/\*', Grammar, '\*/'),
RegionRule(name=r'htmlcomment', start='<!--', grammar=Grammar(), end='-->'), RegionRule(r'htmlcomment', '<!--', Grammar, '-->'),
NocasePatternRule(name=r'dimension', pattern=r'[+-]?(?:[0-9]+|[0-9]*\.[0-9]+)[-a-z_][-a-z0-9_]*'), NocasePatternRule(r'dimension', r'[+-]?(?:[0-9]+|[0-9]*\.[0-9]+)[-a-z_][-a-z0-9_]*'),
NocasePatternRule(name=r'percentage', pattern=r'[+-]?(?:[0-9]+|[0-9]*\.[0-9]+)%%'), NocasePatternRule(r'percentage', r'[+-]?(?:[0-9]+|[0-9]*\.[0-9]+)%%'),
NocasePatternRule(name=r'length', pattern=r'[+-]?(?:[0-9]+|[0-9]*\.[0-9]+)(?:em|ex|px|in|cm|mm|pt|pc)'), NocasePatternRule(r'length', r'[+-]?(?:[0-9]+|[0-9]*\.[0-9]+)(?:em|ex|px|in|cm|mm|pt|pc)'),
NocasePatternRule(name=r'hash', pattern=r'#[-a-z0-9_]+'), NocasePatternRule(r'hash', r'#[-a-z0-9_]+'),
NocasePatternRule(name=r'real', pattern=r'[+-]?[0-9]*\.[0-9]+'), NocasePatternRule(r'real', r'[+-]?[0-9]*\.[0-9]+'),
NocasePatternRule(name=r'int', pattern=r'[+-]?[0-9]+'), NocasePatternRule(r'int', r'[+-]?[0-9]+'),
NocasePatternRule(name=r'rule', pattern=r'@(?:page|media|import)'), NocasePatternRule(r'rule', r'@(?:page|media|import)'),
NocasePatternRule(name=r'color', pattern=r'(?:aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|orange|purple|red|silver|teal|white|yellow|#[0-9]{6}|#[0-9]{3})'), NocasePatternRule(r'color', r'(?:aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|orange|purple|red|silver|teal|white|yellow|#[0-9]{6}|#[0-9]{3})'),
NocasePatternRule(name=r'keyword', pattern=r'(?:url|rgb|counter)'), NocasePatternRule(r'keyword', r'(?:url|rgb|counter)'),
NocaseRegionRule(name=r'keyword', start='(?:(?<=url)|(?<=rgb)|(?<=counter))\(', grammar=KeywordGrammar(), end='\)'), NocaseRegionRule(r'keyword', '(?:(?<=url)|(?<=rgb)|(?<=counter))\(', KeywordGrammar, '\)'),
NocasePatternRule(name=r'label', pattern=r"\.?[-a-zA-Z0-9_]+(?= *{)"), NocasePatternRule(r'label', r"\.?[-a-zA-Z0-9_]+(?= *{)"),
NocasePatternRule(name=r'ident', pattern=r"-?[a-z_][-a-z0-9_]*"), NocasePatternRule(r'ident', r"-?[a-z_][-a-z0-9_]*"),
NocasePatternRule(name=r'name', pattern=r"[-a-z0-9_]+"), NocasePatternRule(r'name', r"[-a-z0-9_]+"),
NocasePatternRule(name=r'delimiter', pattern=r'[:;,{}()\[\]]|~=|\|=|='), NocasePatternRule(r'delimiter', r'[:;,{}()\[\]]|~=|\|=|='),
RegionRule(name='string', start="'", grammar=StringGrammar(), end="'"), RegionRule(r'string', "'", StringGrammar, r"'"),
RegionRule(name='string', start='"', grammar=StringGrammar(), end='"'), RegionRule(r'string', '"', StringGrammar, r'"'),
] ]
class CSS(mode2.Fundamental): class CSS(mode2.Fundamental):
grammar = CSSGrammar() grammar = CSSGrammar
def __init__(self, w): def __init__(self, w):
mode2.Fundamental.__init__(self, w) mode2.Fundamental.__init__(self, w)
self.add_bindings('close-paren', (')',)) self.add_bindings('close-paren', (')',))

View File

@@ -5,33 +5,33 @@ from mode_python import StringGrammar
class JavascriptGrammar(Grammar): class JavascriptGrammar(Grammar):
rules = [ rules = [
PatternRule(name=r'comment', pattern=r'//.*$'), PatternRule(r'comment', r'//.*$'),
RegionRule(name=r'comment', start='/\*', grammar=Grammar, end='\*/'), RegionRule(r'comment', '/\*', Grammar, '\*/'),
PatternRule(name=r'continuation', pattern=r'\\(?= *$)'), PatternRule(r'continuation', r'\\(?= *$)'),
PatternRule(name=r'function', pattern=r"(?<=function )[a-zA-Z_][a-zA-Z0-9_]*"), PatternRule(r'function', r"(?<=function )[a-zA-Z_][a-zA-Z0-9_]*"),
PatternRule(name=r'class', pattern=r"(?<=class )[a-zA-Z_][a-zA-Z0-9_]*"), PatternRule(r'class', r"(?<=class )[a-zA-Z_][a-zA-Z0-9_]*"),
PatternRule(name=r'reserved', pattern=r'(?:as|break|case|catch|class|const|continue|default|delete|do|else|export|extends|false|finally|for|function|if|import|in|instanceof|is|namespace|new|null|package|private|public|return|super|switch|this|throw|true|try|typeof|use|var|void|while|with)(?![a-zA-Z0-9_])'), PatternRule(r'reserved', r'(?:as|break|case|catch|class|const|continue|default|delete|do|else|export|extends|false|finally|for|function|if|import|in|instanceof|is|namespace|new|null|package|private|public|return|super|switch|this|throw|true|try|typeof|use|var|void|while|with)(?![a-zA-Z0-9_])'),
PatternRule(name=r'reserved', pattern=r'(?:abstract|debugger|enum|goto|implements|interface|native|protected|synchronized|throws|transient|volatile)(?![a-zA-Z0-9_])'), PatternRule(r'reserved', r'(?:abstract|debugger|enum|goto|implements|interface|native|protected|synchronized|throws|transient|volatile)(?![a-zA-Z0-9_])'),
PatternRule(name=r'nonreserved', pattern=r'(?:get|include|set)(?![a-zA-Z0-9_])'), PatternRule(r'nonreserved', r'(?:get|include|set)(?![a-zA-Z0-9_])'),
PatternRule(name=r"method", pattern=r"(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()"), PatternRule(r"method", r"(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()"),
PatternRule(name=r'identifier', pattern=r"[a-zA-Z_][a-zA-Z0-9_]*"), PatternRule(r'identifier', r"[a-zA-Z_][a-zA-Z0-9_]*"),
PatternRule(name=r'integer', pattern=r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?"), PatternRule(r'integer', r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?"),
PatternRule(name=r'float', pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+"), PatternRule(r'float', r"[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+"),
# fucking javascript! # fucking javascript!
# their lexer grammar requires one-token look-behind in order to know # their lexer grammar requires one-token look-behind in order to know
# whether a "/" starts a literal regex, or is part of a mathematical # whether a "/" starts a literal regex, or is part of a mathematical
# expression/assignment. so for now we will require either a space or $ # expression/assignment. so for now we will require either a space or $
# after / in order to *not* treat it as a regex. dammit! # after / in order to *not* treat it as a regex. dammit!
PatternRule(name=r'delimiter', pattern=r'%=|&&=|&=|\(|\)|\*=|\+=|,|-=|\.{3}|\.|/=(?= |$)|::|:|;|<<=|>>=|>>>=|\?|\[|\]|^=|^^=|\{|\}|\|=|\|\|='), PatternRule(r'delimiter', r'%=|&&=|&=|\(|\)|\*=|\+=|,|-=|\.{3}|\.|/=(?= |$)|::|:|;|<<=|>>=|>>>=|\?|\[|\]|^=|^^=|\{|\}|\|=|\|\|='),
PatternRule(name=r'operator', pattern=r'!==|!=|!|%|&&|&|\*|\+\+|\+|--|-|/(?= |$)|<<=|<<|<=|<|===|==|=|>>>=|>>>|>>=|>>|>=|>|\\'), PatternRule(r'operator', r'!==|!=|!|%|&&|&|\*|\+\+|\+|--|-|/(?= |$)|<<=|<<|<=|<|===|==|=|>>>=|>>>|>>=|>>|>=|>|\\'),
RegionRule(name='regex', start="/", grammar=StringGrammar, end="/"), RegionRule('regex', "/", StringGrammar, "/"),
RegionRule(name='string', start="'", grammar=StringGrammar, end="'"), RegionRule('string', "'", StringGrammar, "'"),
RegionRule(name='string', start='"', grammar=StringGrammar, end='"'), RegionRule('string', '"', StringGrammar, '"'),
] ]
class JavascriptTabber(tab2.StackTabber): class JavascriptTabber(tab2.StackTabber):

View File

@@ -1,8 +1,6 @@
import re, sets, string, sys import re, sets, string, sys
import color, commands, default, method, mode2, regex, tab2 import color, commands, default, method, mode2, regex, tab2
from point2 import Point from point2 import Point
from lex3 import Grammar, PatternRule, ContextPatternRule, \
RegionRule, DualRegionRule
class Life(mode2.Fundamental): class Life(mode2.Fundamental):
def __init__(self, w): def __init__(self, w):

View File

@@ -3,31 +3,31 @@ from lex3 import Grammar, PatternRule, RegionRule
class StringGrammar(Grammar): class StringGrammar(Grammar):
rules = [ rules = [
PatternRule(name=r'octal', pattern=r'\\[0-7]{3}'), PatternRule(r'octal', r'\\[0-7]{3}'),
PatternRule(name=r'escaped', pattern=r'\\.'), PatternRule(r'escaped', r'\\.'),
] ]
class NasmGrammar(Grammar): class NasmGrammar(Grammar):
rules = [ rules = [
PatternRule(name=r'keyword', pattern=r"(?:section|global|extern)(?![a-zA-Z_])"), PatternRule(r'keyword', r"(?:section|global|extern)(?![a-zA-Z_])"),
PatternRule(name=r'macros', pattern=r"%(?:define|undef|assign|strlen|macro|endmacro|if|elif|else|endif|ifdef|ifndef|include|push|pop|stacksize)(?![a-zA-Z_])"), PatternRule(r'macros', r"%(?:define|undef|assign|strlen|macro|endmacro|if|elif|else|endif|ifdef|ifndef|include|push|pop|stacksize)(?![a-zA-Z_])"),
PatternRule(name=r'instructions', pattern=r"(?:jeq|jne|ja|jmp|push|pushad|pushfd|call|ret|sub|add|pop|popa|popad|popfd|call|and|cwd|cdq|cmp|cmpxchg|cpuid|div|divpd|enter|leave|fadd|fld|fmul|fsqrt|fsub|hlt|imul|inc|int|int3|lea|mov|movd|mul|neg|not|nop|or|sal|sar|shl|shr|shld|shrd|syscall|sysenter|sysexit|test|xchg|xadd|xor)(?![a-zA-Z_])"), PatternRule(r'instructions', r"(?:jeq|jne|ja|jmp|push|pushad|pushfd|call|ret|sub|add|pop|popa|popad|popfd|call|and|cwd|cdq|cmp|cmpxchg|cpuid|div|divpd|enter|leave|fadd|fld|fmul|fsqrt|fsub|hlt|imul|inc|int|int3|lea|mov|movd|mul|neg|not|nop|or|sal|sar|shl|shr|shld|shrd|syscall|sysenter|sysexit|test|xchg|xadd|xor)(?![a-zA-Z_])"),
PatternRule(name=r'registers', pattern=r"(?:eax|ax|ah|al|ebx|bx|bh|bl|ecx|cx|ch|cl|esi|edi|esp|ebp)(?![a-zA-Z_])"), PatternRule(r'registers', r"(?:eax|ax|ah|al|ebx|bx|bh|bl|ecx|cx|ch|cl|esi|edi|esp|ebp)(?![a-zA-Z_])"),
PatternRule(name=r'prefix', pattern=r"(?:dword|word|lock)(?![a-zA-Z_])"), PatternRule(r'prefix', r"(?:dword|word|lock)(?![a-zA-Z_])"),
PatternRule(name=r'label', pattern=r"[a-zA-Z_.][a-zA-Z0-9_.]*:"), PatternRule(r'label', r"[a-zA-Z_.][a-zA-Z0-9_.]*:"),
PatternRule(name=r"identifier", pattern=r"[a-zA-Z_][a-zA-Z0-9_]*"), PatternRule(r"identifier", r"[a-zA-Z_][a-zA-Z0-9_]*"),
PatternRule(name=r"integer", pattern=r"(0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?"), PatternRule(r"integer", r"(0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?"),
PatternRule(name=r"float", pattern=r"[0-9]+\.[0-9]*|\.[0-9]+|([0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+"), PatternRule(r"float", r"[0-9]+\.[0-9]*|\.[0-9]+|([0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+"),
RegionRule(name=r'string', start=r'"""', grammar=StringGrammar(), end=r'"""'), RegionRule(r'string', r'"""', StringGrammar, r'"""'),
RegionRule(name=r'string', start=r"'''", grammar=StringGrammar(), end=r"'''"), RegionRule(r'string', r"'''", StringGrammar, r"'''"),
RegionRule(name=r'string', start=r'"', grammar=StringGrammar(), end=r'"'), RegionRule(r'string', r'"', StringGrammar, r'"'),
RegionRule(name=r'string', start=r"'", grammar=StringGrammar(), end=r"'"), RegionRule(r'string', r"'", StringGrammar, r"'"),
PatternRule(name=r'comment', pattern=r';.*$'), PatternRule(r'comment', r';.*$'),
] ]
class Nasm(mode2.Fundamental): class Nasm(mode2.Fundamental):
grammar = NasmGrammar() grammar = NasmGrammar
def __init__(self, w): def __init__(self, w):
mode2.Fundamental.__init__(self, w) mode2.Fundamental.__init__(self, w)
self.colors = { self.colors = {

View File

@@ -1,7 +1,7 @@
import re, sets, string, sys import re, sets, string, sys
import color, commands, default, method, mode2, regex, tab2 import color, commands, default, method, mode2, regex, tab2
from point2 import Point from point2 import Point
from lex3 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule from lex3 import Grammar, PatternRule, ContextPatternRule, RegionRule
from method import Argument, Method from method import Argument, Method
class PodGrammar(Grammar): class PodGrammar(Grammar):
@@ -45,7 +45,7 @@ class PerlGrammar(Grammar):
PatternRule(r'hash_key', r'[A-Za-z0-9_]+(?= *=>)'), PatternRule(r'hash_key', r'[A-Za-z0-9_]+(?= *=>)'),
PatternRule(r'length', r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*"), PatternRule(r'length', r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*"),
PatternRule(r'cast', r'[\$\@\%\^\&](?= *{)'), PatternRule(r'cast', r'[\$\@\%\^\&](?= *{)'),
PatternRule(r'scalar', r"\$[][><ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])"), PatternRule(r'scalar', r"\$[\[\]<>ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])"),
PatternRule(r'array', r"@_"), PatternRule(r'array', r"@_"),
PatternRule(r'function', r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?=-> *\()"), PatternRule(r'function', r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?=-> *\()"),
PatternRule(r'scalar', r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*"), PatternRule(r'scalar', r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*"),
@@ -59,12 +59,12 @@ class PerlGrammar(Grammar):
RegionRule(r'match', r'm(?P<delim>#)', StringGrammar, r'#[a-z]*'), RegionRule(r'match', r'm(?P<delim>#)', StringGrammar, r'#[a-z]*'),
# replace regexes # replace regexes
DualRegionRule(r'replace', r's *(?P<delim>[^ a-zA-Z0-9_])', StringGrammar, r'%(delim)s', StringGrammar, r'%(delim)s[a-z]*'), RegionRule(r'replace', r's *(?P<delim>[^ a-zA-Z0-9_])', StringGrammar, r'%(delim)s', StringGrammar, r'%(delim)s[a-z]*'),
DualRegionRule(r'replace', r's(?P<delim>#)', StringGrammar, r'#', StringGrammar, r'#[a-z]*'), RegionRule(r'replace', r's(?P<delim>#)', StringGrammar, r'#', StringGrammar, r'#[a-z]*'),
# translate operator # translate operator
DualRegionRule(r'translate', r'(?:y|tr) *(?P<delim>[^ a-zA-Z0-9_])', Grammar, r'%(delim)s', Grammar, r'%(delim)s[a-z]*'), RegionRule(r'translate', r'(?:y|tr) *(?P<delim>[^ a-zA-Z0-9_])', Grammar, r'%(delim)s', Grammar, r'%(delim)s[a-z]*'),
DualRegionRule(r'translate', r'(?:y|tr)#', Grammar, r'#', Grammar, r'#[a-z]*'), RegionRule(r'translate', r'(?:y|tr)#', Grammar, r'#', Grammar, r'#[a-z]*'),
# some more basic stuff # some more basic stuff
PatternRule(r'package', r"(?<=package )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*"), PatternRule(r'package', r"(?<=package )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*"),
@@ -255,7 +255,7 @@ class Perl(mode2.Fundamental):
# replace regex # replace regex
'replace.start': color.build('cyan', 'default'), 'replace.start': color.build('cyan', 'default'),
'replace.middle': color.build('cyan', 'default'), 'replace.middle0': color.build('cyan', 'default'),
'replace.end': color.build('cyan', 'default'), 'replace.end': color.build('cyan', 'default'),
'replace.null': color.build('cyan', 'default'), 'replace.null': color.build('cyan', 'default'),
'replace.escaped': color.build('magenta', 'default'), 'replace.escaped': color.build('magenta', 'default'),
@@ -267,7 +267,7 @@ class Perl(mode2.Fundamental):
# translate regex # translate regex
'translate.start': color.build('magenta', 'default'), 'translate.start': color.build('magenta', 'default'),
'translate.middle': color.build('magenta', 'default'), 'translate.middle0': color.build('magenta', 'default'),
'translate.end': color.build('magenta', 'default'), 'translate.end': color.build('magenta', 'default'),
'translate.null': color.build('magenta', 'default'), 'translate.null': color.build('magenta', 'default'),
} }

View File

@@ -17,7 +17,6 @@ class ShGrammar(Grammar):
PatternRule(r'operator', r"(?:-eq|-ne|-gt|-lt|-ge|-le| = | != )"), PatternRule(r'operator', r"(?:-eq|-ne|-gt|-lt|-ge|-le| = | != )"),
PatternRule(r'delimiter', r";;|[\[\]\(\);\{\}|&><]"), PatternRule(r'delimiter', r";;|[\[\]\(\);\{\}|&><]"),
RegionRule(r'eval', '`', StringGrammar, '`'), RegionRule(r'eval', '`', StringGrammar, '`'),
#RegionRule(r'neval', r'\$\(', None, r'\)'),
RegionRule(r'neval', r'\$\(', StringGrammar, r'\)'), RegionRule(r'neval', r'\$\(', StringGrammar, r'\)'),
PatternRule(r'variable', r"(?:^|(?<= ))[a-zA-Z_][a-zA-Z_][a-zA-Z0-9_]*(?==)"), PatternRule(r'variable', r"(?:^|(?<= ))[a-zA-Z_][a-zA-Z_][a-zA-Z0-9_]*(?==)"),
PatternRule(r'variable', r"\${(?:[a-zA-Z0-9_]+|\?\$)}"), PatternRule(r'variable', r"\${(?:[a-zA-Z0-9_]+|\?\$)}"),

View File

@@ -1,5 +1,5 @@
import color, mode2, tab2 import color, mode2, tab2
from lex3 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule, DualRegionRule, NocaseDualRegionRule from lex3 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule
from mode_python import StringGrammar from mode_python import StringGrammar
class PlPgSqlGrammar(Grammar): class PlPgSqlGrammar(Grammar):

View File

@@ -3,8 +3,7 @@ from lex3 import Token, Rule, PatternRule, RegionRule, Grammar
class WordRule(PatternRule): class WordRule(PatternRule):
def __init__(self): def __init__(self):
PatternRule.__init__(self, name=r'word', PatternRule.__init__(self, r'word', pattern=r"[a-zA-Z][a-zA-Z-\']*[a-zA-Z](?=$|[^a-zA-Z0-9-_])")
pattern=r"[a-zA-Z][a-zA-Z-\']*[a-zA-Z](?=$|[^a-zA-Z0-9-_])")
def _spelled_ok(self, word): def _spelled_ok(self, word):
if ispell.can_spell(): if ispell.can_spell():
speller = ispell.get_speller() speller = ispell.get_speller()
@@ -22,15 +21,14 @@ class WordRule(PatternRule):
class ContinuedRule(RegionRule): class ContinuedRule(RegionRule):
def __init__(self): def __init__(self):
RegionRule.__init__(self, name=r'cont', start=r'[a-zA-Z0-9_]+- *$', RegionRule.__init__(self, r'cont', r'[a-zA-Z0-9_]+- *$', Grammar, r'^ *(?:[^ ]+|$)')
grammar=Grammar(), end=r'^ *(?:[^ ]+|$)')
class TextGrammar(Grammar): class TextGrammar(Grammar):
rules = [ rules = [
ContinuedRule(), ContinuedRule(),
WordRule(), WordRule(),
PatternRule(name=r'punct', pattern=r'[^a-zA-Z0-9_]'), PatternRule(r'punct', r'[^a-zA-Z0-9_]'),
PatternRule(name=r'stuff', pattern=r'[a-zA-Z0-9_]+'), PatternRule(r'stuff', r'[a-zA-Z0-9_]+'),
] ]
class Text(mode2.Fundamental): class Text(mode2.Fundamental):

View File

@@ -3,18 +3,18 @@ from lex3 import Grammar, PatternRule, RegionRule
class TagGrammar(Grammar): class TagGrammar(Grammar):
rules = [ rules = [
RegionRule(name=r'string', start=r'(?P<tag>["\'])', grammar=Grammar(), end=r'%(tag)s'), RegionRule(r'string', r'(?P<tag>["\'])', Grammar, r'%(tag)s'),
PatternRule(name=r'namespace', pattern=r'[a-zA-Z_]+:'), PatternRule(r'namespace', pattern=r'[a-zA-Z_]+:'),
PatternRule(name=r'attrname', pattern=r'[^ =>\n]+(?==)'), PatternRule(r'attrname', pattern=r'[^ =>\n]+(?==)'),
PatternRule(name=r'name', pattern=r'[^ =>\n]+'), PatternRule(r'name', pattern=r'[^ =>\n]+'),
] ]
class TemplateGrammar(Grammar): class TemplateGrammar(Grammar):
rules = [ rules = [
RegionRule(name=r'comment', start=r'<!--', grammar=Grammar(), end=r'-->'), RegionRule(r'comment', r'<!--', Grammar, r'-->'),
RegionRule(name=r'template', start=r'\[\%', grammar=Grammar(), end=r'%%\]'), RegionRule(r'template', r'\[\%', Grammar, r'%%\]'),
RegionRule(name=r'opentag', start=r'<', grammar=TagGrammar(), end=r'/?>'), RegionRule(r'opentag', r'<', TagGrammar, r'/?>'),
PatternRule(name=r'closetag', pattern=r'< */ *[ =>\n]+ *>'), PatternRule(r'closetag', pattern=r'< */ *[ =>\n]+ *>'),
] ]
class Template(mode2.Fundamental): class Template(mode2.Fundamental):

View File

@@ -1,5 +1,10 @@
import re import re
# lexing
reserved_token_names = re.compile(r'^(?:rules|null|start|end|middle[0-9]*)$')
valid_token_name = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
middle_token_name = re.compile(r'^middle([0-9]+)$')
# meta regexes # meta regexes
meta_chars = re.compile(r'([\.\^\$\*\+\?\{\}\(\)\[\]\|\"\'\\,])') meta_chars = re.compile(r'([\.\^\$\*\+\?\{\}\(\)\[\]\|\"\'\\,])')
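
A small illustrative check (not part of the commit) of how the helper patterns added to regex.py are used: RegionRule.resume() in lex3.py matches a resumed token's link against middle_token_name to recover which grammar/stop-pattern pair to re-enter, and rule names matching reserved_token_names are rejected in Rule.__init__.

import re

# same patterns as added above
reserved_token_names = re.compile(r'^(?:rules|null|start|end|middle[0-9]*)$')
middle_token_name = re.compile(r'^middle([0-9]+)$')

m = middle_token_name.match('middle0')
assert m and int(m.group(1)) + 1 == 1         # resume in the second pair, as resume() does
assert reserved_token_names.match('middle3')  # 'middleN' stays reserved for rule names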