parent
3b055e897a
commit
83ef208a0f
41
lex2.py
41
lex2.py
|
@ -23,7 +23,7 @@ class Token(object):
|
|||
|
||||
class Rule:
    """Abstract base class for lexer rules.

    Concrete rules override match(); this base implementation always raises.
    The make_token() helper is shared by all rules to build a Token at the
    lexer's current position.
    """
    name = 'abstract'

    def match(self, lexer, context=[], d={}):
        """Try to match this rule at the lexer's current position.

        lexer   -- the lexer being driven; rules read lexer.lines, lexer.y
                   and lexer.x.
        context -- list of enclosing rule names; rules join it with '.' to
                   build the token name.
        d       -- dict of values handed down by an enclosing region rule
                   (a region passes the named groups of its start match).

        The defaults are shared objects; implementations must not mutate
        them.

        Raises Exception: the abstract rule can never match.
        """
        # Call form of raise: valid on both Python 2 and Python 3, unlike
        # the old `raise Exception, "..."` statement form.
        raise Exception("%s rule cannot match!" % self.name)

    def make_token(self, lexer, s, name, **vargs):
        """Return a Token called `name` for string `s` at (lexer.y, lexer.x).

        Extra keyword arguments are passed through to the Token constructor.
        """
        return Token(name, lexer.y, lexer.x, s, **vargs)
|
||||
|
@ -34,7 +34,7 @@ class ConstantRule(Rule):
|
|||
assert name not in reserved_names, "reserved rule name: %r" % name
|
||||
self.name = name
|
||||
self.constant = constant
|
||||
def match(self, lexer, context=[]):
|
||||
def match(self, lexer, context=[], d={}):
|
||||
if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
|
||||
name = '.'.join(context + [self.name])
|
||||
lexer.add_token(self.make_token(lexer, self.constant, name))
|
||||
|
@ -50,7 +50,7 @@ class PatternRule(Rule):
|
|||
self.name = name
|
||||
self.pattern = pattern
|
||||
self.re = re.compile(pattern)
|
||||
def match(self, lexer, context=[]):
|
||||
def match(self, lexer, context=[], d={}):
|
||||
m = self.re.match(lexer.lines[lexer.y], lexer.x)
|
||||
if m:
|
||||
name = '.'.join(context + [self.name])
|
||||
|
@ -60,6 +60,28 @@ class PatternRule(Rule):
|
|||
else:
|
||||
return False
|
||||
|
||||
class ContextPatternRule(Rule):
    """Pattern rule whose regex depends on values from the enclosing rule.

    `pattern` is a %-format template (e.g. containing "%(delim)s") that is
    filled in from the dict `d` passed to match().  When `d` lacks a key the
    template needs, the fixed `fallback` regex is used instead.
    """

    def __init__(self, name, pattern, fallback):
        """Store the template and pre-compile the fallback regex.

        name     -- rule name; must match valid_name_re and not be reserved.
        pattern  -- regex template using %(key)s placeholders.
        fallback -- plain regex used when a placeholder cannot be filled.
        """
        assert valid_name_re.match(name), 'invalid name %r' % name
        assert name not in reserved_names, "reserved rule name: %r" % name
        self.name = name
        self.pattern = pattern
        self.fallback = fallback
        self.fallback_re = re.compile(fallback)

    def match(self, lexer, context=[], d={}):
        """Match at the lexer's current position; return True on success.

        On a match, a token named '.'.join(context + [self.name]) is added
        to the lexer and lexer.x is advanced past the matched text.

        d is expected to be a dict of matched text (a region passes
        m.groupdict() of its start match).
        """
        # The values in d are literal text from the input being lexed (for
        # instance a regex delimiter such as "]" or "\\"), so they must be
        # escaped before being spliced into a regex; otherwise they can
        # break the pattern or silently change its meaning.  Groups that
        # did not participate in the match are None: treat them as missing
        # so the fallback is used rather than interpolating "None".
        escaped = dict((key, re.escape(value))
                       for key, value in d.items()
                       if value is not None)
        try:
            source = self.pattern % escaped
        except KeyError:
            # The template needs a value the context did not provide.
            r = self.fallback_re
        else:
            r = re.compile(source)

        m = r.match(lexer.lines[lexer.y], lexer.x)
        if not m:
            return False

        text = m.group(0)
        name = '.'.join(context + [self.name])
        lexer.add_token(self.make_token(lexer, text, name))
        lexer.x += len(text)
        return True
|
||||
|
||||
class RegionRule(Rule):
|
||||
def __init__(self, name, start, grammar, end):
|
||||
assert valid_name_re.match(name), 'invalid name %r' % name
|
||||
|
@ -74,11 +96,12 @@ class RegionRule(Rule):
|
|||
t = self.make_token(lexer, m.group(0), t_name)
|
||||
lexer.add_token(t)
|
||||
lexer.x += len(m.group(0))
|
||||
def match(self, lexer, context=[]):
|
||||
def match(self, lexer, context=[], d={}):
|
||||
m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
|
||||
# see if we can match our start token
|
||||
if m:
|
||||
# ok, so create our start token, and get ready to start reading data
|
||||
d = m.groupdict()
|
||||
self._add_from_regex(context, 'start', lexer, m)
|
||||
null_t_name = '.'.join(context + [self.name, 'null'])
|
||||
null_t = None
|
||||
|
@ -87,7 +110,7 @@ class RegionRule(Rule):
|
|||
# reference named groups from the start token. if we have no end,
|
||||
# well, then, we're never getting out of here alive!
|
||||
if self.end:
|
||||
end_re = re.compile(self.end % m.groupdict())
|
||||
end_re = re.compile(self.end % d)
|
||||
|
||||
# ok, so as long as we aren't done (we haven't found an end token),
|
||||
# keep reading input
|
||||
|
@ -117,7 +140,7 @@ class RegionRule(Rule):
|
|||
# find a token, note that we found one and exit the loop
|
||||
found = False
|
||||
for rule in self.grammar.rules:
|
||||
if rule.match(lexer, context + [self.name]):
|
||||
if rule.match(lexer, context + [self.name], d):
|
||||
found = True
|
||||
null_t = None
|
||||
break
|
||||
|
@ -166,7 +189,7 @@ class DualRegionRule(Rule):
|
|||
t = self.make_token(lexer, m.group(0), t_name)
|
||||
lexer.add_token(t)
|
||||
lexer.x += len(m.group(0))
|
||||
def match(self, lexer, context=[]):
|
||||
def match(self, lexer, context=[], d={}):
|
||||
m1 = self.start_re.match(lexer.lines[lexer.y], lexer.x)
|
||||
# see if we can match our start token
|
||||
if m1:
|
||||
|
@ -208,7 +231,7 @@ class DualRegionRule(Rule):
|
|||
# find a token, note that we found one and exit the loop
|
||||
found = False
|
||||
for rule in self.grammar1.rules:
|
||||
if rule.match(lexer, context + [self.name]):
|
||||
if rule.match(lexer, context + [self.name], d1):
|
||||
found = True
|
||||
null_t = None
|
||||
break
|
||||
|
@ -267,7 +290,7 @@ class DualRegionRule(Rule):
|
|||
# find a token, note that we found one and exit the loop
|
||||
found = False
|
||||
for rule in self.grammar2.rules:
|
||||
if rule.match(lexer, context + [self.name]):
|
||||
if rule.match(lexer, context + [self.name], d3):
|
||||
found = True
|
||||
null_t = None
|
||||
break
|
||||
|
|
50
lex2_perl.py
50
lex2_perl.py
|
@ -1,4 +1,4 @@
|
|||
from lex2 import Grammar, ConstantRule, PatternRule, RegionRule, DualRegionRule
|
||||
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
|
||||
|
||||
class PodGrammar(Grammar):
|
||||
rules = [
|
||||
|
@ -34,17 +34,14 @@ class StringGrammar(Grammar):
|
|||
name=r'hash_deref',
|
||||
pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?:->{(?:[a-zA-Z_][a-zA-Z_0-9]*|'(?:\\.|[^'\\])*'|\"(\\.|[^\\\"])*\")})+",
|
||||
),
|
||||
#PatternRule(
|
||||
# name=r'hash_bareword_index',
|
||||
# pattern=r'(?<={) *[A-Za-z0-9_]+(?=})',
|
||||
#),
|
||||
PatternRule(
|
||||
name=r'length_scalar',
|
||||
pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
|
||||
),
|
||||
PatternRule(
|
||||
ContextPatternRule(
|
||||
name=r'system_scalar',
|
||||
pattern=r"\$[][><ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])",
|
||||
pattern=r"\$[^A-Za-z0-9 %(delim)s](?![A-Za-z0-9_])",
|
||||
fallback=r"\$[^A-Za-z0-9 ](?![A-Za-z0-9_])",
|
||||
),
|
||||
PatternRule(
|
||||
name=r'system_array',
|
||||
|
@ -186,31 +183,43 @@ class PerlGrammar(Grammar):
|
|||
grammar=Grammar(),
|
||||
end=r'\]',
|
||||
),
|
||||
|
||||
# match regexes
|
||||
RegionRule(
|
||||
name=r'implicit_match_regex',
|
||||
start=r'(?:(?<==~)|(?<=!~)|(?<=\()) */',
|
||||
name=r'match_regex1',
|
||||
start=r'(?:(?<==~)|(?<=!~)|(?<=\()) *(?P<delim>/)',
|
||||
grammar=StringGrammar(),
|
||||
end=r'/',
|
||||
end=r'/[a-z]*',
|
||||
),
|
||||
RegionRule(
|
||||
name=r'explicit_match_regex1',
|
||||
name=r'match_regex2',
|
||||
start=r'm *(?P<delim>[^ #a-zA-Z0-9_])',
|
||||
grammar=StringGrammar(),
|
||||
end=r'%(delim)s',
|
||||
end=r'%(delim)s[a-z]*',
|
||||
),
|
||||
RegionRule(
|
||||
name=r'explicit_match_regex1',
|
||||
start=r'm#',
|
||||
name=r'match_regex3',
|
||||
start=r'm(?P<delim>#)',
|
||||
grammar=StringGrammar(),
|
||||
end=r'#',
|
||||
end=r'#[a-z]*',
|
||||
),
|
||||
|
||||
# replace regexes
|
||||
DualRegionRule(
|
||||
name=r'replace_regex1',
|
||||
start=r's *(?P<delim>[^ a-zA-Z0-9_])',
|
||||
grammar1=StringGrammar(),
|
||||
middle=r'%(delim)s',
|
||||
grammar2=StringGrammar(),
|
||||
end=r'%(delim)s[a-z]*',
|
||||
),
|
||||
DualRegionRule(
|
||||
name=r'replace_regex',
|
||||
start=r's */',
|
||||
name=r'replace_regex2',
|
||||
start=r's#',
|
||||
grammar1=StringGrammar(),
|
||||
middle=r' */ *',
|
||||
middle=r'#',
|
||||
grammar2=StringGrammar(),
|
||||
end=r'/ *[a-z]*',
|
||||
end=r'#[a-z]*',
|
||||
),
|
||||
|
||||
PatternRule(
|
||||
|
@ -219,7 +228,8 @@ class PerlGrammar(Grammar):
|
|||
),
|
||||
PatternRule(
|
||||
name=r'sub',
|
||||
pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?= *{)",
|
||||
#pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?= *{)",
|
||||
pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*",
|
||||
),
|
||||
PatternRule(
|
||||
name=r'use',
|
||||
|
|
2
test2.py
2
test2.py
|
@ -15,4 +15,4 @@ for path in paths:
|
|||
lexer.lex(lines)
|
||||
print path
|
||||
for token in lexer:
|
||||
print '%-28s| %s' % (token.name, token.string)
|
||||
print '%-28s| %r' % (token.name, token.string)
|
||||
|
|
44
test3.py
44
test3.py
|
@ -17,12 +17,17 @@ for i in range(0, len(color_list)):
|
|||
color_dict[color_names[i]] = color_list[i]
|
||||
|
||||
token_colors = {
|
||||
'escaped': 'lpurple',
|
||||
'null': 'white',
|
||||
'delimiter': 'white',
|
||||
'pod.start': 'lred',
|
||||
'pod.null': 'lred',
|
||||
'pod.end': 'lred',
|
||||
'pod.header': 'lpurple',
|
||||
'pod.indent_level': 'lpurple',
|
||||
'pod.item_entry': 'lpurple',
|
||||
'pod.format': 'lpurple',
|
||||
'pod.encoding_type': 'lpurple',
|
||||
'sub': 'lcyan',
|
||||
'number': 'white',
|
||||
'operator': 'white',
|
||||
|
@ -54,12 +59,39 @@ token_colors = {
|
|||
'array': 'yellow',
|
||||
'hash': 'yellow',
|
||||
'bareword_hash_index': 'lgreen',
|
||||
'quoted_region': 'lcyan',
|
||||
'match_regex': 'lcyan',
|
||||
'replace_regex.start': 'lcyan',
|
||||
'replace_regex.middle': 'lcyan',
|
||||
'replace_regex.end': 'lcyan',
|
||||
'replace_regex.null': 'lcyan',
|
||||
|
||||
# quoted region
|
||||
'quoted_region1': 'lcyan',
|
||||
'quoted_region1.start': 'lcyan',
|
||||
'quoted_region1.null': 'lcyan',
|
||||
'quoted_region1.end': 'lcyan',
|
||||
'quoted_region2': 'lcyan',
|
||||
'quoted_region2.start': 'lcyan',
|
||||
'quoted_region2.null': 'lcyan',
|
||||
'quoted_region2.end': 'lcyan',
|
||||
|
||||
# match regex
|
||||
'match_regex1.start': 'lcyan',
|
||||
'match_regex1.end': 'lcyan',
|
||||
'match_regex1.null': 'lcyan',
|
||||
'match_regex2.start': 'lcyan',
|
||||
'match_regex2.end': 'lcyan',
|
||||
'match_regex2.null': 'lcyan',
|
||||
'match_regex3.start': 'lcyan',
|
||||
'match_regex3.end': 'lcyan',
|
||||
'match_regex3.null': 'lcyan',
|
||||
|
||||
# replace regex
|
||||
'replace_regex1.start': 'lcyan',
|
||||
'replace_regex1.middle': 'lcyan',
|
||||
'replace_regex1.end': 'lcyan',
|
||||
'replace_regex1.null': 'lcyan',
|
||||
'replace_regex2.start': 'lcyan',
|
||||
'replace_regex2.middle': 'lcyan',
|
||||
'replace_regex2.end': 'lcyan',
|
||||
'replace_regex2.null': 'lcyan',
|
||||
|
||||
#
|
||||
'bareword_hash_key': 'lgreen',
|
||||
'interpolated_scalar': 'yellow',
|
||||
'interpolated_system_scalar': 'yellow',
|
||||
|
|
Loading…
Reference in New Issue