fixes for perl highlighting

--HG--
branch : pmacs2
This commit is contained in:
moculus 2007-04-01 04:54:52 +00:00
parent 3b055e897a
commit 83ef208a0f
4 changed files with 103 additions and 38 deletions

41
lex2.py
View File

@ -23,7 +23,7 @@ class Token(object):
class Rule:
name = 'abstract'
def match(self, lexer, context=[]):
def match(self, lexer, context=[], d={}):
raise Exception, "%s rule cannot match!" % self.name
def make_token(self, lexer, s, name, **vargs):
return Token(name, lexer.y, lexer.x, s, **vargs)
@ -34,7 +34,7 @@ class ConstantRule(Rule):
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
self.constant = constant
def match(self, lexer, context=[]):
def match(self, lexer, context=[], d={}):
if lexer.lines[lexer.y][lexer.x:].startswith(self.constant):
name = '.'.join(context + [self.name])
lexer.add_token(self.make_token(lexer, self.constant, name))
@ -50,7 +50,7 @@ class PatternRule(Rule):
self.name = name
self.pattern = pattern
self.re = re.compile(pattern)
def match(self, lexer, context=[]):
def match(self, lexer, context=[], d={}):
m = self.re.match(lexer.lines[lexer.y], lexer.x)
if m:
name = '.'.join(context + [self.name])
@ -60,6 +60,28 @@ class PatternRule(Rule):
else:
return False
class ContextPatternRule(Rule):
    """Pattern rule whose regex is parameterised by the enclosing context.

    `pattern` is a %-style template (e.g. containing ``%(delim)s``) that is
    filled in from the dictionary `d` handed to match().  `fallback` is a
    plain regex used whenever the template refers to a key `d` cannot supply.
    """
    def __init__(self, name, pattern, fallback):
        assert valid_name_re.match(name), 'invalid name %r' % name
        assert name not in reserved_names, "reserved rule name: %r" % name
        self.name = name
        self.pattern = pattern
        self.fallback = fallback
        # the fallback never changes, so compile it once up front
        self.fallback_re = re.compile(fallback)
    def _context_re(self, d):
        """Return the compiled regex to use for the context dictionary `d`."""
        # The values are literal text captured from the input, not regex
        # fragments: escape them so a value such as ']', '^' or '\\' cannot
        # alter (or break) the pattern it is spliced into.  Values that are
        # None are dropped so that a template referring to them takes the
        # fallback path instead of embedding the text "None".
        escaped = {}
        for key, value in d.items():
            if value is not None:
                escaped[key] = re.escape(value)
        try:
            source = self.pattern % escaped
        except KeyError:
            return self.fallback_re
        return re.compile(source)
    def match(self, lexer, context=[], d={}):
        """Try to match this rule at the lexer's current position.

        `context` is the list of enclosing rule names used to qualify the
        token name; `d` maps names to text used to fill in the pattern
        template.  Neither default is mutated here.  On success a token is
        added to the lexer, lexer.x is advanced past the matched text and
        True is returned; otherwise False is returned.
        """
        r = self._context_re(d)
        m = r.match(lexer.lines[lexer.y], lexer.x)
        if m:
            name = '.'.join(context + [self.name])
            lexer.add_token(self.make_token(lexer, m.group(0), name))
            lexer.x += len(m.group(0))
            return True
        else:
            return False
class RegionRule(Rule):
def __init__(self, name, start, grammar, end):
assert valid_name_re.match(name), 'invalid name %r' % name
@ -74,11 +96,12 @@ class RegionRule(Rule):
t = self.make_token(lexer, m.group(0), t_name)
lexer.add_token(t)
lexer.x += len(m.group(0))
def match(self, lexer, context=[]):
def match(self, lexer, context=[], d={}):
m = self.start_re.match(lexer.lines[lexer.y], lexer.x)
# see if we can match our start token
if m:
# ok, so create our start token, and get ready to start reading data
d = m.groupdict()
self._add_from_regex(context, 'start', lexer, m)
null_t_name = '.'.join(context + [self.name, 'null'])
null_t = None
@ -87,7 +110,7 @@ class RegionRule(Rule):
# reference named groups from the start token. if we have no end,
# well, then, we're never getting out of here alive!
if self.end:
end_re = re.compile(self.end % m.groupdict())
end_re = re.compile(self.end % d)
# ok, so as long as we aren't done (we haven't found an end token),
# keep reading input
@ -117,7 +140,7 @@ class RegionRule(Rule):
# find a token, note that we found one and exit the loop
found = False
for rule in self.grammar.rules:
if rule.match(lexer, context + [self.name]):
if rule.match(lexer, context + [self.name], d):
found = True
null_t = None
break
@ -166,7 +189,7 @@ class DualRegionRule(Rule):
t = self.make_token(lexer, m.group(0), t_name)
lexer.add_token(t)
lexer.x += len(m.group(0))
def match(self, lexer, context=[]):
def match(self, lexer, context=[], d={}):
m1 = self.start_re.match(lexer.lines[lexer.y], lexer.x)
# see if we can match our start token
if m1:
@ -208,7 +231,7 @@ class DualRegionRule(Rule):
# find a token, note that we found one and exit the loop
found = False
for rule in self.grammar1.rules:
if rule.match(lexer, context + [self.name]):
if rule.match(lexer, context + [self.name], d1):
found = True
null_t = None
break
@ -267,7 +290,7 @@ class DualRegionRule(Rule):
# find a token, note that we found one and exit the loop
found = False
for rule in self.grammar2.rules:
if rule.match(lexer, context + [self.name]):
if rule.match(lexer, context + [self.name], d3):
found = True
null_t = None
break

View File

@ -1,4 +1,4 @@
from lex2 import Grammar, ConstantRule, PatternRule, RegionRule, DualRegionRule
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
class PodGrammar(Grammar):
rules = [
@ -34,17 +34,14 @@ class StringGrammar(Grammar):
name=r'hash_deref',
pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?:->{(?:[a-zA-Z_][a-zA-Z_0-9]*|'(?:\\.|[^'\\])*'|\"(\\.|[^\\\"])*\")})+",
),
#PatternRule(
# name=r'hash_bareword_index',
# pattern=r'(?<={) *[A-Za-z0-9_]+(?=})',
#),
PatternRule(
name=r'length_scalar',
pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
),
PatternRule(
ContextPatternRule(
name=r'system_scalar',
pattern=r"\$[][><ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])",
pattern=r"\$[^A-Za-z0-9 %(delim)s](?![A-Za-z0-9_])",
fallback=r"\$[^A-Za-z0-9 ](?![A-Za-z0-9_])",
),
PatternRule(
name=r'system_array',
@ -186,31 +183,43 @@ class PerlGrammar(Grammar):
grammar=Grammar(),
end=r'\]',
),
# match regexes
RegionRule(
name=r'implicit_match_regex',
start=r'(?:(?<==~)|(?<=!~)|(?<=\()) */',
name=r'match_regex1',
start=r'(?:(?<==~)|(?<=!~)|(?<=\()) *(?P<delim>/)',
grammar=StringGrammar(),
end=r'/',
end=r'/[a-z]*',
),
RegionRule(
name=r'explicit_match_regex1',
name=r'match_regex2',
start=r'm *(?P<delim>[^ #a-zA-Z0-9_])',
grammar=StringGrammar(),
end=r'%(delim)s',
end=r'%(delim)s[a-z]*',
),
RegionRule(
name=r'explicit_match_regex1',
start=r'm#',
name=r'match_regex3',
start=r'm(?P<delim>#)',
grammar=StringGrammar(),
end=r'#',
end=r'#[a-z]*',
),
# replace regexes
DualRegionRule(
name=r'replace_regex1',
start=r's *(?P<delim>[^ a-zA-Z0-9_])',
grammar1=StringGrammar(),
middle=r'%(delim)s',
grammar2=StringGrammar(),
end=r'%(delim)s[a-z]*',
),
DualRegionRule(
name=r'replace_regex',
start=r's */',
name=r'replace_regex2',
start=r's#',
grammar1=StringGrammar(),
middle=r' */ *',
middle=r'#',
grammar2=StringGrammar(),
end=r'/ *[a-z]*',
end=r'#[a-z]*',
),
PatternRule(
@ -219,7 +228,8 @@ class PerlGrammar(Grammar):
),
PatternRule(
name=r'sub',
pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?= *{)",
#pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?= *{)",
pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*",
),
PatternRule(
name=r'use',

View File

@ -15,4 +15,4 @@ for path in paths:
lexer.lex(lines)
print path
for token in lexer:
print '%-28s| %s' % (token.name, token.string)
print '%-28s| %r' % (token.name, token.string)

View File

@ -17,12 +17,17 @@ for i in range(0, len(color_list)):
color_dict[color_names[i]] = color_list[i]
token_colors = {
'escaped': 'lpurple',
'null': 'white',
'delimiter': 'white',
'pod.start': 'lred',
'pod.null': 'lred',
'pod.end': 'lred',
'pod.header': 'lpurple',
'pod.indent_level': 'lpurple',
'pod.item_entry': 'lpurple',
'pod.format': 'lpurple',
'pod.encoding_type': 'lpurple',
'sub': 'lcyan',
'number': 'white',
'operator': 'white',
@ -54,12 +59,39 @@ token_colors = {
'array': 'yellow',
'hash': 'yellow',
'bareword_hash_index': 'lgreen',
'quoted_region': 'lcyan',
'match_regex': 'lcyan',
'replace_regex.start': 'lcyan',
'replace_regex.middle': 'lcyan',
'replace_regex.end': 'lcyan',
'replace_regex.null': 'lcyan',
# quoted region
'quoted_region1': 'lcyan',
'quoted_region1.start': 'lcyan',
'quoted_region1.null': 'lcyan',
'quoted_region1.end': 'lcyan',
'quoted_region2': 'lcyan',
'quoted_region2.start': 'lcyan',
'quoted_region2.null': 'lcyan',
'quoted_region2.end': 'lcyan',
# match regex
'match_regex1.start': 'lcyan',
'match_regex1.end': 'lcyan',
'match_regex1.null': 'lcyan',
'match_regex2.start': 'lcyan',
'match_regex2.end': 'lcyan',
'match_regex2.null': 'lcyan',
'match_regex3.start': 'lcyan',
'match_regex3.end': 'lcyan',
'match_regex3.null': 'lcyan',
# replace regex
'replace_regex1.start': 'lcyan',
'replace_regex1.middle': 'lcyan',
'replace_regex1.end': 'lcyan',
'replace_regex1.null': 'lcyan',
'replace_regex2.start': 'lcyan',
'replace_regex2.middle': 'lcyan',
'replace_regex2.end': 'lcyan',
'replace_regex2.null': 'lcyan',
#
'bareword_hash_key': 'lgreen',
'interpolated_scalar': 'yellow',
'interpolated_system_scalar': 'yellow',