#!/usr/bin/env python

# python 2.3 imports
from optparse import OptionParser

# our imports
import lex

# this enables highlighting of perl's string interpolation, but it can be
# slower and possibly buggier
INTERPOLATION_HIGHLIGHTING = False
#INTERPOLATION_HIGHLIGHTING = True
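# for example, with interpolation highlighting enabled, the variables inside
# a double-quoted string such as
#     "hello $name, you have @items"
# are lexed as 'interpolated scalar' / 'interpolated array' tokens instead of
# plain string text (see _default_rules below)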

class PerlGrammar(lex.Grammar):
    GRAMMAR_LIST = [
        {'name': 'heredoc',
         'expr': r"""<< *([a-zA-Z0-9_]+) *;(?:.*?\n)*?(?:\1|$)""",
         'action': lex.make_token},

        {'name': 'endblock',
         'expr': r"""(?:^|\n)(?:__END__|__DATA__)(?:.|\n)*$""",
         'action': lex.make_token},

        {'name': 'pod',
         'expr': r"""(?:^|(?<=\n))=[a-zA-Z0-9_]+.*(?:\n(?!=cut).*)*(?:\n=cut|$)""",
         'action': lex.make_token},

        {'name': "comment",
         'expr': r'[#].*(?:\n|$)',
         'action': lex.make_token},

        {'name': "string1",
         'expr': r'''"(?:\\(?:.|\n)|[^\\"]|[ \n])*(?:"|.?$)''',
         'action': lex.make_token},

        {'name': "string2",
         'expr': r"""'(?:\\(?:.|\n)|[^\\'])*(?:'|.?$)""",
         'action': lex.make_token},

        {'name': "evalstring",
         'expr': r"""`(?:\\(?:.|\n)|[^\\`])*(?:`|.?$)""",
         'action': lex.make_token},
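
        # note: the three string rules above end with alternates like
        # (?:"|.?$), apparently so that an unterminated string is still
        # tokenized through end-of-input instead of breaking the lex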

        {'name': 'number',
         'expr': r"""0?\.[0-9]+|[0-9]+(?:\.[0-9]+)?""",
         'action': lex.make_token},

        {'name': 'label',
         'expr': r"""[a-zA-Z_][a-zA-Z0-9_]*:(?= |\n)""",
         'action': lex.make_token},

        {'name': 'keyword',
         'expr': r"""(?<!->)(?:STDIN|STDERR|STDOUT|and|cmp|continue|do|else|elsif|eq|eval|foreach|for|if|last|my|next|ne|not|or|our|package|require|return|sub|undef|unless|until|use|while)(?![a-zA-Z_])""",
         'action': lex.make_token},

        {'name': 'hash bareword index',
         'expr': r"(?<={)[A-Za-z0-9_]+(?=})",
         'action': lex.make_token},

        {'name': 'literal hash bareword index',
         'expr': r"[A-Za-z0-9_]+(?= *=>)",
         'action': lex.make_token},

        {'name': 'length scalar',
         'expr': r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
         'action': lex.make_token},

        {'name': 'system scalar',
         'expr': r"\$[][><ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])",
         'action': lex.make_token},

        {'name': 'system array',
         'expr': r"@_",
         'action': lex.make_token},

        {'name': 'scalar',
         'expr': r"""\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*""",
         'action': lex.make_token},

        {'name': 'array',
         'expr': r"""@\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*""",
         'action': lex.make_token},

        {'name': 'hash',
         'expr': r"""%\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*""",
         'action': lex.make_token},

        {'name': 'dereference',
         'expr': r"""[@%\$&\*](?={)""",
         'action': lex.make_token},

        # this isn't totally right, but it handles q//, q{} and q(), which
        # are the most common forms
        {'name': 'quoted region',
         'expr': r"""q.\((?:\\.|[^\\\)])*\)|q./(?:\\.|[^\\/])*/|q.\{(?:\\.|[^\\\}])*\}""",
         'action': lex.make_token},
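
        # for example, the rule above matches qq(hello), qw(a b c) and
        # q (spaced); the single "." after the q consumes one character, so
        # a bare q(...) with no intervening character does not match -- hence
        # "isn't totally right"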

        # match regexes are annoying: the basic pattern is easy, but perl's
        # extras complicate it. if the m is not present, the delimiter must
        # be / (and a bare /.../ is only recognized after =~, !~ or an open
        # paren, so division doesn't false-positive). otherwise any
        # non-alphanumeric, non-whitespace character works, except that <,
        # (, [ and { close with the matching bracket, so those four need
        # special cases.
        #
        # basic pattern: /(\\.|[^\\])*?/[a-z]*
        {'name': 'match regex',
         'expr': r"""(?:(?<==~)|(?<=!~)|(?<=\()) */(?:\\.|[^\\/])*/[a-z]*|m([^<[{(A-Za-z0-9 \t\n])(?:\\.|[^\\])*?\1[a-z]*|m\((?:\\.|[^\\])*?\)[a-z]*|m{(?:\\.|[^\\])*?}[a-z]*|m<(?:\\.|[^\\])*?>[a-z]*|m\[(?:\\.|[^\\])*?\][a-z]*""",
         'action': lex.make_token},
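
        # examples the rule above matches: $x =~ /foo/i, m#foo#g, m{foo}s,
        # m<foo> and m[foo]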

        # we deliberately don't support perl's two-bracket s{a}{b} form; we
        # only support forms like s#a#b# or s/a/b/. the same delimiter
        # comments as above apply.
        {'name': 'replace regex',
         'expr': r"""(?:y|tr|s)([^<[{(A-Za-z0-9 \t\n])(?:\\.|[^\\])*?\1(?:\\.|[^\\])*?\1[a-z]*""",
         'action': lex.make_token},
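
        # examples the rule above matches: s/foo/bar/g, tr/a-z/A-Z/ and y,a,b,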

        {'name': 'package',
         'expr': r"""(?<=package )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'use',
         'expr': r"""(?<=use )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'instance method',
         'expr': r"""(?<=->)[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'static method',
         'expr': r"""&?(?:[a-zA-Z_][a-zA-Z_0-9]*::)+[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'method declaration',
         'expr': r"""(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?=[ \n]*{)""",
         'action': lex.make_token},

        {'name': 'built-in method',
         'expr': r"""(?<!->)&?(?:write|warn|wantarray|waitpid|wait|vec|values|utime|use|untie|unshift|unpack|unlink|undef|umask|ucfirst|uc|truncate|times|time|tied|tie|telldir|tell|syswrite|system|sysseek|sysread|sysopen|syscall|symlink|substr|sub|study|stat|srand|sqrt|sprintf|split|splice|sort|socketpair|socket|sleep|sin|shutdown|shmwrite|shmread|shmget|shmctl|shift|setsockopt|setservent|setpwent|setprotoent|setpriority|setpgrp|setnetent|sethostent|setgrent|send|semop|semget|semctl|select|seekdir|seek|scalar|rmdir|rindex|rewinddir|reverse|return|reset|require|rename|ref|redo|recv|readpipe|readlink|readline|readdir|read|rand|quotemeta|push|prototype|printf|print|pos|pop|pipe|package|pack|our|ord|opendir|open|oct|no|next|my|msgsnd|msgrcv|msgget|msgctl|mkdir|map|lstat|log|lock|localtime|local|listen|link|length|lcfirst|lc|last|kill|keys|join|ioctl|int|index|import|hex|grep|goto|gmtime|glob|getsockopt|getsockname|getservent|getservbyport|getservbyname|getpwuid|getpwnam|getpwent|getprotoent|getprotobynumber|getprotobyname|getpriority|getppid|getpgrp|getpeername|getnetent|getnetbyname|getnetbyaddr|getlogin|gethostent|gethostbyname|gethostbyaddr|getgrnam|getgrgid|getgrent|getc|formline|format|fork|flock|fileno|fcntl|exp|exit|exists|exec|eval|eof|endservent|endpwent|endprotoent|endnetent|endhostent|endgrent|each|dump|do|die|delete|defined|dbmopen|dbmclose|crypt|cos|continue|connect|closedir|close|chroot|chr|chown|chop|chomp|chmod|chdir|caller|bless|binmode|bind|atan2|alarm|accept|abs)(?![a-zA-Z0-9_])""",
        #'expr':r"""(?<!->)&?(?:abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|chmod|chomp|chop|chown|chroot|chr|closedir|close|connect|cos|crypt|dbmclose|dbmopen|defined|delete|die|dump|each|eof|exec|exists|exit|exp|fcntl|fileno|flock|fork|format|formline|getc|getlogin|getpeername|grep|int|join|keys|lc|map|open|pop|print|push|rand|readdir|ref|scalar|select|shift|sort|split|srand|time|uc|unshift|values|wantarray|warn)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},

        {'name': 'method',
         'expr': r"""&(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'methodref',
         'expr': r"""&\$(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'bareword method',
         'expr': r"""(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*(?=[ \n]*(?:\(|->))""",
         'action': lex.make_token},

        {'name': "delimiter",
         'expr': r"""\(|\)|\[|\]|{|}|,|;|=>|=|\?|(?<!:):(?!:)""",
         'action': lex.make_token},

        # note: despite the name, these are the compound-assignment operators
        {'name': "unary operator",
         'expr': r"""\+=|-=|\*=|//=|/=|%=|&=|\^=|>>=|<<=|\*\*=""",
         'action': lex.make_token},

        {'name': "operator",
         'expr': r"""\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||//|/|\^|==|~|=~|!~|!=|%|!|\.""",
         'action': lex.make_token},

        {'name': 'bareword',
         'expr': r"""(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
         'action': lex.make_token},

        {'name': 'default',
         'expr': r""".|\n""",
         'action': lex.silent}
    ]
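
    # the silent catch-all 'default' rule above must stay last: the rules
    # appear to be tried in list order (cf. the insert() calls below), so
    # earlier entries take precedence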

    def _default_rules(self):
        """subclasses can override this to define defaults for a grammar"""
        sub_exprs = {}
        string_rules = []

        for rdir in PerlGrammar.GRAMMAR_LIST:
            self.add_rule(**rdir)

            if INTERPOLATION_HIGHLIGHTING:
                # collect copies of the variable rules under 'interpolated'
                # names; together they form the sub-grammar used to re-lex
                # string contents
                if rdir['name'] in ('scalar', 'system scalar', 'array',
                                    'hash', 'system array'):
                    rdir2 = rdir.copy()
                    rdir2['name'] = 'interpolated ' + rdir['name']
                    string_rules.append(lex.Rule(**rdir2))
                # remember the string/heredoc expressions so the SubRules
                # below can reuse them
                elif rdir['name'] in ('heredoc', 'string1', 'string2'):
                    sub_exprs[rdir['name']] = rdir['expr']

        if INTERPOLATION_HIGHLIGHTING:
            # anything in a string that isn't an interpolated variable is
            # plain string text
            string_rules.append(lex.Rule(name="default string",
                                         expr=r"""(?:\\.|[^\\\$]|\n)+|\$""",
                                         action=lex.make_token))
            string_grammar = lex.Grammar(rules=string_rules)

            # insert SubRule versions ahead of the plain heredoc/string
            # rules so they take precedence and re-lex their contents with
            # string_grammar
            self.insert(0, lex.SubRule(name='heredoc',
                                       expr=sub_exprs['heredoc'],
                                       grammar=string_grammar))

            self.insert(4, lex.SubRule(name="string1",
                                       expr=sub_exprs['string1'],
                                       grammar=string_grammar))

            self.insert(5, lex.SubRule(name="string2",
                                       expr=sub_exprs['string2'],
                                       grammar=string_grammar))
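
# a minimal smoke-test sketch, assuming nothing beyond the stdlib re module:
# it exercises the 'scalar' rule's expression directly, without going through
# lex.Grammar (whose driver API is not shown here)
if __name__ == '__main__':
    import re
    scalar_re = re.compile(r"""\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*""")
    sample = "my $count = 0; print $Foo::Bar, $$ref;\n"
    # expected output: ['$count', '$Foo::Bar', '$$ref']
    print scalar_re.findall(sample)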