From 5cf1a2843ebbe4f4be371073543b75f35adc28d1 Mon Sep 17 00:00:00 2001 From: moculus Date: Sun, 1 Apr 2007 05:35:10 +0000 Subject: [PATCH] perl perl perl --HG-- branch : pmacs2 --- lex2_perl.py | 139 ++++++++++++++++++++++++++++---------------- test3.py | 159 +++++++++++++++++++++++---------------------------- 2 files changed, 162 insertions(+), 136 deletions(-) diff --git a/lex2_perl.py b/lex2_perl.py index f144d08..1a2e2b3 100755 --- a/lex2_perl.py +++ b/lex2_perl.py @@ -3,23 +3,23 @@ from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionR class PodGrammar(Grammar): rules = [ PatternRule( - name=r'header', + name=r'entry', pattern=r'(?<=^=head[1-4]) +.*$', ), PatternRule( - name=r'indent_level', + name=r'entry', pattern=r'(?<=^=over) +.*$', ), PatternRule( - name=r'item_entry', + name=r'entry', pattern=r'(?<=^=item) +.*$', ), PatternRule( - name=r'format', + name=r'entry', pattern=r'(?:(?<=^=begin)|(?<=^=end)) +.*$', ), PatternRule( - name=r'encoding_type', + name=r'entry', pattern=r'(?<=^=encoding) +.*$', ), ] @@ -31,20 +31,20 @@ class StringGrammar(Grammar): pattern=r'\\.', ), PatternRule( - name=r'hash_deref', - pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?:->{(?:[a-zA-Z_][a-zA-Z_0-9]*|'(?:\\.|[^'\\])*'|\"(\\.|[^\\\"])*\")})+", + name=r'deref', + pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?:->{\$?(?:[a-zA-Z_][a-zA-Z_0-9]*|'(?:\\.|[^'\\])*'|\"(\\.|[^\\\"])*\")}|->\[\$?[0-9a-zA-Z_]+\])+", ), PatternRule( - name=r'length_scalar', + name=r'length', pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*", ), ContextPatternRule( - name=r'system_scalar', + name=r'scalar', pattern=r"\$[^A-Za-z0-9 %(delim)s](?![A-Za-z0-9_])", fallback=r"\$[^A-Za-z0-9 ](?![A-Za-z0-9_])", ), PatternRule( - name=r'system_array', + name=r'array', pattern=r"@_", ), PatternRule( @@ -63,12 +63,33 @@ class StringGrammar(Grammar): class PerlGrammar(Grammar): rules = [ + # heredocs RegionRule( - name=r'heredoc', - start=r"<< *(?P[a-zA-Z0-9_]+) *;", + name=r'heredoc1', + start=r"<<(?P[a-zA-Z0-9_]+) *;", grammar=StringGrammar(), end=r'^%(heredoc)s$', ), + RegionRule( + name=r'heredoc1', + start=r"<< *\"(?P[a-zA-Z0-9_]+)\" *;", + grammar=StringGrammar(), + end=r'^%(heredoc)s$', + ), + RegionRule( + name=r'heredoc2', + start=r"<< *'(?P[a-zA-Z0-9_]+)' *;", + grammar=Grammar(), + end=r'^%(heredoc)s$', + ), + RegionRule( + name=r'eval_heredoc', + start=r"<< *`(?P[a-zA-Z0-9_]+)` *;", + grammar=StringGrammar(), + end=r'^%(heredoc)s$', + ), + + # end block RegionRule( name=r'endblock', start=r"^__END__|__DATA__ *$", @@ -112,25 +133,29 @@ class PerlGrammar(Grammar): pattern=r"(?)(?:STDIN|STDERR|STDOUT|and|cmp|continue|do|else|elsif|eq|eval|foreach|for|if|last|my|next|ne|not|or|our|package|require|return|sub|undef|unless|until|use|while)(?![a-zA-Z0-9_])", ), PatternRule( - name=r'bareword_hash_index', + name=r'hash_key', pattern=r'(?<={)[A-Za-z0-9_]+(?=})', ), PatternRule( - name=r'bareword_hash_key', + name=r'hash_key', pattern=r'[A-Za-z0-9_]+(?= *=>)', ), PatternRule( - name=r'length_scalar', + name=r'length', pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*", ), PatternRule( - name=r'system_scalar', + name=r'scalar', pattern=r"\$[][> *\()", + ), PatternRule( name=r'scalar', pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*", @@ -144,41 +169,41 @@ class PerlGrammar(Grammar): pattern=r"%\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*", ), PatternRule( - name=r'dereference', + name=r'deref', pattern=r"[@%\$&\*](?={)", ), RegionRule( - name=r'quoted_region1', + name=r'quoted', start=r'q[rqwx]? *(?P[^ #])', grammar=Grammar(), end=r'%(delim)s', ), RegionRule( - name=r'quoted_region2', + name=r'quoted', start=r'q[rqwx]?#', grammar=Grammar(), end=r'#', ), RegionRule( - name=r'bracket_quoted_region1', + name=r'quoted', start=r'q[rqwx]? *\(', grammar=Grammar(), end=r'\)', ), RegionRule( - name=r'bracket_quoted_region2', + name=r'quoted', start=r'q[rqwx]? *{', grammar=Grammar(), end=r'}', ), RegionRule( - name=r'bracket_quoted_region3', + name=r'quoted', start=r'q[rqwx]? *<', grammar=Grammar(), end=r'>', ), RegionRule( - name=r'bracket_quoted_region4', + name=r'quoted', start=r'q[rqwx]? *\[', grammar=Grammar(), end=r'\]', @@ -186,19 +211,19 @@ class PerlGrammar(Grammar): # match regexes RegionRule( - name=r'match_regex1', + name=r'match', start=r'(?:(?<==~)|(?<=!~)|(?<=\()) *(?P/)', grammar=StringGrammar(), end=r'/[a-z]*', ), RegionRule( - name=r'match_regex2', + name=r'match', start=r'm *(?P[^ #a-zA-Z0-9_])', grammar=StringGrammar(), end=r'%(delim)s[a-z]*', ), RegionRule( - name=r'match_regex3', + name=r'match', start=r'm(?P#)', grammar=StringGrammar(), end=r'#[a-z]*', @@ -206,7 +231,7 @@ class PerlGrammar(Grammar): # replace regexes DualRegionRule( - name=r'replace_regex1', + name=r'replace', start=r's *(?P[^ a-zA-Z0-9_])', grammar1=StringGrammar(), middle=r'%(delim)s', @@ -214,21 +239,39 @@ class PerlGrammar(Grammar): end=r'%(delim)s[a-z]*', ), DualRegionRule( - name=r'replace_regex2', - start=r's#', + name=r'replace', + start=r's(?P#)', grammar1=StringGrammar(), middle=r'#', grammar2=StringGrammar(), end=r'#[a-z]*', ), + # translate operator + DualRegionRule( + name=r'translate', + start=r'(?:y|tr) *(?P[^ a-zA-Z0-9_])', + grammar1=Grammar(), + middle=r'%(delim)s', + grammar2=Grammar(), + end=r'%(delim)s[a-z]*', + ), + DualRegionRule( + name=r'translate', + start=r'(?:y|tr)#', + grammar1=Grammar(), + middle=r'#', + grammar2=Grammar(), + end=r'#[a-z]*', + ), + + # some more basic stuff PatternRule( name=r'package', pattern=r"(?<=package )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*", ), PatternRule( name=r'sub', - #pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?= *{)", pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*", ), PatternRule( @@ -240,33 +283,27 @@ class PerlGrammar(Grammar): pattern=r'[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', ), PatternRule( - name=r'instance_method', + name=r'method', pattern=r"(?<=->)[a-zA-Z_][a-zA-Z_0-9]*", ), PatternRule( - name=r'static_method', - pattern=r"&?(?:[a-zA-Z_][a-zA-Z_0-9]*::)+[a-zA-Z_][a-zA-Z_0-9]*", + name=r'function', + pattern=r"&\$*(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*", ), PatternRule( - name=r'builtin_method', + name=r'function', + pattern=r"(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*(?= *\()", + ), + PatternRule( + name=r'builtin', pattern=r"(?)&?(?:write|warn|wantarray|waitpid|wait|vec|values|utime|use|untie|unshift|unpack|unlink|undef|umask|ucfirst|uc|truncate|times|time|tied|tie|telldir|tell|syswrite|system|sysseek|sysread|sysopen|syscall|symlink|substr|sub|study|stat|srand|sqrt|sprintf|split|splice|sort|socketpair|socket|sleep|sin|shutdown|shmwrite|shmread|shmget|shmctl|shift|setsockopt|setservent|setpwent|setprotoent|setpriority|setpgrp|setnetent|sethostent|setgrent|send|semop|semget|semctl|select|seekdir|seek|scalar|rmdir|rindex|rewinddir|reverse|return|reset|require|rename|ref|redo|recv|readpipe|readlink|readline|readdir|read|rand|quotemeta|push|prototype|printf|print|pos|pop|pipe|package|pack|our|ord|opendir|open|oct|no|next|my|msgsnd|msgrcv|msgget|msgctl|mkdir|map|lstat|log|lock|localtime|local|listen|link|length|lcfirst|lc|last|kill|keys|join|ioctl|int|index|import|hex|grep|goto|gmtime|glob|getsockopt|getsockname|getservent|getservbyport|getservbyname|getpwuid|getpwnam|getpwent|getprotoent|getprotobynumber|getprotobyname|getpriority|getppid|getpgrp|getpeername|getnetent|getnetbyname|getnetbyaddr|getlogin|gethostent|gethostbyname|gethostbyaddr|getgrnam|getgrgid|getgrent|getc|formline|format|fork|flock|fileno|fcntl|exp|exit|exists|exec|eval|eof|endservent|endpwent|endprotoent|endnetent|endhostent|endgrent|each|dump|do|die|delete|defined|dbmopen|dbmclose|crypt|cos|continue|connect|closedir|close|chroot|chr|chown|chop|chomp|chmod|chdir|caller|bless|binmode|bind|atan2|alarm|accept|abs)(?![a-zA-Z0-9_])", ), PatternRule( - name=r'method', - pattern=r"&(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*", + name=r'class', + pattern=r"(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*", ), - PatternRule( - name=r'ref_method', - pattern=r"&\$(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*", - ), - PatternRule( - name=r'bareword_method', - pattern=r"(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*(?= *\()", - ), - #PatternRule( - # name=r'delimiter', - # pattern=r"\(|\)|\[|\]|{|}|,|;|->|=>|=|\?|(?|=>|=|\?|(?>=|<<=|\*\*=", ), PatternRule( diff --git a/test3.py b/test3.py index 4b5f86f..d6a9ea6 100644 --- a/test3.py +++ b/test3.py @@ -17,99 +17,86 @@ for i in range(0, len(color_list)): color_dict[color_names[i]] = color_list[i] token_colors = { - 'escaped': 'lpurple', - 'null': 'white', - 'delimiter': 'white', - 'pod.start': 'lred', - 'pod.null': 'lred', - 'pod.end': 'lred', - 'pod.header': 'lpurple', - 'pod.indent_level': 'lpurple', - 'pod.item_entry': 'lpurple', - 'pod.format': 'lpurple', - 'pod.encoding_type': 'lpurple', - 'sub': 'lcyan', - 'number': 'white', - 'operator': 'white', - 'heredoc': 'lgreen', - 'endblock': 'lred', - 'pod': 'lred', - 'comment': 'lred', - #'string1': 'lgreen', - 'string1.start': 'lgreen', - 'string1.null': 'lgreen', - 'string1.escaped': 'lpurple', - #'string1.scalar': 'yellow', - #'string1.system_scalar': 'yellow', - 'string1.hash_deref': 'yellow', - #'string1.hash_bareword_index': 'lgreen', - 'string1.end': 'lgreen', - #'string2': 'lgreen', - 'string2.start': 'lgreen', - 'string2.null': 'lgreen', - 'string2.end': 'lgreen', - 'evalstring': 'lcyan', - 'default_string': 'lgreen', - 'keyword': 'lpurple', - 'length_scalar': 'yellow', - 'system_scalar': 'yellow', - 'system_array': 'yellow', - 'scalar': 'yellow', - 'dereference': 'yellow', - 'array': 'yellow', - 'hash': 'yellow', - 'bareword_hash_index': 'lgreen', + # basic stuff + 'escaped': 'lpurple', + 'null': 'white', + 'delimiter': 'white', + 'sub': 'lcyan', + 'number': 'white', + 'operator': 'white', + 'endblock': 'lred', + 'keyword': 'lpurple', + 'scalar': 'yellow', + 'array': 'yellow', + 'deref': 'yellow', + 'hash': 'yellow', + 'hash_key': 'lgreen', + 'comment': 'lred', + 'function': 'lcyan', + 'builtin': 'lpurple', + 'method': 'lcyan', + 'bareword': 'white', + 'label': 'lcyan', + 'package': 'lcyan', + 'class': 'lcyan', + 'use': 'lcyan', + 'method': 'lcyan', + + # heredoc + 'heredoc1.start': 'lgreen', + 'heredoc1.null': 'lgreen', + 'heredoc1.end': 'lgreen', + 'heredoc2.start': 'lgreen', + 'heredoc2.null': 'lgreen', + 'heredoc2.end': 'lgreen', + 'eval_heredoc.start': 'lcyan', + 'eval_heredoc.null': 'lcyan', + 'eval_heredoc.end': 'lcyan', + + # pod + 'pod.start': 'lred', + 'pod.null': 'lred', + 'pod.entry': 'lpurple', + 'pod.end': 'lred', + + # "" strings + 'string1.start': 'lgreen', + 'string1.null': 'lgreen', + 'string1.escaped': 'lpurple', + 'string1.deref': 'yellow', + 'string1.end': 'lgreen', + + # '' strings + 'string2.start': 'lgreen', + 'string2.null': 'lgreen', + 'string2.end': 'lgreen', + + # `` strings + 'evalstring': 'lcyan', # quoted region - 'quoted_region1': 'lcyan', - 'quoted_region1.start': 'lcyan', - 'quoted_region1.null': 'lcyan', - 'quoted_region1.end': 'lcyan', - 'quoted_region2': 'lcyan', - 'quoted_region2.start': 'lcyan', - 'quoted_region2.null': 'lcyan', - 'quoted_region2.end': 'lcyan', + 'quoted': 'lcyan', + 'quoted.start': 'lcyan', + 'quoted.null': 'lcyan', + 'quoted.end': 'lcyan', # match regex - 'match_regex1.start': 'lcyan', - 'match_regex1.end': 'lcyan', - 'match_regex1.null': 'lcyan', - 'match_regex2.start': 'lcyan', - 'match_regex2.end': 'lcyan', - 'match_regex2.null': 'lcyan', - 'match_regex3.start': 'lcyan', - 'match_regex3.end': 'lcyan', - 'match_regex3.null': 'lcyan', + 'match.start': 'lcyan', + 'match.end': 'lcyan', + 'match.null': 'lcyan', # replace regex - 'replace_regex1.start': 'lcyan', - 'replace_regex1.middle': 'lcyan', - 'replace_regex1.end': 'lcyan', - 'replace_regex1.null': 'lcyan', - 'replace_regex2.start': 'lcyan', - 'replace_regex2.middle': 'lcyan', - 'replace_regex2.end': 'lcyan', - 'replace_regex2.null': 'lcyan', + 'replace.start': 'lcyan', + 'replace.middle': 'lcyan', + 'replace.end': 'lcyan', + 'replace.null': 'lcyan', + + # translate regex + 'translate.start': 'lpurple', + 'translate.middle': 'lpurple', + 'translate.end': 'lpurple', + 'translate.null': 'lpurple', - # - 'bareword_hash_key': 'lgreen', - 'interpolated_scalar': 'yellow', - 'interpolated_system_scalar': 'yellow', - 'interpolated_array': 'yellow', - 'interpolated_system_array': 'yellow', - 'interpolated_hash': 'yellow', - 'label': 'lcyan', - 'package': 'lcyan', - 'use': 'lcyan', - 'method': 'lcyan', - 'methodref': 'lcyan', - 'method_declaration': 'lcyan', - 'instance_method': 'lcyan', - 'static_method': 'lcyan', - 'builtin_method': 'lpurple', - 'bareword_method': 'lcyan', - #'bareword': 'yellow', - 'bizzaro': 'lpurple', } paths = sys.argv[1:]