removed redundant lex files

--HG--
branch : pmacs2
moculus 2007-06-25 16:49:42 +00:00
parent dab07ad49f
commit 81915278d0
16 changed files with 0 additions and 1815 deletions

lex.py

@@ -1,249 +0,0 @@
#!/bin/env python
"""
lex - a lexer generator in python.
"""
__author__ = "Dan Williams (dan@osheim.org, dww4s@virginia.edu)"
__copyright__ = "2005"
# std imports
import os.path, re, sys, copy
# 2.3 imports
from optparse import OptionParser
# callbacks
def silent(rule, m, offset):
'''ignore a hit; return None'''
pass
def make_token(rule, m, offset):
'''return a token from a hit'''
return(Token(rule.name, m.start() + offset, m.end() + offset, m.group(0)))
class Token:
'''Used to store an instance of a lexical token'''
def __init__(self, name, start, end, s=None):
self.name = name
self.start = start
self.end = end
self.string = s
self.debug = False
def __repr__(self):
if len(self.string) < 10:
s = self.string
else:
s = self.string[:10] + "..."
return "<Token(%r, %d, %d, %r)>" % (self.name, self.start, self.end, s)
class Rule(object):
"""Defines a rule used by a lexer."""
def __init__(self, name="Unnamed", expr=r"(.|\n)", action=lambda x,y: None):
self.name = name
self.expr = expr
self.re = re.compile(self.expr)
self.action = action
def match(self, *args, **kw):
"""Determine if this rule is matched"""
return self.re.match(*args, **kw)
def act(self, lexer, m, offset=0):
"""Act on this rule"""
return self.action(self, m, offset)
class SubRule(Rule):
"""Defines a rule which parses a region according to its own grammar,
i.e. a sub-grammar with its own rules. This rule may return multiple
tokens and span multiple calls to the next() method of Lexer."""
def __init__(self, name="Unnamed", expr=r"(.|\n)", grammar=None):
self.name = name
self.expr = expr
self.re = re.compile(self.expr)
if grammar is None:
self.grammar = Grammar()
else:
self.grammar = grammar
self.lexer = Lexer(self.grammar)
self.data = None
self.index = None
def match(self, *args, **kw):
"""Determine if this rule is matched"""
m = self.re.match(*args, **kw)
if m is not None:
self.data = args[0][:m.end()]
self.index = args[1]
return m
def act(self, lexer, m):
"""Act on this match"""
self.lexer.lex(self.data, self.index)
try:
v = self.lexer.next()
lexer.sub_lexer = self.lexer
return v
except StopIteration:
lexer.sub_lexer = None
return None
class BalancedExprMatch:
def __init__(self, start, end, data):
self.s = start
self.e = end
self.d = data
def start(self):
return self.s
def end(self):
return self.e
def group(self, i):
if i == 0 or i == 1:
return self.d
else:
raise IndexError, "no such group"
def groupdict(self):
return {}
def groups(self):
return ()
def span(self):
return (self.s, self.e)
class BalancedExprRule(Rule):
"""
Defines a rule that needs to take into account opening and closing
expressions, e.g. parentheses, #if and #endif, etc.
"""
def __init__(self, name="Unnamed", start_expr=r"(#if +0)",
enter="#if", leave="#endif", action=lambda x,y: None):
self.name = name
self.start_expr = start_expr
self.start_re = re.compile(self.start_expr)
self.enter = enter
self.leave = leave
self.action = action
def match(self, *args, **kw):
if not self.start_re.match(*args):
return None
stack = []
data = args[0]
index = args[1]
start = index
if data[index:].startswith(self.enter):
stack.append(self.enter)
index += len(self.enter)
while len(stack) > 0 and index < len(data):
if data[index:].startswith(self.enter):
stack.append(self.enter)
index += len(self.enter)
elif data[index:].startswith(self.leave):
stack.pop(-1)
index += len(self.leave)
else:
index += 1
m = BalancedExprMatch(start, index, data[start:index])
return m
def act(self, lexer, m):
"""Act on this rule"""
return self.action(self, m)
class Grammar(list):
"""
Defines rules for lexing according to a given grammar.
The order of rules in the grammar is their precedence in matching.
"""
GRAMMAR_LIST = [ {'name': 'default'} ]
def __init__(self, *args, **kw):
"""useful values to pass in:
rules -> list of rules (ordered!)
if rules are not supplied, self._default_rules() is used"""
list.__init__(self)
if "rules" in kw:
for r in kw["rules"]:
self.append(r)
else:
self._default_rules()
self._post_init(*args, **kw)
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in self.GRAMMAR_LIST:
self.add_rule(**rdir)
def _post_init(self, *args, **kw):
"""subclasses can override this to enable other behavior"""
pass
def add_rule(self, *args, **kw):
self.append(Rule(*args, **kw))
def clear_rules(self):
while len(self) > 0:
del self[0]
class Lexer(object):
"""Defines a lexer, a generator of lexical tokens, etc."""
def __init__(self, grammar=None, rules=None, data=None, index=0):
"""
If the grammar keyword is provided, then that grammar will be used.
Else, if the rules keyword is provided, that list of rules will be used
Else, the default (boring) grammar will be used.
Normally, lex(data) is used to (re-)initialize the lexer with data to
lex. If the data keyword is provided, then the lexer is ready to go
on instantiation.
"""
if grammar is not None:
self.grammar = grammar
elif rules is not None:
self.grammar = Grammar(rules=rules)
else:
self.grammar = Grammar()
self.data = data
self.index = index
self.offset = 0
self.sub_lexer = None
def lex(self, data=None, index=0, offset=0):
"""
(re-)initialize the lexer with data to lex, and optionally, an offset
to start at
"""
self.data = data
self.index = index
self.offset = offset
def __iter__(self):
if self.data is None:
raise Exception, "No data to be lexed"
return self
#def append(self, newdata, offset=0):
# self.data += newdata
# self.index += offset
def next(self):
# used for multiple levels of lexing
if self.sub_lexer is not None:
try:
return self.sub_lexer.next()
except StopIteration:
self.sub_lexer = None
if self.index >= len(self.data):
raise StopIteration
for rule in self.grammar:
m = rule.match(self.data, self.index)
if m:
self.index = m.end()
return rule.act(self, m, self.offset)
raise Exception, "Failed to consume last %d characters of input: %r" % \
(len(self.data) - self.index, self.data[self.index:])
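
Note: for context, this module was driven roughly as follows -- a minimal sketch using only the classes defined above; the two rules and the sample input are made up for illustration (compare the __main__ block in lex_c.py below).

    import lex

    rules = [
        lex.Rule(name='word',    expr=r'[a-zA-Z_]+', action=lex.make_token),
        lex.Rule(name='default', expr=r'.|\n',       action=lex.silent),
    ]
    grammar = lex.Grammar(rules=rules)
    lexer = lex.Lexer(grammar=grammar)

    lexer.lex("foo bar\n")
    for token in lexer:        # Lexer.next() tries the rules in order at each index
        if token is not None:  # rules whose action is lex.silent return None
            print token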


@@ -1,353 +0,0 @@
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
class PodGrammar(Grammar):
rules = [
PatternRule(
name=r'entry',
pattern=r'(?<=^=head[1-4]) +.*$',
),
PatternRule(
name=r'entry',
pattern=r'(?<=^=over) +.*$',
),
PatternRule(
name=r'entry',
pattern=r'(?<=^=item) +.*$',
),
PatternRule(
name=r'entry',
pattern=r'(?:(?<=^=begin)|(?<=^=end)) +.*$',
),
PatternRule(
name=r'entry',
pattern=r'(?<=^=encoding) +.*$',
),
]
class StringGrammar(Grammar):
rules = [
PatternRule(
name=r'escaped',
pattern=r'\\.',
),
PatternRule(
name=r'deref',
pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?:->{\$?(?:[a-zA-Z_][a-zA-Z_0-9]*|'(?:\\.|[^'\\])*'|\"(\\.|[^\\\"])*\")}|->\[\$?[0-9a-zA-Z_]+\])+",
),
PatternRule(
name=r'length',
pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
),
ContextPatternRule(
name=r'scalar',
pattern=r"\$[^A-Za-z0-9 %(delim)s](?![A-Za-z0-9_])",
fallback=r"\$[^A-Za-z0-9 ](?![A-Za-z0-9_])",
),
#PatternRule(
# name=r'array',
# pattern=r"@_",
#),
PatternRule(
name=r'scalar',
pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
),
PatternRule(
name=r'cast',
pattern=r"[\$\@\%\&]{.*?}",
),
PatternRule(
name=r'array',
pattern=r"@\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*",
),
PatternRule(
name=r'hash',
pattern=r"%\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*",
),
]
class PerlGrammar(Grammar):
rules = [
# heredocs
RegionRule(
name=r'heredoc1',
start=r"<<(?P<heredoc>[a-zA-Z0-9_]+) *;",
grammar=StringGrammar(),
end=r'^%(heredoc)s$',
),
RegionRule(
name=r'heredoc1',
start=r'<< *"(?P<heredoc>[a-zA-Z0-9_]+)" *;',
grammar=StringGrammar(),
end=r'^%(heredoc)s$',
),
RegionRule(
name=r'heredoc2',
start=r"<< *'(?P<heredoc>[a-zA-Z0-9_]+)' *;",
grammar=Grammar(),
end=r'^%(heredoc)s$',
),
RegionRule(
name=r'eval_heredoc',
start=r"<< *`(?P<heredoc>[a-zA-Z0-9_]+)` *;",
grammar=StringGrammar(),
end=r'^%(heredoc)s$',
),
# end block
RegionRule(
name=r'endblock',
start=r"^__END__|__DATA__ *$",
grammar=Grammar(),
end=r'',
),
RegionRule(
name=r'pod',
start=r'^=[a-zA-Z0-9_]+',
grammar=PodGrammar(),
end=r'^=cut',
),
PatternRule(
name=r'comment',
pattern=r'#.*$',
),
RegionRule(
name=r'string1',
start=r'"',
grammar=StringGrammar(),
end=r'"',
),
RegionRule(
name=r'string2',
start=r"'",
grammar=Grammar(),
end=r"'",
),
RegionRule(
name=r'evalstring',
start=r"`",
grammar=StringGrammar(),
end=r"`",
),
PatternRule(
name=r'number',
pattern=r'0?\.[0-9]+|[0-9]+(?:\.[0-9]+)?',
),
PatternRule(
name=r'keyword',
pattern=r"(?<!->)(?:STDIN|STDERR|STDOUT|and|cmp|continue|do|else|elsif|eq|eval|foreach|for|if|last|my|next|ne|not|or|our|package|require|return|sub|undef|unless|until|use|while)(?![a-zA-Z0-9_])",
),
PatternRule(
name=r'hash_key',
pattern=r'(?<={)[A-Za-z0-9_]+(?=})',
),
PatternRule(
name=r'hash_key',
pattern=r'[A-Za-z0-9_]+(?= *=>)',
),
PatternRule(
name=r'length',
pattern=r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
),
# XYZ
PatternRule(
name=r'cast',
pattern=r'[\$\@\%\^\&](?= *{)',
),
PatternRule(
name=r'scalar',
pattern=r"\$[][><ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])",
),
PatternRule(
name=r'array',
pattern=r"@_",
),
PatternRule(
name=r'function',
pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*(?=-> *\()",
),
PatternRule(
name=r'scalar',
pattern=r"\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
),
PatternRule(
name=r'array',
pattern=r"@\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*",
),
PatternRule(
name=r'hash',
pattern=r"%\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*",
),
PatternRule(
name=r'deref',
pattern=r"[@%\$&\*](?={)",
),
RegionRule(
name=r'quoted',
start=r'q[rqwx]? *\(',
grammar=Grammar(),
end=r'\)',
),
RegionRule(
name=r'quoted',
start=r'q[rqwx]? *{',
grammar=Grammar(),
end=r'}',
),
RegionRule(
name=r'quoted',
start=r'q[rqwx]? *<',
grammar=Grammar(),
end=r'>',
),
RegionRule(
name=r'quoted',
start=r'q[rqwx]? *\[',
grammar=Grammar(),
end=r'\]',
),
RegionRule(
name=r'quoted',
start=r'q[rqwx]? *(?P<delim>[^ #])',
grammar=Grammar(),
end=r'%(delim)s',
),
RegionRule(
name=r'quoted',
start=r'q[rqwx]?#',
grammar=Grammar(),
end=r'#',
),
# match regexes
RegionRule(
name=r'match',
start=r'(?:(?<==~)|(?<=!~)|(?<=\()|(?<=split)) *(?P<delim>/)',
grammar=StringGrammar(),
end=r'/[a-z]*',
),
RegionRule(
name=r'match',
start=r'm *(?P<delim>[^ #a-zA-Z0-9_])',
grammar=StringGrammar(),
end=r'%(delim)s[a-z]*',
),
RegionRule(
name=r'match',
start=r'm(?P<delim>#)',
grammar=StringGrammar(),
end=r'#[a-z]*',
),
# replace regexes
DualRegionRule(
name=r'replace',
start=r's *(?P<delim>[^ a-zA-Z0-9_])',
grammar1=StringGrammar(),
middle=r'%(delim)s',
grammar2=StringGrammar(),
end=r'%(delim)s[a-z]*',
),
DualRegionRule(
name=r'replace',
start=r's(?P<delim>#)',
grammar1=StringGrammar(),
middle=r'#',
grammar2=StringGrammar(),
end=r'#[a-z]*',
),
# translate operator
DualRegionRule(
name=r'translate',
start=r'(?:y|tr) *(?P<delim>[^ a-zA-Z0-9_])',
grammar1=Grammar(),
middle=r'%(delim)s',
grammar2=Grammar(),
end=r'%(delim)s[a-z]*',
),
DualRegionRule(
name=r'translate',
start=r'(?:y|tr)#',
grammar1=Grammar(),
middle=r'#',
grammar2=Grammar(),
end=r'#[a-z]*',
),
# some more basic stuff
PatternRule(
name=r'package',
pattern=r"(?<=package )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*",
),
PatternRule(
name=r'sub',
pattern=r"(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*",
),
PatternRule(
name=r'use',
pattern=r"(?<=use )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*",
),
PatternRule(
name=r'label',
pattern=r'[a-zA-Z_][a-zA-Z0-9_]*:(?!:)',
),
PatternRule(
name=r'method',
pattern=r"(?<=->)[a-zA-Z_][a-zA-Z_0-9]*",
),
PatternRule(
name=r'function',
pattern=r"&\$*(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*",
),
PatternRule(
name=r'function',
pattern=r"(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*(?= *\()",
),
PatternRule(
name=r'builtin',
pattern=r"(?<!->)&?(?:write|warn|wantarray|waitpid|wait|vec|values|utime|use|untie|unshift|unpack|unlink|undef|umask|ucfirst|uc|truncate|times|time|tied|tie|telldir|tell|syswrite|system|sysseek|sysread|sysopen|syscall|symlink|substr|sub|study|stat|srand|sqrt|sprintf|split|splice|sort|socketpair|socket|sleep|sin|shutdown|shmwrite|shmread|shmget|shmctl|shift|setsockopt|setservent|setpwent|setprotoent|setpriority|setpgrp|setnetent|sethostent|setgrent|send|semop|semget|semctl|select|seekdir|seek|scalar|rmdir|rindex|rewinddir|reverse|return|reset|require|rename|ref|redo|recv|readpipe|readlink|readline|readdir|read|rand|quotemeta|push|prototype|printf|print|pos|pop|pipe|package|pack|our|ord|opendir|open|oct|no|next|my|msgsnd|msgrcv|msgget|msgctl|mkdir|map|lstat|log|lock|localtime|local|listen|link|length|lcfirst|lc|last|kill|keys|join|ioctl|int|index|import|hex|grep|goto|gmtime|glob|getsockopt|getsockname|getservent|getservbyport|getservbyname|getpwuid|getpwnam|getpwent|getprotoent|getprotobynumber|getprotobyname|getpriority|getppid|getpgrp|getpeername|getnetent|getnetbyname|getnetbyaddr|getlogin|gethostent|gethostbyname|gethostbyaddr|getgrnam|getgrgid|getgrent|getc|formline|format|fork|flock|fileno|fcntl|exp|exit|exists|exec|eval|eof|endservent|endpwent|endprotoent|endnetent|endhostent|endgrent|each|dump|do|die|delete|defined|dbmopen|dbmclose|crypt|cos|continue|connect|closedir|close|chroot|chr|chown|chop|chomp|chmod|chdir|caller|bless|binmode|bind|atan2|alarm|accept|abs)(?![a-zA-Z0-9_])",
),
PatternRule(
name=r'class',
pattern=r"(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*(?=->)",
),
# # nested regions
# RegionRule(
# name=r'paren',
# start=r'\(',
# grammar=None,
# end=r'\)',
# ),
# RegionRule(
# name=r'brace',
# start=r'{',
# grammar=None,
# end=r'}',
# ),
# RegionRule(
# name=r'bracket',
# start=r'\[',
# grammar=None,
# end=r'\]',
# ),
# some basic stuff
PatternRule(
name=r'delimiter',
#pattern=r",|;|->|=>|=|\?|(?<!:):(?!=:)",
pattern=r",|;|->|=>|=|\?|\(|\)|{|}|\[|\](?<!:):(?!=:)",
),
PatternRule(
name=r'operator',
pattern=r"\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*=",
),
PatternRule(
name=r'operator',
pattern=r"\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|=~|!~|!=|%|!|\.",
),
PatternRule(
name=r'bareword',
pattern=r'(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*',
),
]
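
Note: the end patterns above use %(heredoc)s / %(delim)s / %(tag)s placeholders, which appear to be filled in from the named groups of the start match (lex2 itself is not part of this commit, so this is an assumption). A rough sketch of that mechanic with plain re; the sample text and the MULTILINE flag are illustrative:

    import re

    # start pattern of the first 'heredoc1' rule above
    start = re.compile(r"<<(?P<heredoc>[a-zA-Z0-9_]+) *;")
    m = start.match("<<EOT ;")
    print m.group('heredoc')                   # EOT

    # the end pattern '^%(heredoc)s$' would then become '^EOT$'
    end = re.compile(r'^%(heredoc)s$' % m.groupdict(), re.MULTILINE)
    print bool(end.search("print <<EOT ;\nsome text\nEOT\n"))   # True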


@@ -1,115 +0,0 @@
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
class StringGrammar(Grammar):
rules = [
PatternRule(
name=r'octal',
pattern=r'\\[0-7]{3}',
),
PatternRule(
name=r'escaped',
pattern=r'\\.',
),
#PatternRule(
# name=r'format',
# pattern=r'%(?:\([a-zA-Z_]+\))?[-# +]*(?:[0-9]+|\*)?\.?(?:[0-9]+|\*)?[hlL]?[a-zA-Z%]',
#),
]
class PythonGrammar(Grammar):
rules = [
PatternRule(
name=r'functiondef',
pattern=r'(?<=def )[a-zA-Z_][a-zA-Z0-9_]*',
),
PatternRule(
name=r'classdef',
pattern=r'(?<=class )[a-zA-Z_][a-zA-Z0-9_]*',
),
PatternRule(
name=r'reserved',
pattern=r'(?:True|None|False|Exception|self)(?![a-zA-Z0-9_])',
),
PatternRule(
name=r'keyword',
pattern=r'(?:yield|while|try|return|raise|print|pass|or|not|lambda|is|in|import|if|global|from|for|finally|exec|except|else|elif|del|def|continue|class|break|assert|as|and)(?![a-zA-Z0-9_])',
),
PatternRule(
name=r"builtin",
pattern=r'(?<!\.)(?:zip|xrange|vars|unicode|unichr|type|tuple|super|sum|str|staticmethod|sorted|slice|setattr|set|round|repr|reduce|raw_input|range|property|pow|ord|open|oct|object|max|min|map|long|locals|list|len|iter|issubclass|isinstance|int|input|id|hex|hash|hasattr|globals|getattr|frozenset|float|filter|file|execfile|eval|enumerate|divmod|dir|dict|delattr|complex|compile|coerce|cmp|classmethod|chr|callable|bool)(?![a-zA-Z0-9_])',
),
PatternRule(
name=r'methodcall',
pattern=r'(?<=\. )[a-zA-Z_][a-zA-Z0-9_]*(?= *\()',
),
PatternRule(
name=r'functioncall',
pattern=r'[a-zA-Z_][a-zA-Z0-9_]*(?= *\()',
),
PatternRule(
name=r'system_identifier',
pattern=r'__[a-zA-Z0-9_]+__',
),
PatternRule(
name=r'private_identifier',
pattern=r'__[a-zA-Z0-9_]*',
),
PatternRule(
name=r'hidden_identifier',
pattern=r'_[a-zA-Z0-9_]*',
),
PatternRule(
name=r'identifier',
pattern=r'[a-zA-Z_][a-zA-Z0-9_]*',
),
PatternRule(
name=r'delimiter',
pattern=r'\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=',
),
PatternRule(
name=r"operator",
pattern=r"\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%",
),
PatternRule(
name=r"integer",
pattern=r"(?<![\.0-9a-zA-Z_])(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?(?![\.0-9a-zA-Z_])",
),
PatternRule(
name=r"float",
pattern=r"(?<![\.0-9a-zA-Z_])(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)(?![\.0-9a-zA-Z_])",
),
PatternRule(
name=r"imaginary",
pattern=r"(?<![\.0-9a-zA-Z_])(?:[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ])(?![\.0-9a-zA-Z_])",
),
RegionRule(
name=r'docstring',
start=r'^ *(?P<tag>"""|\'\'\')',
grammar=Grammar(),
end=r'%(tag)s',
),
RegionRule(
name=r'tq_string',
start=r'(?P<tag>"""|\'\'\')',
grammar=Grammar(),
end=r'%(tag)s',
),
RegionRule(
name=r'string',
start=r'(?P<tag>"|\')',
grammar=StringGrammar(),
end=r'%(tag)s',
),
PatternRule(
name=r'comment',
pattern=r'#.*$',
),
PatternRule(
name=r'continuation',
pattern=r'\\$',
),
]


@@ -1,16 +0,0 @@
import lex
class BlameGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'metadata',
'expr': "(?:^|(?<=\n))[0-9.]+ +[a-zA-Z0-9_]+ +[-0-9A-Za-z]+",
'action': lex.make_token},
{'name': 'data',
'expr': ".+(?:$|\n)",
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent},
]

lex_c.py

@@ -1,122 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class CGrammar(lex.Grammar):
GRAMMAR_LIST = [
# this might not be complete...
# see http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_3.html#SEC44
# we need to absorb the rest of the line cause otherwise shit happens
{'name': 'macro2',
'expr': r"#(?:define|import|include|undef)(?= )",
'action':lex.make_token},
{'name': 'macro1',
'expr': r"#(?:assert|cpu|elif|else|error|endif|error|ident|ifdef|ifndef|if|include_next|line|machine|pragma|pragma_once|system|unassert|warning)(?:[^\n]*\\\n)*[^\n]*?(?=\n)",
'action':lex.make_token},
{'name': 'header',
'expr': r'''(?<=#include) +(?:<[A-Za-z/0-9_]+\.h?>|"[A-Za-z/0-9_]+\.h")''',
'action': lex.make_token},
{'name': 'constant',
'expr': r'''(?<=#define) +[A-Za-z0-9_]+(?= |\(|\n|$)''',
'action': lex.make_token},
{'name': 'label',
'expr': r"""[a-zA-Z_]+(?=:)""",
'action': lex.make_token},
{'name': "c++ comment",
'expr': r'//.*(?:\n|$)',
'action': lex.make_token},
{'name': "c comment",
'expr': r"/\*(?:.|\n)*?(?:\*/|$)",
'action' : lex.make_token},
{'name': 'control',
'expr': r"(?:break|case|continue|default|do|else|for|goto|if|return|switch|while)(?![a-zA-Z_])",
'action': lex.make_token},
{'name': 'keyword',
'expr': r"(?:auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while)(?![a-zA-z_])",
'action': lex.make_token},
{'name': 'builtin',
'expr': r"(?:NULL|TRUE|FALSE)",
'action': lex.make_token},
{'name': "identifier",
'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "unary operator",
'expr': r"""\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*=""",
'action': lex.make_token},
{'name': "operator",
'expr': r"""\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%""",
'action': lex.make_token},
# this is sketchy as hell
{'name': "delimiter",
'expr': r"""->|\.|\(|\)|\[|\]|{|}|@|,|:|`|;|=|\?""",
'action': lex.make_token},
{'name': "integer",
'expr': r"(?:0(?![x0-9])|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
'action': lex.make_token},
{'name': "float",
'expr': r"""[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+""",
'action': lex.make_token},
{'name': "string1",
'expr': r'"(?:\\.|[^"])*(?:"|.?$)',
'action': lex.make_token},
# Doesn't handle octal . . (yeah it does..heh...ughhh)
{'name': "char",
'expr': r"'(?:\\[^']+|[^'])(?:'|.?$)",
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
lex.Grammar._default_rules(self)
self.insert(0, lex.BalancedExprRule(name='macro comment',
start_expr=r"#if +0",
enter="#if",
leave="#endif",
action=lex.make_token))
if __name__ == "__main__":
usage = "%%prog <file> [<file> ...]\n\n" \
"Lex one or more files according to the python grammar"
parser = OptionParser(usage=usage)
(options, args) = parser.parse_args()
g = CGrammar()
l = lex.Lexer(grammar=g)
for path in args:
f = open(path, 'r')
data = f.read()
f.close()
print "Lexing %s:" % (path)
l.lex(data)
for t in l:
if t is not None:
print t
#print "%-12s %-40s %d %d" % (t.rule.name, t.string, t.start, t.end)


@@ -1,41 +0,0 @@
import lex
class DiffGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': "left",
'expr': "(?:^|(?<=\n))\-.*(?:$|\n)",
'action': lex.make_token},
{'name': "right",
'expr': "(?:^|(?<=\n))\+.*(?:$|\n)",
'action': lex.make_token},
#RCS file: /usr/local/cvsroot/TBB_v2/main_application/lib/TBB/EfileServer.pm,v
#retrieving revision 1.57
#diff -u -r1.57 EfileServer.pm
{'name': "cvs metadata",
'expr': "(?:^|(?<=\n))Index: .*\n={67}\nRCS file: .*,v\nretrieving revision [0-9.]+\ndiff -u .*(?:$|\n)",
'action': lex.make_token},
{'name': "svn metadata",
'expr': "(?:^|(?<=\n))Index: .*\n={67}(?:$|\n)",
'action': lex.make_token},
{'name': "location",
'expr': "(?:^|(?<=\n))@@ [-+0-9a-z, ]* @@(?:$|\n)",
'action': lex.make_token},
{'name': "common",
'expr': "(?:^|(?<=\n)).*(?:$|\n)",
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in DiffGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)


@@ -1,82 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class JavascriptGrammar(lex.Grammar):
GRAMMAR_LIST = [
## {'name': "import statement",
## 'expr': r"""(?:^|(?<= ))import [ .]*(?=\n)""",
## 'action': lex.make_token},
{'name': "comment",
'expr': r'//.*(?=\n|$)',
'action': lex.make_token},
{'name': "function declaration",
'expr': r"(?<=function ) *[a-zA-Z0-9_]* *(?=\()",
'action': lex.make_token},
{'name': "class declaration",
'expr': r"(?<=class )[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': 'keyword',
'expr': r"""(?:and|break|class|continue|def|del|elif|else|except|exec|finally|for|from|function|global|if|import|in|is|lambda|new|not|or|pass|print|raise|return|try|var|while|yield)(?![a-zA-Z0-9_])""",
'action': lex.make_token},
{'name': "pseudo-keyword",
'expr': r"""(?:as|self|True|False|None|Exception)(?![a-zA-Z0-9_])""",
'action': lex.make_token},
## {'name': "built-in method",
## 'expr': r"""(?<!\.)(?:bool|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|long|map|min|max|object|oct|open|ord|pow|property|range|raw_input|reduce|repr|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)(?![a-zA-Z0-9_])""",
## 'action': lex.make_token},
{'name': "bound method",
'expr': r"(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()",
'action': lex.make_token},
{'name': "identifier",
'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "delimiter",
'expr': r"""\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=""",
'action': lex.make_token},
{'name': "operator",
'expr': r"""\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%""",
'action': lex.make_token},
{'name': "integer",
'expr': r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
'action': lex.make_token},
{'name': "float",
'expr': r"""[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+""",
'action': lex.make_token},
{'name': "imaginary",
'expr': r"""[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]""",
'action': lex.make_token},
{'name': "string1",
'expr': r'"(?:\\.|[^\\"])*(?:"|.?$)',
'action': lex.make_token},
{'name': "string2",
'expr': r"'(?:\\.|[^\\'])*(?:'|.?$)",
'action': lex.make_token},
{'name': "continuation",
'expr': r'\\(?=(?:\n|$))',
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]


@@ -1,59 +0,0 @@
import lex, lex_text
class MuttGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'header',
'expr': r'(?:^|(?<=\n))(?:From|To|Cc|Bcc|Subject|Reply-To|In-Reply-To|Delivered-To|Date):',
'action': lex.make_token,
},
{'name': 'quote1',
'expr': r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){1} *(?:[^ >\n][^\n]*)?(?:$|\n)',
'action': lex.make_token,
},
{'name': 'quote2',
'expr': r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){2} *(?:[^ >\n][^\n]*)?(?:$|\n)',
'action': lex.make_token,
},
{'name': 'quote3',
'expr': r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){3} *(?:[^ >\n][^\n]*)?(?:$|\n)',
'action': lex.make_token,
},
{'name': 'email',
'expr': r'(?:^|(?<=[ :\n]))<?[^<>@\n ]+@(?:[^<>@\.\n ]+\.)*[^<>@\.\n ]+>?',
'action': lex.make_token,
},
{'name': 'url',
'expr': r'(?:^|(?<=[ \n]))(?:http|https|ftp|sftp|file|smtp|smtps|torrent|news|jabber|irc|telnet)://(?:[^\.\n ]+\.)*[^\.\n ]+',
'action': lex.make_token,
},
{'name': 'continued word',
'expr': r"""([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])""",
'action': lex_text.make_token_spell,
},
{'name': 'word',
'expr': r"""(?:[a-zA-Z][-']?)*[a-zA-Z]""",
'action': lex_text.make_token_spell,
},
{'name': 'stuff',
'expr': r"""[^ \n]+""",
'action': lex.make_token,
},
{'name': "default",
'expr': r'.| |\n',
'action': lex.silent,
},
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in self.GRAMMAR_LIST:
self.add_rule(**rdir)


@@ -1,100 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class NasmGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'keyword',
'expr': \
r"""(?:section|global|extern)(?![a-zA-Z_])""",
'action': lex.make_token},
{'name': "nasm macros",
'expr': r"%(?:define|undef|assign|strlen|macro|endmacro|if|elif|else|endif|ifdef|ifndef|include|push|pop|stacksize)(?![a-zA-Z_])",
'action': lex.make_token
},
{'name': "instructions",
'expr': \
r"""(?:jeq|jne|ja|jmp|push|pushad|pushfd|call|ret|sub|add|pop|popa|popad|popfd|call|and|cwd|cdq|cmp|cmpxchg|cpuid|div|divpd|enter|leave|fadd|fld|fmul|fsqrt|fsub|hlt|imul|inc|int|int3|lea|mov|movd|mul|neg|not|nop|or|sal|sar|shl|shr|shld|shrd|syscall|sysenter|sysexit|test|xchg|xadd|xor)(?![a-zA-Z_])""",
'action': lex.make_token},
{'name': "registers",
'expr': \
r"""(?:eax|ax|ah|al|ebx|bx|bh|bl|ecx|cx|ch|cl|esi|edi|esp|ebp)""",
'action': lex.make_token},
{'name': "prefix",
'expr': r"(?:dword|word|lock)",
'action': lex.make_token
},
{'name': "label",
'expr': r"[a-zA-Z_.][a-zA-Z0-9_.]*:",
'action': lex.make_token},
{'name': "identifier",
'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "integer",
'expr': r"(0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
'action': lex.make_token},
{'name': "float",
'expr': \
r"""[0-9]+\.[0-9]*|\.[0-9]+|([0-9]|
[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+""",
'action': lex.make_token},
{'name': "string3",
'expr': r'"""[.|\n]*?(?:"""|$)',
'action': lex.make_token},
{'name': "string1",
'expr': r'"(?:\\.|[^\\"])*(?:"|$)',
'action': lex.make_token},
{'name': "string2",
'expr': r"'(?:\\.|[^\\'])*(?:'|$)",
'action': lex.make_token},
{'name': "comment",
'expr': r'[;].*(?:\n|$)',
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in NasmGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)
if __name__ == "__main__":
usage = "%%prog <file> [<file> ...]\n\n" \
"Lex one or more files according to the python grammar"
parser = OptionParser(usage=usage)
(options, args) = parser.parse_args()
g = NasmGrammar()
l = lex.Lexer(grammar=g)
for path in args:
f = open(path, 'r')
data = f.read()
f.close()
print "Lexing %s:" % (path)
l.lex(data)
for x in l:
if x is not None:
print x


@@ -1,207 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
# this will support perl's string interpolation; but, it can be slower and also
# possibly buggier
INTERPOLATION_HIGHLIGHTING = False
#INTERPOLATION_HIGHLIGHTING = True
class PerlGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'heredoc',
'expr': r"""<< *([a-zA-Z0-9_]+) *;(?:.*?\n)*?(?:\1|$)""",
'action': lex.make_token},
{'name': 'endblock',
'expr': r"""(?:^|\n)(?:__END__|__DATA__)(?:.|\n)*$""",
'action': lex.make_token},
{'name': 'pod',
'expr': r"""(?:^|(?<=\n))=[a-zA-Z0-9_]+.*(?:\n(?!=cut).*)*(?:\n=cut|$)""",
'action': lex.make_token},
{'name': "comment",
'expr': r'[#].*(?:\n|$)',
'action': lex.make_token},
{'name': "string1",
'expr': r'''"(?:\\(?:.|\n)|[^\\"]|[ \n])*(?:"|.?$)''',
'action': lex.make_token},
{'name': "string2",
'expr': r"""'(?:\\(?:.|\n)|[^\\'])*(?:'|.?$)""",
'action': lex.make_token},
{'name': "evalstring",
'expr': r"""`(?:\\(?:.|\n)|[^\\`])*(?:`|.?$)""",
'action': lex.make_token},
{'name': 'number',
'expr': r"""0?\.[0-9]+|[0-9]+(?:\.[0-9]+)?""",
'action': lex.make_token},
{'name': 'label',
'expr': r"""[a-zA-Z_][a-zA-Z0-9_]*:(?= |\n)""",
'action': lex.make_token},
{'name': 'keyword',
'expr': r"""(?<!->)(?:STDIN|STDERR|STDOUT|and|cmp|continue|do|else|elsif|eq|eval|foreach|for|if|last|my|next|ne|not|no|or|our|package|require|return|sub|undef|unless|until|use|while)(?![a-zA-Z_])""",
'action': lex.make_token},
{'name': 'hash bareword index',
'expr': r"(?<={)[A-Za-z0-9_]+(?=})",
'action': lex.make_token},
{'name': 'literal hash bareword index',
'expr': r"[A-Za-z0-9_]+(?= *=>)",
'action': lex.make_token},
{'name': 'length scalar',
'expr': r"\$#[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*",
'action': lex.make_token},
{'name': 'system scalar',
'expr': r"\$[][><ab/'\"_@\?#\$!%^|&*()](?![A-Za-z0-9_])",
'action': lex.make_token},
{'name': 'system array',
'expr': r"@_",
'action': lex.make_token},
{'name': 'scalar',
'expr': r"""\$\$*[A-Za-z0-9_](?:[A-Za-z0-9_]|::)*""",
'action': lex.make_token},
{'name': 'array',
'expr': r"""@\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*""",
'action': lex.make_token},
{'name': 'hash',
'expr': r"""%\$*[A-Za-z_](?:[A-Za-z0-9_]|::)*""",
'action': lex.make_token},
{'name': 'dereference',
'expr': r"""[@%\$&\*](?={)""",
'action': lex.make_token},
# this isn't totally right, but it handles q//, q{} and q(), which are
# the most common
{'name': 'quoted region',
'expr': r"""q.\((?:\\.|[^\\\)])*\)|q./(?:\\.|[^\\/])*/|q.\{(?:\\.|[^\\\}])*\}""",
'action': lex.make_token},
# match regexes are annoying: the basic gist is easy, but all the perl
# crap sucks. if the m is not present, you have to use / as the
# delimiter. otherwise, you can use any non-alphanumeric-or-whitespace
# character. if you use <, (, [, or {, you close with the opposite kind
# of thing. we have to special-case those last 4. ugh.
#
# basic gist: /(\\.|[^\\])*?/[a-z]*
{'name': 'match regex',
'expr': r"""(?:(?<==~)|(?<=!~)|(?<=\()) */(?:\\.|[^\\/])*/[a-z]*|m([^<[{(A-Za-z0-9 \t\n])(?:\\.|[^\\])*?\1[a-z]*|m\((?:\\.|[^\\])*?\)[a-z]*|m{(?:\\.|[^\\])*?}[a-z]*|m<(?:\\.|[^\\])*?>[a-z]*|m\[(?:\\.|[^\\])*?\][a-z]*""",
'action': lex.make_token},
# we officially don't support the bullshit s{a}{b} thing perl has going.
# those guys are on crack. we only support things like s#a#b# or s/a/b/.
# same comments as above apply
{'name': 'replace regex',
'expr': r"""(?:y|tr|s)([^<[{(A-Za-z0-9 \t\n])(?:\\.|[^\\])*?\1(?:\\.|[^\\])*?\1[a-z]*""",
'action': lex.make_token},
{'name': 'package',
'expr': r"""(?<=package )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'use',
'expr': r"""(?<=use )(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'instance method',
'expr': r"""(?<=->)[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'static method',
'expr': r"""&?(?:[a-zA-Z_][a-zA-Z_0-9]*::)+[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'method declaration',
'expr': r"""(?<=sub )[a-zA-Z_][a-zA-Z_0-9]*(?=[ \n]*{)""",
'action': lex.make_token},
{'name': 'built-in method',
'expr': r"""(?<!->)&?(?:write|warn|wantarray|waitpid|wait|vec|values|utime|use|untie|unshift|unpack|unlink|undef|umask|ucfirst|uc|truncate|times|time|tied|tie|telldir|tell|syswrite|system|sysseek|sysread|sysopen|syscall|symlink|substr|sub|study|stat|srand|sqrt|sprintf|split|splice|sort|socketpair|socket|sleep|sin|shutdown|shmwrite|shmread|shmget|shmctl|shift|setsockopt|setservent|setpwent|setprotoent|setpriority|setpgrp|setnetent|sethostent|setgrent|send|semop|semget|semctl|select|seekdir|seek|scalar|rmdir|rindex|rewinddir|reverse|return|reset|require|rename|ref|redo|recv|readpipe|readlink|readline|readdir|read|rand|quotemeta|push|prototype|printf|print|pos|pop|pipe|package|pack|our|ord|opendir|open|oct|no|next|my|msgsnd|msgrcv|msgget|msgctl|mkdir|map|lstat|log|lock|localtime|local|listen|link|length|lcfirst|lc|last|kill|keys|join|ioctl|int|index|import|hex|grep|goto|gmtime|glob|getsockopt|getsockname|getservent|getservbyport|getservbyname|getpwuid|getpwnam|getpwent|getprotoent|getprotobynumber|getprotobyname|getpriority|getppid|getpgrp|getpeername|getnetent|getnetbyname|getnetbyaddr|getlogin|gethostent|gethostbyname|gethostbyaddr|getgrnam|getgrgid|getgrent|getc|formline|format|fork|flock|fileno|fcntl|exp|exit|exists|exec|eval|eof|endservent|endpwent|endprotoent|endnetent|endhostent|endgrent|each|dump|do|die|delete|defined|dbmopen|dbmclose|crypt|cos|continue|connect|closedir|close|chroot|chr|chown|chop|chomp|chmod|chdir|caller|bless|binmode|bind|atan2|alarm|accept|abs)(?![a-zA-Z0-9_])""",
#'expr':r"""(?<!->)&?(?:abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|chmod|chomp|chop|chown|chroot|chr|closedir|close|connect|cos|crypt|dbmclose|dbmopen|defined|delete|die|dump|each|eof|exec|exists|exit|exp|fcntl|fileno|flock|fork|format|formline|getc|getlogin|getpeername|grep|int|join|keys|lc|map|open|pop|print|push|rand|readdir|ref|scalar|select|shift|sort|split|srand|time|uc|unshift|values|wantarray|warn)(?![a-zA-Z0-9_])""",
'action': lex.make_token},
{'name': 'method',
'expr': r"""&(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'methodref',
'expr': r"""&\$(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'bareword method',
'expr': r"""(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*(?=[ \n]*(?:\(|->))""",
'action': lex.make_token},
{'name': "delimiter",
'expr': r"""\(|\)|\[|\]|{|}|,|;|=>|=|\?|(?<!:):(?!=:)""",
'action': lex.make_token},
{'name': "unary operator",
'expr': r"""\+=|-=|\*=|/=|//=|%=|&=\|\^=|>>=|<<=|\*\*=""",
'action': lex.make_token},
{'name': "operator",
'expr': r"""\+|<=>|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|=~|!~|!=|%|!|\.""",
'action': lex.make_token},
{'name': 'bareword',
'expr': r"""(?:[a-zA-Z_][a-zA-Z_0-9]*::)*[a-zA-Z_][a-zA-Z_0-9]*""",
'action': lex.make_token},
{'name': 'default',
'expr': r""".|\n""",
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
sub_exprs = {}
string_rules = []
for rdir in PerlGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)
if INTERPOLATION_HIGHLIGHTING:
if rdir['name'] in ('scalar', 'system scalar', 'array', 'hash',
'system array'):
rdir2 = rdir.copy()
rdir2['name'] = 'interpolated ' + rdir['name']
string_rules.append(lex.Rule(**rdir2))
elif rdir['name'] in ('heredoc', 'string1', 'string2'):
sub_exprs[rdir['name']] = rdir['expr']
if INTERPOLATION_HIGHLIGHTING:
string_rules.append(lex.Rule(name="default string",
expr=r"""(?:\\.|[^\\\$]|\n)+|\$""",
action=lex.make_token))
string_grammar = lex.Grammar(rules=string_rules)
self.insert(0, lex.SubRule(name='heredoc',
expr=sub_exprs['heredoc'],
grammar=string_grammar))
self.insert(4, lex.SubRule(name="string1",
expr=sub_exprs['string1'],
grammar=string_grammar))
self.insert(5, lex.SubRule(name="string2",
expr=sub_exprs['string2'],
grammar=string_grammar))
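
Note: to make the "basic gist" comment in the match-regex rules above concrete, here is that simplified pattern run through plain re. The sample Perl fragment is made up, and the real 'match regex' rule additionally requires one of the =~ / !~ / ( lookbehinds before a bare / delimiter:

    import re

    gist = re.compile(r"/(\\.|[^\\])*?/[a-z]*")
    m = gist.search(r"$line =~ /fo\/o/gi;")
    print m.group(0)    # /fo\/o/gi -- escaped delimiter and trailing flags included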


@@ -1,102 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class PythonGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': "import statement",
'expr': r"""(?:^|(?<= ))import [ .]*(?=\n)""",
'action': lex.make_token},
{'name': "method declaration",
'expr': r"(?<=def )[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "class declaration",
'expr': r"(?<=class )[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': 'keyword',
'expr': r"""(?:and|assert|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|not|or|pass|print|raise|return|try|while|yield)(?![a-zA-Z0-9_])""",
'action': lex.make_token},
{'name': "pseudo-keyword",
'expr': r"""(?:as|self|True|False|None|Exception)(?![a-zA-Z0-9_])""",
'action': lex.make_token},
{'name': "built-in method",
'expr': r"""(?<!\.)(?:bool|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|long|map|min|max|object|oct|open|ord|pow|property|range|raw_input|reduce|repr|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)(?![a-zA-Z0-9_])""",
'action': lex.make_token},
{'name': "bound method",
'expr': r"(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()",
'action': lex.make_token},
{'name': "system_identifier",
'expr': r"__[a-zA-Z0-9_]*__",
'action': lex.make_token},
{'name': "private_identifier",
'expr': r"__[a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "hidden_identifier",
'expr': r"_[a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "identifier",
'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "delimiter",
'expr': r"""\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=""",
'action': lex.make_token},
{'name': "operator",
'expr': r"""\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%""",
'action': lex.make_token},
{'name': "integer",
'expr': r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
'action': lex.make_token},
{'name': "float",
'expr': r"""[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+""",
'action': lex.make_token},
{'name': "imaginary",
'expr': r"""[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]""",
'action': lex.make_token},
{'name': "string4",
'expr': r'"""(?:.|\n)*?(?:"""|$)',
'action': lex.make_token},
{'name': "string3",
'expr': r"'''(?:.|\n)*?(?:'''|$)",
'action': lex.make_token},
{'name': "string1",
'expr': r'"(?:\\.|[^\\"])*(?:"|.?$)',
'action': lex.make_token},
{'name': "string2",
'expr': r"'(?:\\.|[^\\'])*(?:'|.?$)",
'action': lex.make_token},
{'name': "comment",
'expr': r'[#].*(?=\n|$)',
'action': lex.make_token},
{'name': "continuation",
'expr': r'\\(?=(?:\n|$))',
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]


@@ -1,85 +0,0 @@
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class ShGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': "method",
'expr': r"""[a-zA-Z_][a-zA-Z0-9_]*(?=\(\))""",
'action': lex.make_token},
{'name': 'reserved',
'expr': r"""(?:case|done|do|elif|else|esac|fi|for|function|if|in|select|then|until|while|time)(?![a-zA-Z0-9_=])""",
'action': lex.make_token},
{'name': 'builtin',
'expr': r"""(?:source|alias|bg|bind|break|builtin|cd|command|compgen|complete|declare|dirs|disown|echo|enable|eval|exec|exit|export|fc|fg|getops|hash|help|history|jobs|kill|let|local|logout|popd|printf|pushd|pwd|readonly|read|return|set|shift|shopt|suspend|test|times|trap|type|ulimit|umask|unalias|unset|wait)(?![a-zA-Z0-9_=/])""",
'action': lex.make_token},
{'name': 'operator',
'expr': r"""(?:-eq|-ne|-gt|-lt|-ge|-le| = | != )""",
'action': lex.make_token},
## {'name': 'redirection',
## 'expr': r"(?:[1-6] *)?> *(?:&[1-6]|(?:\\.|[^\\\"';| ])+)",
## 'action': lex.make_token},
{'name': 'delimiter',
'expr': """[][\(\);\{\}|&><]""",
'action': lex.make_token},
## {'name': 'variable0',
## 'expr': r"""(?:(?<=\n)|^) *[a-zA-Z_][a-zA-Z0-9_]*(?=\=)""",
## 'action': lex.make_token},
{'name': 'variable0',
'expr': r"""(?:(?<=\n) *|^ *| +)[a-zA-Z_][a-zA-Z0-9_]*(?=\=)""",
'action': lex.make_token},
{'name': "variable1",
'expr': r"\${(?:[a-zA-Z0-9_]+|\?\$)}",
'action': lex.make_token},
{'name': "variable2",
'expr': r"\$[^({][a-zA-Z0-9_]*",
'action': lex.make_token},
{'name': "variable3",
'expr': r"\$(?=\()",
'action': lex.make_token},
{'name': "eval",
'expr': r'`(?:\\.|[^\\`])*(?:`|.?$)',
'action': lex.make_token},
{'name': "string1",
'expr': r'"(?:\\.|[^\\"])*(?:"|.?$)',
'action': lex.make_token},
{'name': "string2",
'expr': r"'(?:\\.|[^\\'])*(?:'|.?$)",
'action': lex.make_token},
{'name': 'continuation',
'expr': r"""\\(?= *(\n|$))""",
'action': lex.make_token},
{'name': "comment",
'expr': r'[#].*(?:\n|$)',
'action': lex.make_token},
{'name': 'bareword',
'expr': r"""[a-zA-Z0-9_-]+""",
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in ShGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)


@@ -1,70 +0,0 @@
import lex
class SqlGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': "sql comment",
'expr': r'--[^\n]*',
'action': lex.make_token},
{'name': "c comment",
'expr': r'/\*(?:.| |\n)*?(?:\*/|$)',
'action': lex.make_token},
{'name': 'delimiter',
'expr': r'[][();,\.:$]',
'action': lex.make_token},
{'name': 'attribute1',
'expr': r'''(?:CHECK|EXISTS|UNIQUE|NOT NULL|DEFAULT|PRIMARY KEY|MINVALUE|FOREIGN KEY|REFERENCES)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'attribute2',
'expr': r'''(?:check|exists|unique|not null|default|primary key|minvalue|foreign key|references)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'operator1',
'expr': r'''(?:CASE|WHEN|THEN|ELSE|END|NOT|AND|OR|IS NOT|IS|IN|NOT IN)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'operator2',
'expr': r'''(?:case|when|then|else|end|not|and|or|is not|is|in|not in)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'keyword1',
'expr': r'''(?:CREATE DATABASE|CREATE INDEX|CREATE SEQUENCE|CREATE TABLE|CREATE TRIGGER|CREATE VIEW|SELECT|INSERT|UPDATE|DELETE|DROP DATABASE|DROP INDEX|DROP SEQUENCE|DROP TABLE|DROP TRIGGER|DROP VIEW|CREATE USER|ALTER USER|DROP USER|DROP FUNCTION|GRANT|REVOKE|CREATE FUNCTION|CREATE OR REPLACE FUNCTION|CREATE OR REPLACE VIEW|CREATE LANGUAGE|CREATE OPERATOR|CREATE TYPE)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'keyword2',
'expr': r'''(?:create database|create index|create sequence|create table|create trigger|create view|select|insert|update|delete|drop database|drop index|drop sequence|drop table|drop trigger|drop view|create user|alter user|drop user|drop function|grant|revoke|create function|create or replace function|create or replace view|create language|create operator|create type)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'pseudo-keyword1',
'expr': r'''(?:RETURNS|LANGUAGE|RIGHT JOIN|LEFT JOIN|INNER JOIN|OUTER JOIN|JOIN|WHERE|NULL|TRUE|FALSE|INTO|VALUES|AS|FROM|ORDER BY|ASC|DESC|LIMIT|DISTINCT|CASCADE|USING|ON)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'pseudo-keyword2',
'expr': r'''(?:returns|language|right join|left join|inner join|outer join|join|where|null|true|false|into|values|as|from|order by|asc|desc|limit|distinct|cascade|using|on)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'type1',
'expr': '(?:VOID|ROW|SERIAL|VARCHAR|FLOAT|INTEGER|INT|TEXT|TIMESTAMPTZ|TIMESTAMP|DATETZ|DATE|TIMETZ|TIME|BOOLEAN|BOOL)(?![A-Za-z0-9_])',
'action': lex.make_token},
{'name': 'type2',
'expr': '(?:void|row|serial|varchar|float|integer|int|text|timestamptz|timestamp|datetz|date|timetz|time|boolean|bool)(?![A-Za-z0-9_])',
'action': lex.make_token},
{'name': 'function',
'expr': r'''(?:nextval|current_timestamp|current_time|current_date)(?![A-Za-z0-9_])''',
'action': lex.make_token},
{'name': 'string',
'expr': r"""'(?:\\.|[^\\'])*(?:'|$)""",
'action': lex.make_token},
{'name': 'quoted',
'expr': r'''"(?:\\.|[^\\"])*(?:"|$)''',
'action': lex.make_token},
{'name': 'bareword',
'expr': r'''[A-Za-z0-9_]+''',
'action': lex.make_token},
{'name': "default",
'expr': r'\\.|.|\n',
'action': lex.silent}
]


@@ -1,43 +0,0 @@
import os
import ispell, lex
def make_token_spell(rule, m, offset):
'''return a token from a hit'''
# first let's figure out the actual word we need to check
if rule.name == 'continued word':
word = '%s%s' % (m.group(1), m.group(2))
else:
word = m.group(0)
# okay, now we check the spelling; we don't spell-check all caps words
if ispell.can_spell() and \
not ispell.get_speller().check(word, caps=False, title=False):
name = "misspelled %s" % rule.name
else:
name = rule.name
return(lex.Token(name, m.start() + offset, m.end() + offset, word))
class TextGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'continued word',
'expr': r"""([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])""",
'action': make_token_spell},
{'name': 'word',
'expr': r"""(?:[a-zA-Z][-']?)*[a-zA-Z]""",
'action': make_token_spell,
},
{'name': 'stuff',
'expr': r"""[^ \n]+""",
'action': lex.make_token,
},
{'name': "default",
'expr': r'.| |\n',
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in TextGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)
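
Note: make_token_spell() above stitches a word that was hyphenated across a line break back together before spell-checking it (the ispell module is not part of this commit). A quick illustration of the 'continued word' expression with plain re and a made-up sample:

    import re

    expr = re.compile(r"([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])")
    m = expr.match("hyphen-\n    ated words")
    print m.group(1), m.group(2)   # hyphen ated
    # make_token_spell would then spell-check the rejoined word 'hyphenated'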


@@ -1,88 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class TTGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'comment',
'expr': r'''<!--(?:.| |\n)+?(?:-->|$)''',
'action': lex.make_token},
{'name': 'template',
'expr': r'''\[%(?:.| |\n)*?%\]''',
#'expr': r'''\[%%\]''',
'action': lex.make_token},
{'name': 'ltb',
'expr': r'<![^>]*>',
'action': lex.make_token},
{'name': 'ltq',
'expr': r'<\?',
'action': lex.make_token},
{'name': 'gtq',
'expr': r'\?>',
'action': lex.make_token},
{'name': 'ltc',
'expr': r'</',
'action': lex.make_token},
{'name': 'gtc',
'expr': r'/>',
'action': lex.make_token},
{'name': 'lt',
'expr': r'<',
'action': lex.make_token},
{'name': 'gt',
'expr': r'>',
'action': lex.make_token},
{'name': 'nodevalue',
'expr': r'''(?:(?<=>)|(?<=%\]))(?:[^<\[]|\n|\[(?!%.*?%\]))+?(?=(?:<|\[%))''',
'action': lex.make_token},
{'name': 'whitespace',
'expr': r'''(?: |\n)+''',
'action': lex.silent},
{'name': 'namespace',
'expr': r'[a-zA-Z_]+:',
'action': lex.make_token},
#{'name': 'xi',
# 'expr': r'xi:',
# 'action': lex.make_token},
{'name': 'opentag',
'expr': r'(?:(?<=<)|(?<=xi:))[^ >\n/]+',
'action': lex.make_token},
{'name': 'attrvalue',
'expr': r'''(?<==)"(?:\\.|[^"\\])*(?:"|\\?$)|(?<==)'(?:\\.|[^'\\])*(?:'|\\?$)''',
'action': lex.make_token},
{'name': 'attrname',
'expr': r'[^ \n=>]+(?:(?==)|$)',
'action': lex.make_token},
{'name': 'closetag',
'expr': r'[^ =\n<>/]+',
'action': lex.make_token},
{'name': 'default',
'expr': r""".|\n""",
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in TTGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)


@@ -1,83 +0,0 @@
#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class XMLGrammar(lex.Grammar):
GRAMMAR_LIST = [
{'name': 'comment',
'expr': r'''<!--(?:.| |\n)+?(?:-->|$)''',
'action': lex.make_token},
{'name': 'ltb',
'expr': r'<![^>]*>',
'action': lex.make_token},
{'name': 'ltq',
'expr': r'<\?',
'action': lex.make_token},
{'name': 'gtq',
'expr': r'\?>',
'action': lex.make_token},
{'name': 'ltc',
'expr': r'</',
'action': lex.make_token},
{'name': 'gtc',
'expr': r'/>',
'action': lex.make_token},
{'name': 'lt',
'expr': r'<',
'action': lex.make_token},
{'name': 'gt',
'expr': r'>',
'action': lex.make_token},
{'name': 'nodevalue',
'expr': r'''(?<=>)(?:[^<]|\n)+?(?=<)''',
'action': lex.make_token},
{'name': 'whitespace',
'expr': r'''(?: |\n)+''',
'action': lex.silent},
{'name': 'namespace',
'expr': r'[a-zA-Z_]+:',
'action': lex.make_token},
#{'name': 'xi',
# 'expr': r'xi:',
# 'action': lex.make_token},
{'name': 'opentag',
'expr': r'(?:(?<=<)|(?<=xi:))[^ >\n/]+',
'action': lex.make_token},
{'name': 'attrvalue',
'expr': r'''(?<==)"(?:\\.|[^"\\])*(?:"|\\?$)|(?<==)'(?:\\.|[^'\\])*(?:'|\\?$)''',
'action': lex.make_token},
{'name': 'attrname',
'expr': r'[^ \n=>]+(?:(?==)|$)',
'action': lex.make_token},
{'name': 'closetag',
'expr': r'[^ =\n<>/]+',
'action': lex.make_token},
{'name': 'default',
'expr': r""".|\n""",
'action': lex.silent}
]
def _default_rules(self):
"""subclasses can override this to define defaults for a grammar"""
for rdir in XMLGrammar.GRAMMAR_LIST:
self.add_rule(**rdir)