#!/bin/env python

# 2.3 imports
from optparse import OptionParser

# our imports
import lex
class PythonGrammar(lex.Grammar):
    """Lexer grammar for highlighting Python source code.

    Each entry in GRAMMAR_LIST maps a token name to the regular
    expression that recognizes it and the lex action to run on a match.
    Rules are listed roughly most-specific first; the trailing "default"
    rule silently consumes any character nothing else matched.
    """

    GRAMMAR_LIST = [
        # NOTE(review): '[ .]*' only matches spaces and dots, so this rule
        # cannot match a real module name (e.g. 'import os').  It looks
        # like it was meant to be something such as '[^\n]*' -- left as-is
        # pending confirmation of the intended behavior.
        {'name': "import statement",
         'expr': r"""(?:^|(?<= ))import [ .]*(?=\n)""",
         'action': lex.make_token},

        # The name immediately following 'def '.
        {'name': "method declaration",
         'expr': r"(?<=def )[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},

        # The name immediately following 'class '.
        {'name': "class declaration",
         'expr': r"(?<=class )[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},

        # Reserved words; the lookahead stops 'if' matching inside 'ifconfig'.
        {'name': 'keyword',
         'expr': r"""(?:and|assert|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|not|or|pass|print|raise|return|try|while|yield)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},

        # Names that are not keywords but are conventionally highlighted
        # as if they were.
        {'name': "pseudo-keyword",
         'expr': r"""(?:as|self|True|False|None|Exception)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},

        # Builtin callables; the (?<!\.) guard skips attribute access
        # such as 'x.open'.
        {'name': "built-in method",
         'expr': r"""(?<!\.)(?:bool|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|long|map|min|max|object|oct|open|ord|pow|property|range|raw_input|reduce|repr|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},

        # A name called through attribute access: '.name('.
        {'name': "bound method",
         'expr': r"(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()",
         'action': lex.make_token},

        # __dunder__ names; must precede private_identifier below.
        {'name': "system_identifier",
         'expr': r"__[a-zA-Z0-9_]*__",
         'action': lex.make_token},

        # Double-underscore (name-mangled) names.
        {'name': "private_identifier",
         'expr': r"__[a-zA-Z0-9_]*",
         'action': lex.make_token},

        # Single-underscore (conventionally non-public) names.
        {'name': "hidden_identifier",
         'expr': r"_[a-zA-Z0-9_]*",
         'action': lex.make_token},

        # Any remaining name.
        {'name': "identifier",
         'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},

        # NOTE(review): because this rule precedes "operator" and contains
        # a bare '=', a first-match-in-list-order lexer would tokenize
        # '==' as two '=' delimiters -- verify against lex.Grammar's
        # matching semantics.
        {'name': "delimiter",
         'expr': r"""\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=""",
         'action': lex.make_token},

        {'name': "operator",
         'expr': r"""\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%""",
         'action': lex.make_token},

        # BUG FIX: alternatives reordered longest-prefix-first.  Python's
        # re module takes the first alternative that yields a match, so
        # with the bare '0' listed first, '0x1f' lexed as just '0' and
        # stranded 'x1f'; hex and octal forms must be tried before it.
        {'name': "integer",
         'expr': r"(?:0[xX][0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*|0)[lL]?",
         'action': lex.make_token},

        # BUG FIX: exponent form moved first (first-match alternation meant
        # '1.5e3' matched only '1.5' and stranded 'e3'), and the exponent
        # mantissa's single-digit '[0-9]' became '[0-9]+' so '12e5' can
        # match at all.
        # NOTE(review): "integer" precedes this rule in the list; if
        # lex.Grammar matches rules in list order, floats may never be
        # reached -- verify.
        {'name': "float",
         'expr': r"""(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)[eE][\+-]?[0-9]+|[0-9]+\.[0-9]*|\.[0-9]+""",
         'action': lex.make_token},

        # BUG FIX: the original '[0-9]+|(?:...)[jJ]' applied the j/J
        # suffix only to the second alternative, so any bare integer
        # matched as imaginary.  Every form now requires the suffix, with
        # float forms tried before plain digits.
        {'name': "imaginary",
         'expr': r"""(?:[0-9]+\.[0-9]*|\.[0-9]+|[0-9]+)(?:[eE][\+-]?[0-9]+)?[jJ]""",
         'action': lex.make_token},

        # Triple-quoted strings; the (?:"""|$) alternative lets an
        # unterminated string run to end of input without failing.
        {'name': "string4",
         'expr': r'"""(?:.|\n)*?(?:"""|$)',
         'action': lex.make_token},

        {'name': "string3",
         'expr': r"'''(?:.|\n)*?(?:'''|$)",
         'action': lex.make_token},

        # Single-line strings; '\\.' consumes escape pairs so an escaped
        # quote does not terminate the string early.
        {'name': "string1",
         'expr': r'"(?:\\.|[^\\"])*(?:"|.?$)',
         'action': lex.make_token},

        {'name': "string2",
         'expr': r"'(?:\\.|[^\\'])*(?:'|.?$)",
         'action': lex.make_token},

        # '#' to end of line.
        {'name': "comment",
         'expr': r'[#].*(?=\n|$)',
         'action': lex.make_token},

        # A backslash at end of line (logical-line continuation).
        {'name': "continuation",
         'expr': r'\\(?=(?:\n|$))',
         'action': lex.make_token},

        # Catch-all: silently swallow anything no rule above matched.
        {'name': "default",
         'expr': r'\\.|.|\n',
         'action': lex.silent}
    ]
|