pmacs3/lex_python.py

103 lines
3.6 KiB
Python
Executable File

#!/bin/env python
# 2.3 imports
from optparse import OptionParser
# our imports
import lex
class PythonGrammar(lex.Grammar):
    """Token grammar for lexing Python source.

    Each GRAMMAR_LIST entry maps a token name to a regex and an action
    callback (lex.make_token emits a token; lex.silent discards the match).

    NOTE(review): rule ORDER appears to be load-bearing — e.g.
    "system_identifier" (__x__) must be tried before "private_identifier"
    (__x), and the triple-quoted string rules before the single-quoted
    ones.  This assumes lex.Grammar tries rules in list order with
    first-match-wins; confirm against lex.py.
    """
    GRAMMAR_LIST = [
        # Whole import line, anchored at line start or after a space.
        # NOTE(review): the class [ .]* matches ONLY spaces and dots, so
        # "import os" is never fully consumed by this rule — the pattern
        # looks garbled (perhaps meant something like [a-zA-Z0-9_, .]*);
        # confirm against upstream pmacs.
        {'name': "import statement",
         'expr': r"""(?:^|(?<= ))import [ .]*(?=\n)""",
         'action': lex.make_token},
        # Name immediately following "def " (lookbehind keeps "def" itself
        # for the keyword rule).
        {'name': "method declaration",
         'expr': r"(?<=def )[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},
        # Name immediately following "class ".
        {'name': "class declaration",
         'expr': r"(?<=class )[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},
        # Python 2 reserved words; the trailing lookahead stops "in" from
        # matching inside "int", etc.
        {'name': 'keyword',
         'expr': r"""(?:and|assert|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|not|or|pass|print|raise|return|try|while|yield)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},
        # Names that are not keywords but conventionally highlighted as if
        # they were (Python 2: True/False/None are plain builtins).
        {'name': "pseudo-keyword",
         'expr': r"""(?:as|self|True|False|None|Exception)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},
        # Builtin callables; (?<!\.) keeps attribute accesses like
        # "x.open" from matching.
        {'name': "built-in method",
         'expr': r"""(?<!\.)(?:bool|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|long|map|min|max|object|oct|open|ord|pow|property|range|raw_input|reduce|repr|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)(?![a-zA-Z0-9_])""",
         'action': lex.make_token},
        # Attribute access followed by "(" — i.e. a method call site.
        {'name': "bound method",
         'expr': r"(?<=\.)[a-zA-Z_][a-zA-Z0-9_]*(?= *\()",
         'action': lex.make_token},
        # __dunder__ names; must precede private_identifier below.
        {'name': "system_identifier",
         'expr': r"__[a-zA-Z0-9_]*__",
         'action': lex.make_token},
        # __name (double leading underscore, no trailing one: only reached
        # when the system_identifier rule above did not match).
        {'name': "private_identifier",
         'expr': r"__[a-zA-Z0-9_]*",
         'action': lex.make_token},
        # _name (single leading underscore).
        {'name': "hidden_identifier",
         'expr': r"_[a-zA-Z0-9_]*",
         'action': lex.make_token},
        # Any remaining ordinary name.
        {'name': "identifier",
         'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},
        # Punctuation and augmented-assignment operators.
        {'name': "delimiter",
         'expr': r"""\(|\)|\[|\]|{|}|@|,|:|\.|`|=|;|\+=|-=|\*=|/=|//=|%=|&=|\|=|\^=|>>=|<<=|\*\*=""",
         'action': lex.make_token},
        # Arithmetic / bitwise / comparison operators (includes the
        # Python 2 "<>" inequality spelling).
        {'name': "operator",
         'expr': r"""\+|<>|<<|<=|<|-|>>|>=|>|\*\*|&|\*|\||/|\^|==|//|~|!=|%""",
         'action': lex.make_token},
        # Decimal / octal / hex integer with optional long suffix.
        # NOTE(review): regex alternation is leftmost-first, so the bare
        # "0" branch wins before "0[0-7]+" or "0[xX]...": "0xff" lexes as
        # integer "0" followed by identifier "xff".  The hex/octal
        # branches likely belong first — confirm intended behavior.
        {'name': "integer",
         'expr': r"(?:0|[1-9][0-9]*|0[0-7]+|0[xX][0-9a-fA-F]+)[lL]?",
         'action': lex.make_token},
        # Floating-point literal.
        # NOTE(review): the exponent branch's integer part is a single
        # [0-9], so "12e5" never matches here (it lexes as integer "12"
        # plus identifier "e5"); probably meant [0-9]+ — confirm.
        {'name': "float",
         'expr': r"""[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+""",
         'action': lex.make_token},
        # Imaginary literal (Python "j" suffix).
        # NOTE(review): the leading [0-9]+ alternative sits OUTSIDE the
        # [jJ]-suffixed group, so any bare integer satisfies this rule;
        # the earlier integer/float rules presumably mask it, but the
        # [jJ] suffix was likely meant to apply to all branches — confirm.
        {'name': "imaginary",
         'expr': r"""[0-9]+|(?:[0-9]+\.[0-9]*|\.[0-9]+|(?:[0-9]|[0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+)[jJ]""",
         'action': lex.make_token},
        # Triple-quoted strings: non-greedy up to the closing quotes, or
        # to end-of-input for an unterminated string.  Must precede the
        # single-quote rules below.
        {'name': "string4",
         'expr': r'"""(?:.|\n)*?(?:"""|$)',
         'action': lex.make_token},
        {'name': "string3",
         'expr': r"'''(?:.|\n)*?(?:'''|$)",
         'action': lex.make_token},
        # Single-line strings; backslash escapes allowed, and an
        # unterminated string is consumed to end of line.
        {'name': "string1",
         'expr': r'"(?:\\.|[^\\"])*(?:"|.?$)',
         'action': lex.make_token},
        {'name': "string2",
         'expr': r"'(?:\\.|[^\\'])*(?:'|.?$)",
         'action': lex.make_token},
        # "#" to end of line.
        {'name': "comment",
         'expr': r'[#].*(?=\n|$)',
         'action': lex.make_token},
        # Backslash line continuation (only when it ends the line).
        {'name': "continuation",
         'expr': r'\\(?=(?:\n|$))',
         'action': lex.make_token},
        # Catch-all: silently consume anything the rules above missed.
        {'name': "default",
         'expr': r'\\.|.|\n',
         'action': lex.silent}
    ]