#!/usr/bin/env python
"""Lexer grammar for NASM assembly source, plus a small command-line driver.

When run as a script, every file named on the command line is read, handed
to a ``lex.Lexer`` built on ``NasmGrammar``, and each non-None item the lexer
yields is printed.
"""

# 2.3 imports
from optparse import OptionParser

# our imports
import lex


class NasmGrammar(lex.Grammar):
    """Grammar whose rules describe NASM assembly source.

    Each entry of GRAMMAR_LIST is a dict with the keys 'name', 'expr' (a
    regular expression) and 'action'; the entries are passed one by one, as
    keyword arguments, to ``self.add_rule`` by ``_default_rules``.
    """

    GRAMMAR_LIST = [
        {'name': 'keyword',
         'expr': r"""(?:section|global|extern)(?![a-zA-Z_])""",
         'action': lex.make_token},

        {'name': "nasm macros",
         'expr': r"%(?:define|undef|assign|strlen|macro|endmacro|if|elif|else|endif|ifdef|ifndef|include|push|pop|stacksize)(?![a-zA-Z_])",
         'action': lex.make_token},

        {'name': "instructions",
         'expr': r"""(?:jeq|jne|ja|jmp|push|pushad|pushfd|call|ret|sub|add|pop|popa|popad|popfd|call|and|cwd|cdq|cmp|cmpxchg|cpuid|div|divpd|enter|leave|fadd|fld|fmul|fsqrt|fsub|hlt|imul|inc|int|int3|lea|mov|movd|mul|neg|not|nop|or|sal|sar|shl|shr|shld|shrd|syscall|sysenter|sysexit|test|xchg|xadd|xor)(?![a-zA-Z_])""",
         'action': lex.make_token},

        # NOTE(review): unlike the rules above, the register and prefix
        # patterns carry no trailing (?![a-zA-Z_]) lookahead, so on their own
        # they also match the start of a longer word (e.g. "eaxfoo").  Whether
        # that matters depends on how lex picks between rules -- confirm.
        {'name': "registers",
         'expr': r"""(?:eax|ax|ah|al|ebx|bx|bh|bl|ecx|cx|ch|cl|esi|edi|esp|ebp)""",
         'action': lex.make_token},

        {'name': "prefix",
         'expr': r"(?:dword|word|lock)",
         'action': lex.make_token},

        {'name': "label",
         'expr': r"[a-zA-Z_.][a-zA-Z0-9_.]*:",
         'action': lex.make_token},

        {'name': "identifier",
         'expr': r"[a-zA-Z_][a-zA-Z0-9_]*",
         'action': lex.make_token},

        # Regex alternation is tried left to right, so the hex and octal
        # forms must come before the bare "0"; otherwise "0x1F" can only ever
        # match as "0".
        {'name': "integer",
         'expr': r"(0[xX][0-9a-fA-F]+|0[0-7]+|[1-9][0-9]*|0)[lL]?",
         'action': lex.make_token},

        # NOTE(review): the space after "([0-9]|" is part of the pattern and
        # is matched literally unless lex compiles expressions in verbose
        # mode; it looks like a leftover from line wrapping -- confirm.
        {'name': "float",
         'expr': r"""[0-9]+\.[0-9]*|\.[0-9]+|([0-9]| [0-9]+\.[0-9]*|\.[0-9]+)[eE][\+-]?[0-9]+""",
         'action': lex.make_token},

        # NOTE(review): "[.|\n]" is a character class (a literal dot, a pipe
        # or a newline), not "any character"; left untouched because the
        # intended triple-quote behaviour for NASM is unclear -- confirm.
        {'name': "string3",
         'expr': r'"""[.|\n]*?(?:"""|$)',
         'action': lex.make_token},

        {'name': "string1",
         'expr': r'"(?:\\.|[^\\"])*(?:"|$)',
         'action': lex.make_token},

        {'name': "string2",
         'expr': r"'(?:\\.|[^\\'])*(?:'|$)",
         'action': lex.make_token},

        {'name': "comment",
         'expr': r'[;].*(?:\n|$)',
         'action': lex.make_token},

        # Catch-all: an escaped character, any single character, or a
        # newline, handled by lex.silent rather than lex.make_token.
        {'name': "default",
         'expr': r'\\.|.|\n',
         'action': lex.silent},
    ]

    def _default_rules(self):
        """subclasses can override this to define defaults for a grammar"""
        # Register the rules in the order they are listed above.
        for rdir in NasmGrammar.GRAMMAR_LIST:
            self.add_rule(**rdir)


def main(argv=None):
    """Lex every file named on the command line and print what comes out.

    argv -- list of command-line arguments to parse; None (the default)
            makes optparse fall back to sys.argv[1:].
    """
    # optparse substitutes the program name for "%prog"; the string is not
    # %-formatted anywhere, so a doubled "%%" would leave a stray "%" behind.
    usage = "%prog FILE [FILE ...]\n\n" \
            "Lex one or more files according to the NASM grammar"
    parser = OptionParser(usage=usage)
    (_options, args) = parser.parse_args(argv)

    grammar = NasmGrammar()
    lexer = lex.Lexer(grammar=grammar)

    for path in args:
        # try/finally guarantees the handle is released even if read() fails.
        f = open(path, 'r')
        try:
            data = f.read()
        finally:
            f.close()

        # Single-argument parenthesised print behaves the same as the print
        # statement on Python 2 and is also valid on Python 3.
        print("Lexing %s:" % (path))
        lexer.lex(data)

        for item in lexer:
            if item is not None:
                print(item)


if __name__ == "__main__":
    main()