pmacs3/lex_xml.py

84 lines
2.0 KiB
Python
Executable File

#!/bin/env python
# standard library imports (optparse has shipped with Python since 2.3)
from optparse import OptionParser
# our imports
import lex
class XMLGrammar(lex.Grammar):
    """Regex rule table for tokenizing XML.

    ``GRAMMAR_LIST`` holds one dict per rule; each dict is passed verbatim
    as keyword arguments to ``add_rule`` (``name``, ``expr``, ``action``).
    Rules are registered in list order.  NOTE(review): the ordering (e.g.
    ``</`` before ``<``) suggests earlier rules take priority when matching
    -- confirm against ``lex.Grammar``.

    Rules use either ``lex.make_token`` or ``lex.silent`` as their action;
    presumably the former emits a token and the latter discards the match
    -- verify in the ``lex`` module.
    """

    GRAMMAR_LIST = [
        # '<!--' up to the first '-->', or up to '$' when unterminated.
        {
            'name': 'comment',
            'expr': r'''<!--(?:.| |\n)+?(?:-->|$)''',
            'action': lex.make_token,
        },
        # '<!' followed by anything up to the next '>' (e.g. <!DOCTYPE ...>).
        {
            'name': 'ltb',
            'expr': r'<![^>]*>',
            'action': lex.make_token,
        },
        # Processing-instruction delimiters '<?' and '?>'.
        {
            'name': 'ltq',
            'expr': r'<\?',
            'action': lex.make_token,
        },
        {
            'name': 'gtq',
            'expr': r'\?>',
            'action': lex.make_token,
        },
        # Two-character tag delimiters '</' and '/>'; listed ahead of the
        # single-character '<' and '>' rules.
        {
            'name': 'ltc',
            'expr': r'</',
            'action': lex.make_token,
        },
        {
            'name': 'gtc',
            'expr': r'/>',
            'action': lex.make_token,
        },
        {
            'name': 'lt',
            'expr': r'<',
            'action': lex.make_token,
        },
        {
            'name': 'gt',
            'expr': r'>',
            'action': lex.make_token,
        },
        # Non-empty text that sits directly after a '>' and directly before
        # the next '<' (both checked with zero-width assertions).
        {
            'name': 'nodevalue',
            'expr': r'''(?<=>)(?:[^<]|\n)+?(?=<)''',
            'action': lex.make_token,
        },
        # Runs of spaces and newlines.
        {
            'name': 'whitespace',
            'expr': r'''(?: |\n)+''',
            'action': lex.silent,
        },
        # ASCII letters/underscores followed by ':' (a prefix such as 'xi:').
        {
            'name': 'namespace',
            'expr': r'[a-zA-Z_]+:',
            'action': lex.make_token,
        },
        # NOTE: a dedicated rule matching only the literal prefix 'xi:' used
        # to sit here (it was commented out); the 'opentag' lookbehind below
        # still special-cases 'xi:' because lookbehinds must be fixed-width.
        # Tag name: characters up to a space, '>', newline or '/', located
        # directly after '<' or directly after 'xi:'.
        {
            'name': 'opentag',
            'expr': r'(?:(?<=<)|(?<=xi:))[^ >\n/]+',
            'action': lex.make_token,
        },
        # Double- or single-quoted string directly after '=', allowing
        # backslash escapes; an unterminated string may run to '$'.
        {
            'name': 'attrvalue',
            'expr': r'''(?<==)"(?:\\.|[^"\\])*(?:"|\\?$)|(?<==)'(?:\\.|[^'\\])*(?:'|\\?$)''',
            'action': lex.make_token,
        },
        # Run of characters (no space, newline, '=' or '>') that is followed
        # by '=' or reaches '$'.
        {
            'name': 'attrname',
            'expr': r'[^ \n=>]+(?:(?==)|$)',
            'action': lex.make_token,
        },
        # Any remaining run without space, '=', newline, '<', '>' or '/'.
        {
            'name': 'closetag',
            'expr': r'[^ =\n<>/]+',
            'action': lex.make_token,
        },
        # Fallback: any single character, including a newline.
        {
            'name': 'default',
            'expr': r""".|\n""",
            'action': lex.silent,
        },
    ]

    def _default_rules(self):
        """subclasses can override this to define defaults for a grammar

        Registers every entry of ``GRAMMAR_LIST`` through ``add_rule``, in
        list order.  The table is looked up through ``self`` so that a
        subclass which only replaces ``GRAMMAR_LIST`` gets its own rules
        without having to re-implement this method.
        """
        for rule_spec in self.GRAMMAR_LIST:
            self.add_rule(**rule_spec)