# pmacs3/lex_text.py

import os
import ispell, lex

def make_token_spell(rule, m):
    """Build a lex.Token for a regex hit, flagging misspelled words.

    rule -- the grammar rule that matched; only its ``name`` is read here
    m    -- the regex match object for the hit

    The returned token always spans the whole match (m.start() to m.end())
    and carries the full matched text.  Its name is the rule's name, or
    "misspelled <rule name>" when a speller is available and rejects the
    word.
    """
    # A 'continued word' hit is a word broken across two lines; the two
    # captured halves are glued together so that the whole word (without
    # the line-break hyphen and indentation) is what gets spell-checked.
    if rule.name == 'continued word':
        text = '%s%s' % m.group(1, 2)
    else:
        text = m.group(0)

    # Start from the plain rule name and only override it on a failed check.
    label = rule.name
    if ispell.can_spell():
        speller = ispell.get_speller()
        # per the original author's note, all-caps words are not
        # spell-checked (presumably what caps=False requests -- confirm
        # against the ispell module)
        if not speller.check(text, caps=False, title=True):
            label = "misspelled %s" % rule.name

    return lex.Token(label, m.start(), m.end(), m.group(0))

class TextGrammar(lex.Grammar):
    """Grammar for plain text whose word tokens are run through the speller."""

    # Each entry holds the keyword arguments for one add_rule() call.
    GRAMMAR_LIST = [
        # A word split across two lines: first half, a hyphen, a newline,
        # optional leading spaces, then the second half.
        # NOTE(review): the second half does not allow an apostrophe while
        # the first half does -- confirm whether that is intentional.
        dict(name='continued word',
             expr=r"""([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])""",
             action=make_token_spell),

        # An ordinary word: at least two characters, beginning and ending
        # with a letter, with letters, hyphens or apostrophes in between.
        dict(name='word',
             expr=r"""[a-zA-Z][a-zA-Z-']*[a-zA-Z]""",
             action=make_token_spell),

        # Anything else, one character at a time; handed to lex.silent.
        dict(name="default",
             expr=r'.| |\n',
             action=lex.silent),
    ]

    def _default_rules(self):
        """Register this grammar's default rules (overridable by subclasses).

        Every entry of TextGrammar.GRAMMAR_LIST is passed to add_rule() as
        keyword arguments.  The list is looked up on TextGrammar itself,
        not on the instance's class.
        """
        for spec in TextGrammar.GRAMMAR_LIST:
            self.add_rule(**spec)