import os

import ispell
import lex


def make_token_spell(rule, m, offset):
    """Build a lex.Token for a regex hit, tagging it if the word is misspelled.

    rule   -- the grammar rule that produced the hit; only rule.name is read
    m      -- the regex match object for the hit
    offset -- amount added to m.start() and m.end() to get the token's span

    The token's name is rule.name, or "misspelled <rule.name>" when spelling
    is available (ispell.can_spell()) and the speller rejects the word.
    """
    # A 'continued word' is a word split by a hyphen at a line break; the two
    # captured halves are glued back together before checking the spelling.
    if rule.name == 'continued word':
        word = '%s%s' % m.group(1, 2)
    else:
        word = m.group(0)

    # Only consult the speller when spell checking is actually available.
    # NOTE(review): the original comment says all-caps words are not
    # spell-checked; presumably that is what caps=False / title=False
    # control -- confirm against ispell's check().
    misspelled = False
    if ispell.can_spell():
        speller = ispell.get_speller()
        misspelled = not speller.check(word, caps=False, title=False)

    name = ("misspelled %s" % rule.name) if misspelled else rule.name
    return lex.Token(name, m.start() + offset, m.end() + offset, word)


class TextGrammar(lex.Grammar):
    """Grammar for plain text: words are spell-checked, the rest is passed on."""

    # Rule definitions, each expanded into keyword arguments for add_rule()
    # in list order by _default_rules().
    GRAMMAR_LIST = [
        {
            'name': 'continued word',
            'expr': r"""([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])""",
            'action': make_token_spell,
        },
        {
            'name': 'word',
            'expr': r"""(?:[a-zA-Z][-']?)*[a-zA-Z]""",
            'action': make_token_spell,
        },
        {
            'name': 'stuff',
            'expr': r"""[^ \n]+""",
            'action': lex.make_token,
        },
        {
            'name': "default",
            'expr': r'.| |\n',
            'action': lex.silent,
        },
    ]

    def _default_rules(self):
        """Register every entry of TextGrammar.GRAMMAR_LIST as a rule.

        Subclasses can override this to define their own default rules.
        """
        for rule_spec in TextGrammar.GRAMMAR_LIST:
            self.add_rule(**rule_spec)