# Lexer grammar for plain-text buffers: tokenizes words and runs them
# through the spell-checker.

import os

import ispell
import lex
def make_token_spell(rule, m, offset):
    """Return a lex.Token for a regex hit, spell-checking the matched word.

    rule   -- the grammar rule that produced the match (read: rule.name)
    m      -- the regex match object for the hit
    offset -- base offset added to the match's start/end positions

    If a speller is available and the word fails the check, the token
    name is prefixed with "misspelled ".
    """
    # first let's figure out the actual word we need to check
    if rule.name == 'continued word':
        # re-join a word that was hyphenated across a line break:
        # group(1) is the part before "-\n", group(2) the continuation
        word = '%s%s' % (m.group(1), m.group(2))
    else:
        word = m.group(0)

    # okay, now we check the spelling; we don't spell-check all caps words
    if ispell.can_spell() and \
       not ispell.get_speller().check(word, caps=False, title=False):
        name = "misspelled %s" % rule.name
    else:
        name = rule.name

    # return is a statement, not a function call
    return lex.Token(name, m.start() + offset, m.end() + offset, word)


class TextGrammar(lex.Grammar):
    """Grammar for plain text: spell-checked words, other runs, and filler."""

    # Rules are tried in order; earlier rules win.
    GRAMMAR_LIST = [
        # a word hyphenated across a line break, e.g. "con-\ntinued"
        {'name': 'continued word',
         'expr': r"""([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])""",
         'action': make_token_spell},

        # an ordinary word, allowing internal hyphens and apostrophes
        {'name': 'word',
         'expr': r"""(?:[a-zA-Z][-']?)*[a-zA-Z]""",
         'action': make_token_spell,
         },

        # any other run of non-whitespace characters; not spell-checked
        {'name': 'stuff',
         'expr': r"""[^ \n]+""",
         'action': lex.make_token,
         },

        # anything left over (single chars, spaces, newlines) is dropped
        {'name': "default",
         'expr': r'.| |\n',
         'action': lex.silent}
    ]

    def _default_rules(self):
        """subclasses can override this to define defaults for a grammar"""
        for rdir in TextGrammar.GRAMMAR_LIST:
            self.add_rule(**rdir)