'''Lexer grammar for mutt email composition: recognizes mail headers,
quoted text, email addresses, urls, and words to be spell-checked.'''

import ispell
import lex

def make_token(rule, m):
    '''return a token from a hit'''
    return lex.Token(rule.name, m.start(), m.end(), m.group(0))

def make_token_spell(rule, m):
    '''return a token from a hit, flagging the word if it is misspelled'''
    # first let's figure out the actual word we need to check; a
    # 'continued word' is hyphenated across a line break, so rejoin
    # its two captured halves
    if rule.name == 'continued word':
        word = '%s%s' % (m.group(1), m.group(2))
    else:
        word = m.group(0)

    # okay, now we check the spelling; we don't spell-check all-caps words
    if ispell.can_spell() and \
       not ispell.get_speller().check(word, caps=False, title=False):
        name = "misspelled %s" % rule.name
    else:
        name = rule.name

    return lex.Token(name, m.start(), m.end(), m.group(0))

class MuttGrammar(lex.Grammar):
    GRAMMAR_LIST = [
        # mail header fields at the start of a line
        {'name': 'header',
         'expr': r'(?:^|(?<=\n))(?:From|To|Cc|Bcc|Subject|Reply-To|In-Reply-To|Delivered-To|Date):',
         'action': make_token,
         },

        # quoted text; the three quote tokens cycle by quote depth mod 3,
        # so a depth-4 line is lexed like a depth-1 line (see the sketch
        # at the bottom of the file)
        {'name': 'quote1',
         'expr': r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){1} *(?:[^ >\n][^\n]*)?(?:$|\n)',
         'action': make_token,
         },

        {'name': 'quote2',
         'expr': r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){2} *(?:[^ >\n][^\n]*)?(?:$|\n)',
         'action': make_token,
         },

        {'name': 'quote3',
         'expr': r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){3} *(?:[^ >\n][^\n]*)?(?:$|\n)',
         'action': make_token,
         },

        # email addresses, with or without angle brackets
        {'name': 'email',
         'expr': r'(?:^|(?<=[ :\n]))<?[^<>@\n ]+@(?:[^<>@\.\n ]+\.)*[^<>@\.\n ]+>?',
         'action': make_token,
         },

        # urls for the common schemes
        {'name': 'url',
         'expr': r'(?:^|(?<=[ \n]))(?:http|https|ftp|sftp|file|smtp|smtps|torrent|news|jabber|irc|telnet)://(?:[^\.\n ]+\.)*[^\.\n ]+',
         'action': make_token,
         },

        # a word hyphenated across a line break; its halves are rejoined
        # before spell-checking (see make_token_spell)
        {'name': 'continued word',
         'expr': r"""([a-zA-Z][a-zA-Z-']*[a-zA-Z])-\n *([a-zA-Z][a-zA-Z-]*[a-zA-Z])""",
         'action': make_token_spell,
         },

        # ordinary words, which get spell-checked
        {'name': 'word',
         'expr': r"""(?:[a-zA-Z][-']?)*[a-zA-Z]""",
         'action': make_token_spell,
         },

        # any other run of non-whitespace characters
        {'name': 'stuff',
         'expr': r"""[^ \n]+""",
         'action': make_token,
         },

        # catch-all for whatever remains; emits no token
        {'name': 'default',
         'expr': r'.| |\n',
         'action': lex.silent,
         },
    ]

    def _default_rules(self):
        """subclasses can override this to define defaults for a grammar"""
        for rdir in self.GRAMMAR_LIST:
            self.add_rule(**rdir)
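
# Illustration (not part of the original module): a quick self-check of the
# mod-3 quote-cycling behavior noted above, using only the standard re
# module and the 'quote2' expression verbatim, plus the continued-word demo.
if __name__ == '__main__':
    import re
    quote2 = re.compile(r'(?:^|(?<=\n))(?:(?: *>){3})*(?: *>){2} *(?:[^ >\n][^\n]*)?(?:$|\n)')
    assert quote2.match('> > hello\n')        # depth 2
    assert quote2.match('> > > > > hello\n')  # depth 5 lexes the same way
    assert not quote2.match('> > > hello\n')  # depth 3 belongs to 'quote3'
    assert _demo_continued_word() == 'beautiful'
    print('demos passed')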