From 01c101d1ad82626c87aba221aae58db9f222a966 Mon Sep 17 00:00:00 2001 From: moculus Date: Sun, 15 Jul 2007 13:44:20 +0000 Subject: [PATCH] lex3 ftw!!! BIG efficiency speed-up --HG-- branch : pmacs2 --- highlight2.py | 14 ++++++++------ lex3.py | 28 +++++++++++++++++----------- mode2.py | 2 +- mode_bds.py | 2 +- mode_blame.py | 2 +- mode_c.py | 2 +- mode_console.py | 2 +- mode_consolemini.py | 2 +- mode_css.py | 2 +- mode_diff.py | 2 +- mode_javascript.py | 2 +- mode_life.py | 2 +- mode_mutt.py | 2 +- mode_nasm.py | 2 +- mode_perl.py | 2 +- mode_python.py | 2 +- mode_sh.py | 2 +- mode_sql.py | 2 +- mode_text.py | 2 +- mode_tt.py | 2 +- mode_xml.py | 2 +- 21 files changed, 44 insertions(+), 36 deletions(-) diff --git a/highlight2.py b/highlight2.py index d48bb1c..f0820d7 100644 --- a/highlight2.py +++ b/highlight2.py @@ -1,5 +1,5 @@ import sys -from lex2 import Token +from lex3 import Token color_list = [] color_list.extend(['\033[3%dm' % x for x in range(0, 8)]) @@ -70,17 +70,18 @@ class Highlighter: def highlight(self, lines): self.tokens = [[] for l in lines] - self.lexer.lex(lines, y=0, x=0) - for token in self.lexer: + #self.lexer.lex(lines, y=0, x=0) + #for token in self.lexer: + for token in self.lexer.lex(lines, y=0, x=0): self.tokens[token.y].append(token) # relexing # ====================== def relex(self, lines, y1, x1, y2, x2, token=None): if token: - self.lexer.resume(lines, y1, 0, token) + gen = self.lexer.resume(lines, y1, 0, token) else: - self.lexer.lex(lines, y1, 0) + gen = self.lexer.lex(lines, y1, 0) # these keep track of the current y coordinate, the current token index # on line[y], and the current "new token", respectively. @@ -97,7 +98,8 @@ class Highlighter: # if we need another new_token, then try to get it. if getnext: try: - new_token = self.lexer.next() + #new_token = self.lexer.next() + new_token = gen.next() getnext = False except StopIteration: # ok, so this means that ALL the rest of the tokens didn't diff --git a/lex3.py b/lex3.py index 4b5966c..97d3d35 100755 --- a/lex3.py +++ b/lex3.py @@ -14,6 +14,7 @@ class Token(object): self.string = s self.parent = parent self.matchd = matchd + assert parent is None or hasattr(parent, 'name'), 'oh no %r' % parent def parents(self): if self.parent is not None: parents = self.parent.parents() @@ -119,27 +120,29 @@ class RegionRule(Rule): def match(self, lexer, parent): return self.start_re.match(self.get_line(lexer), lexer.x) def lex(self, lexer, parent, m): - t1 = self.make_token(lexer, 'start', None, m, m.groupdict()) + t1 = self.make_token(lexer, m.group(0), 'start', parent, m.groupdict()) yield t1 if self.end: - endre = re.compile(self.end % t1.matchd, self.reflags) + stopre = re.compile(self.end % t1.matchd, self.reflags) else: - endre = None - for t2 in self._lex(lexer, [t1], 'start', 'end'): + stopre = None + for t2 in self._lex(lexer, [t1], 'end', stopre): yield t2 raise StopIteration def resume(self, lexer, toresume): assert toresume t1 = toresume[0] + assert t1.name if self.end: - endre = re.compile(self.end % t1.matchd, self.reflags) + stopre = re.compile(self.end % t1.matchd, self.reflags) else: - endre = None - for t2 in self._lex(lexer, t1, 'end', endre): + stopre = None + for t2 in self._lex(lexer, [t1], 'end', stopre): yield t2 raise StopIteration def _lex(self, lexer, toresume, stopname, stopre): + assert toresume parent = toresume[0] reenter = len(toresume) > 1 null_t = None @@ -166,7 +169,7 @@ class RegionRule(Rule): if null_t: yield null_t null_t = None - yield self.make_token(lexer, stopname, parent, m, {}) + yield self.make_token(lexer, m.group(0), stopname, parent) done = True break @@ -209,7 +212,7 @@ class DualRegionRule(RegionRule): return self.start_re.match(self.get_line(lexer), lexer.x) def lex(self, lexer, parent, m): assert m - t1 = self.make_token(lexer, 'start', parent, m, m.groupdict()) + t1 = self.make_token(lexer, m.group(0), 'start', parent, m.groupdict()) yield t1 t2 = None @@ -234,8 +237,10 @@ class DualRegionRule(RegionRule): t1 = t2 = None if toresume[0].name == 'start': t1 = toresume[0] + assert t1.name elif toresume[0].name == 'middle': t2 = toresume[0] + assert t2.name else: raise Exception, "invalid name %r" % toresume[0].name @@ -282,7 +287,7 @@ class Lexer: self.x = 0 self.lines = None def get_line(self): - return self.lines[lexer.y] + '\n' + return self.lines[self.y] + '\n' def lex(self, lines, y=0, x=0): self.y = y self.x = x @@ -315,9 +320,10 @@ class Lexer: def _lex(self): null_t = None + parent = None while self.y < len(self.lines): line = self.get_line() - while not done and self.x < len(line): + while self.x < len(line): m = None for rule in self.grammar.rules: m = rule.match(self, parent) diff --git a/mode2.py b/mode2.py index bfe636c..4299eea 100644 --- a/mode2.py +++ b/mode2.py @@ -1,6 +1,6 @@ import os, sets, string import color, method -from lex2 import Lexer +from lex3 import Lexer DEBUG = False diff --git a/mode_bds.py b/mode_bds.py index 5f40a2a..f1f35e9 100644 --- a/mode_bds.py +++ b/mode_bds.py @@ -1,5 +1,5 @@ import color, mode2 -from lex2 import Grammar, PatternRule, RegionRule, Grammar +from lex3 import Grammar, PatternRule, RegionRule, Grammar from mode_perl import PerlGrammar from mode_xml import OpenTagGrammar from mode_perl import StringGrammar diff --git a/mode_blame.py b/mode_blame.py index 64b3bbb..e5858af 100644 --- a/mode_blame.py +++ b/mode_blame.py @@ -1,7 +1,7 @@ import color, mode2 from point2 import Point -from lex2 import Grammar, PatternRule, RegionRule, DualRegionRule +from lex3 import Grammar, PatternRule, RegionRule, DualRegionRule class MetadataGrammar(Grammar): rules = [ diff --git a/mode_c.py b/mode_c.py index 6bd2bcb..a0adc02 100644 --- a/mode_c.py +++ b/mode_c.py @@ -1,5 +1,5 @@ import color, mode2, tab2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule from mode_python import StringGrammar # this might not be complete... diff --git a/mode_console.py b/mode_console.py index 49b2a4f..4116f76 100644 --- a/mode_console.py +++ b/mode_console.py @@ -1,5 +1,5 @@ import color, mode2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule from mode_python import StringGrammar class ConsoleGrammar(Grammar): diff --git a/mode_consolemini.py b/mode_consolemini.py index eb772dc..8f59d24 100644 --- a/mode_consolemini.py +++ b/mode_consolemini.py @@ -1,6 +1,6 @@ import code, string, StringIO, sys, traceback import color, completer, method, mode2 -from lex2 import Grammar, PatternRule +from lex3 import Grammar, PatternRule from point2 import Point class Console(mode2.Fundamental): diff --git a/mode_css.py b/mode_css.py index 1030949..f0e7bac 100644 --- a/mode_css.py +++ b/mode_css.py @@ -1,5 +1,5 @@ import color, mode2 -from lex2 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule +from lex3 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule from point2 import Point class StringGrammar(Grammar): diff --git a/mode_diff.py b/mode_diff.py index 8a481c5..ceea689 100644 --- a/mode_diff.py +++ b/mode_diff.py @@ -1,5 +1,5 @@ import color, method, mode2, re -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule class DiffGrammar(Grammar): rules = [ diff --git a/mode_javascript.py b/mode_javascript.py index 2b9a7e0..6ad6e16 100644 --- a/mode_javascript.py +++ b/mode_javascript.py @@ -1,5 +1,5 @@ import color, mode2, tab2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule from point2 import Point from mode_python import StringGrammar diff --git a/mode_life.py b/mode_life.py index 5079b0a..59e8107 100644 --- a/mode_life.py +++ b/mode_life.py @@ -1,7 +1,7 @@ import re, sets, string, sys import color, commands, default, method, mode2, regex, tab2 from point2 import Point -from lex2 import Grammar, PatternRule, ContextPatternRule, \ +from lex3 import Grammar, PatternRule, ContextPatternRule, \ RegionRule, DualRegionRule class Life(mode2.Fundamental): diff --git a/mode_mutt.py b/mode_mutt.py index 3ffbf97..fe39909 100644 --- a/mode_mutt.py +++ b/mode_mutt.py @@ -1,5 +1,5 @@ import color, mode2, method, mode_text -from lex2 import Grammar, PatternRule +from lex3 import Grammar, PatternRule class MuttGrammar(Grammar): rules = [ diff --git a/mode_nasm.py b/mode_nasm.py index b276d0d..97ad478 100644 --- a/mode_nasm.py +++ b/mode_nasm.py @@ -1,5 +1,5 @@ import color, mode2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule class StringGrammar(Grammar): rules = [ diff --git a/mode_perl.py b/mode_perl.py index f6a8e3c..5877e04 100644 --- a/mode_perl.py +++ b/mode_perl.py @@ -1,7 +1,7 @@ import re, sets, string, sys import color, commands, default, method, mode2, regex, tab2 from point2 import Point -from lex2 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule +from lex3 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule from method import Argument, Method class PodGrammar(Grammar): diff --git a/mode_python.py b/mode_python.py index 6c80ba9..759809d 100644 --- a/mode_python.py +++ b/mode_python.py @@ -2,7 +2,7 @@ import commands, os.path, sets, string import color, completer, default, mode2, method, regex, tab2 import ctag_python from point2 import Point -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule class StringGrammar(Grammar): rules = [ diff --git a/mode_sh.py b/mode_sh.py index 2f22c0a..36d4827 100644 --- a/mode_sh.py +++ b/mode_sh.py @@ -1,5 +1,5 @@ import color, mode2, tab2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule class StringGrammar(Grammar): rules = [ diff --git a/mode_sql.py b/mode_sql.py index a3d8d38..2abb41f 100644 --- a/mode_sql.py +++ b/mode_sql.py @@ -1,5 +1,5 @@ import color, mode2, tab2 -from lex2 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule, DualRegionRule, NocaseDualRegionRule +from lex3 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule, DualRegionRule, NocaseDualRegionRule from mode_python import StringGrammar class PlPgSqlGrammar(Grammar): diff --git a/mode_text.py b/mode_text.py index 86c1b19..5471e3e 100644 --- a/mode_text.py +++ b/mode_text.py @@ -1,5 +1,5 @@ import color, mode2, method, ispell -from lex2 import Token, Rule, PatternRule, RegionRule, Grammar +from lex3 import Token, Rule, PatternRule, RegionRule, Grammar class WordRule(PatternRule): def __init__(self): diff --git a/mode_tt.py b/mode_tt.py index a2d3d51..64fcb1b 100644 --- a/mode_tt.py +++ b/mode_tt.py @@ -1,5 +1,5 @@ import color, mode2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule class TagGrammar(Grammar): rules = [ diff --git a/mode_xml.py b/mode_xml.py index c623e06..156c464 100644 --- a/mode_xml.py +++ b/mode_xml.py @@ -1,5 +1,5 @@ import color, mode2 -from lex2 import Grammar, PatternRule, RegionRule +from lex3 import Grammar, PatternRule, RegionRule class OpenTagGrammar(Grammar): rules = [