Switch from lex2 to lex3: BIG efficiency speed-up

--HG--
branch : pmacs2
This commit is contained in:
moculus 2007-07-15 13:44:20 +00:00
parent b33772cb8f
commit 01c101d1ad
21 changed files with 44 additions and 36 deletions

View File

@ -1,5 +1,5 @@
import sys import sys
from lex2 import Token from lex3 import Token
color_list = [] color_list = []
color_list.extend(['\033[3%dm' % x for x in range(0, 8)]) color_list.extend(['\033[3%dm' % x for x in range(0, 8)])
@ -70,17 +70,18 @@ class Highlighter:
def highlight(self, lines): def highlight(self, lines):
self.tokens = [[] for l in lines] self.tokens = [[] for l in lines]
self.lexer.lex(lines, y=0, x=0) #self.lexer.lex(lines, y=0, x=0)
for token in self.lexer: #for token in self.lexer:
for token in self.lexer.lex(lines, y=0, x=0):
self.tokens[token.y].append(token) self.tokens[token.y].append(token)
# relexing # relexing
# ====================== # ======================
def relex(self, lines, y1, x1, y2, x2, token=None): def relex(self, lines, y1, x1, y2, x2, token=None):
if token: if token:
self.lexer.resume(lines, y1, 0, token) gen = self.lexer.resume(lines, y1, 0, token)
else: else:
self.lexer.lex(lines, y1, 0) gen = self.lexer.lex(lines, y1, 0)
# these keep track of the current y coordinate, the current token index # these keep track of the current y coordinate, the current token index
# on line[y], and the current "new token", respectively. # on line[y], and the current "new token", respectively.
@ -97,7 +98,8 @@ class Highlighter:
# if we need another new_token, then try to get it. # if we need another new_token, then try to get it.
if getnext: if getnext:
try: try:
new_token = self.lexer.next() #new_token = self.lexer.next()
new_token = gen.next()
getnext = False getnext = False
except StopIteration: except StopIteration:
# ok, so this means that ALL the rest of the tokens didn't # ok, so this means that ALL the rest of the tokens didn't

28
lex3.py
View File

@ -14,6 +14,7 @@ class Token(object):
self.string = s self.string = s
self.parent = parent self.parent = parent
self.matchd = matchd self.matchd = matchd
assert parent is None or hasattr(parent, 'name'), 'oh no %r' % parent
def parents(self): def parents(self):
if self.parent is not None: if self.parent is not None:
parents = self.parent.parents() parents = self.parent.parents()
@ -119,27 +120,29 @@ class RegionRule(Rule):
def match(self, lexer, parent): def match(self, lexer, parent):
return self.start_re.match(self.get_line(lexer), lexer.x) return self.start_re.match(self.get_line(lexer), lexer.x)
def lex(self, lexer, parent, m): def lex(self, lexer, parent, m):
t1 = self.make_token(lexer, 'start', None, m, m.groupdict()) t1 = self.make_token(lexer, m.group(0), 'start', parent, m.groupdict())
yield t1 yield t1
if self.end: if self.end:
endre = re.compile(self.end % t1.matchd, self.reflags) stopre = re.compile(self.end % t1.matchd, self.reflags)
else: else:
endre = None stopre = None
for t2 in self._lex(lexer, [t1], 'start', 'end'): for t2 in self._lex(lexer, [t1], 'end', stopre):
yield t2 yield t2
raise StopIteration raise StopIteration
def resume(self, lexer, toresume): def resume(self, lexer, toresume):
assert toresume assert toresume
t1 = toresume[0] t1 = toresume[0]
assert t1.name
if self.end: if self.end:
endre = re.compile(self.end % t1.matchd, self.reflags) stopre = re.compile(self.end % t1.matchd, self.reflags)
else: else:
endre = None stopre = None
for t2 in self._lex(lexer, t1, 'end', endre): for t2 in self._lex(lexer, [t1], 'end', stopre):
yield t2 yield t2
raise StopIteration raise StopIteration
def _lex(self, lexer, toresume, stopname, stopre): def _lex(self, lexer, toresume, stopname, stopre):
assert toresume
parent = toresume[0] parent = toresume[0]
reenter = len(toresume) > 1 reenter = len(toresume) > 1
null_t = None null_t = None
@ -166,7 +169,7 @@ class RegionRule(Rule):
if null_t: if null_t:
yield null_t yield null_t
null_t = None null_t = None
yield self.make_token(lexer, stopname, parent, m, {}) yield self.make_token(lexer, m.group(0), stopname, parent)
done = True done = True
break break
@ -209,7 +212,7 @@ class DualRegionRule(RegionRule):
return self.start_re.match(self.get_line(lexer), lexer.x) return self.start_re.match(self.get_line(lexer), lexer.x)
def lex(self, lexer, parent, m): def lex(self, lexer, parent, m):
assert m assert m
t1 = self.make_token(lexer, 'start', parent, m, m.groupdict()) t1 = self.make_token(lexer, m.group(0), 'start', parent, m.groupdict())
yield t1 yield t1
t2 = None t2 = None
@ -234,8 +237,10 @@ class DualRegionRule(RegionRule):
t1 = t2 = None t1 = t2 = None
if toresume[0].name == 'start': if toresume[0].name == 'start':
t1 = toresume[0] t1 = toresume[0]
assert t1.name
elif toresume[0].name == 'middle': elif toresume[0].name == 'middle':
t2 = toresume[0] t2 = toresume[0]
assert t2.name
else: else:
raise Exception, "invalid name %r" % toresume[0].name raise Exception, "invalid name %r" % toresume[0].name
@ -282,7 +287,7 @@ class Lexer:
self.x = 0 self.x = 0
self.lines = None self.lines = None
def get_line(self): def get_line(self):
return self.lines[lexer.y] + '\n' return self.lines[self.y] + '\n'
def lex(self, lines, y=0, x=0): def lex(self, lines, y=0, x=0):
self.y = y self.y = y
self.x = x self.x = x
@ -315,9 +320,10 @@ class Lexer:
def _lex(self): def _lex(self):
null_t = None null_t = None
parent = None
while self.y < len(self.lines): while self.y < len(self.lines):
line = self.get_line() line = self.get_line()
while not done and self.x < len(line): while self.x < len(line):
m = None m = None
for rule in self.grammar.rules: for rule in self.grammar.rules:
m = rule.match(self, parent) m = rule.match(self, parent)

View File

@ -1,6 +1,6 @@
import os, sets, string import os, sets, string
import color, method import color, method
from lex2 import Lexer from lex3 import Lexer
DEBUG = False DEBUG = False

View File

@ -1,5 +1,5 @@
import color, mode2 import color, mode2
from lex2 import Grammar, PatternRule, RegionRule, Grammar from lex3 import Grammar, PatternRule, RegionRule, Grammar
from mode_perl import PerlGrammar from mode_perl import PerlGrammar
from mode_xml import OpenTagGrammar from mode_xml import OpenTagGrammar
from mode_perl import StringGrammar from mode_perl import StringGrammar

View File

@ -1,7 +1,7 @@
import color, mode2 import color, mode2
from point2 import Point from point2 import Point
from lex2 import Grammar, PatternRule, RegionRule, DualRegionRule from lex3 import Grammar, PatternRule, RegionRule, DualRegionRule
class MetadataGrammar(Grammar): class MetadataGrammar(Grammar):
rules = [ rules = [

View File

@ -1,5 +1,5 @@
import color, mode2, tab2 import color, mode2, tab2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
from mode_python import StringGrammar from mode_python import StringGrammar
# this might not be complete... # this might not be complete...

View File

@ -1,5 +1,5 @@
import color, mode2 import color, mode2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
from mode_python import StringGrammar from mode_python import StringGrammar
class ConsoleGrammar(Grammar): class ConsoleGrammar(Grammar):

View File

@ -1,6 +1,6 @@
import code, string, StringIO, sys, traceback import code, string, StringIO, sys, traceback
import color, completer, method, mode2 import color, completer, method, mode2
from lex2 import Grammar, PatternRule from lex3 import Grammar, PatternRule
from point2 import Point from point2 import Point
class Console(mode2.Fundamental): class Console(mode2.Fundamental):

View File

@ -1,5 +1,5 @@
import color, mode2 import color, mode2
from lex2 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule from lex3 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule
from point2 import Point from point2 import Point
class StringGrammar(Grammar): class StringGrammar(Grammar):

View File

@ -1,5 +1,5 @@
import color, method, mode2, re import color, method, mode2, re
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
class DiffGrammar(Grammar): class DiffGrammar(Grammar):
rules = [ rules = [

View File

@ -1,5 +1,5 @@
import color, mode2, tab2 import color, mode2, tab2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
from point2 import Point from point2 import Point
from mode_python import StringGrammar from mode_python import StringGrammar

View File

@ -1,7 +1,7 @@
import re, sets, string, sys import re, sets, string, sys
import color, commands, default, method, mode2, regex, tab2 import color, commands, default, method, mode2, regex, tab2
from point2 import Point from point2 import Point
from lex2 import Grammar, PatternRule, ContextPatternRule, \ from lex3 import Grammar, PatternRule, ContextPatternRule, \
RegionRule, DualRegionRule RegionRule, DualRegionRule
class Life(mode2.Fundamental): class Life(mode2.Fundamental):

View File

@ -1,5 +1,5 @@
import color, mode2, method, mode_text import color, mode2, method, mode_text
from lex2 import Grammar, PatternRule from lex3 import Grammar, PatternRule
class MuttGrammar(Grammar): class MuttGrammar(Grammar):
rules = [ rules = [

View File

@ -1,5 +1,5 @@
import color, mode2 import color, mode2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
class StringGrammar(Grammar): class StringGrammar(Grammar):
rules = [ rules = [

View File

@ -1,7 +1,7 @@
import re, sets, string, sys import re, sets, string, sys
import color, commands, default, method, mode2, regex, tab2 import color, commands, default, method, mode2, regex, tab2
from point2 import Point from point2 import Point
from lex2 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule from lex3 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
from method import Argument, Method from method import Argument, Method
class PodGrammar(Grammar): class PodGrammar(Grammar):

View File

@ -2,7 +2,7 @@ import commands, os.path, sets, string
import color, completer, default, mode2, method, regex, tab2 import color, completer, default, mode2, method, regex, tab2
import ctag_python import ctag_python
from point2 import Point from point2 import Point
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
class StringGrammar(Grammar): class StringGrammar(Grammar):
rules = [ rules = [

View File

@ -1,5 +1,5 @@
import color, mode2, tab2 import color, mode2, tab2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
class StringGrammar(Grammar): class StringGrammar(Grammar):
rules = [ rules = [

View File

@ -1,5 +1,5 @@
import color, mode2, tab2 import color, mode2, tab2
from lex2 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule, DualRegionRule, NocaseDualRegionRule from lex3 import Grammar, PatternRule, NocasePatternRule, RegionRule, NocaseRegionRule, DualRegionRule, NocaseDualRegionRule
from mode_python import StringGrammar from mode_python import StringGrammar
class PlPgSqlGrammar(Grammar): class PlPgSqlGrammar(Grammar):

View File

@ -1,5 +1,5 @@
import color, mode2, method, ispell import color, mode2, method, ispell
from lex2 import Token, Rule, PatternRule, RegionRule, Grammar from lex3 import Token, Rule, PatternRule, RegionRule, Grammar
class WordRule(PatternRule): class WordRule(PatternRule):
def __init__(self): def __init__(self):

View File

@ -1,5 +1,5 @@
import color, mode2 import color, mode2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
class TagGrammar(Grammar): class TagGrammar(Grammar):
rules = [ rules = [

View File

@ -1,5 +1,5 @@
import color, mode2 import color, mode2
from lex2 import Grammar, PatternRule, RegionRule from lex3 import Grammar, PatternRule, RegionRule
class OpenTagGrammar(Grammar): class OpenTagGrammar(Grammar):
rules = [ rules = [