experimental lexing stuff!

--HG--
branch : pmacs2
This commit is contained in:
moculus 2007-07-11 16:20:33 +00:00
parent 685a318e5e
commit eb37a919b7
3 changed files with 57 additions and 120 deletions

83
lex2.py
View File

@ -63,6 +63,8 @@ class Rule:
raise Exception, "%s rule cannot match!" % self.name raise Exception, "%s rule cannot match!" % self.name
def make_token(self, lexer, s, name, parent=None, matchd={}): def make_token(self, lexer, s, name, parent=None, matchd={}):
return Token(name, self, lexer.y, lexer.x, s, parent, matchd) return Token(name, self, lexer.y, lexer.x, s, parent, matchd)
def _get_line(self, lexer):
return lexer.lines[lexer.y] + '\n'
def _set_group(self, group): def _set_group(self, group):
if group is None: if group is None:
self.group = self.name self.group = self.name
@ -78,7 +80,8 @@ class ConstantRule(Rule):
self.length = len(self.constant) self.length = len(self.constant)
self._set_group(group) self._set_group(group)
def match(self, lexer, parent): def match(self, lexer, parent):
if lexer.lines[lexer.y][lexer.x:].startswith(self.constant): line = self._get_line(lexer)
if line[lexer.x:].startswith(self.constant):
token = self.make_token(lexer, self.constant, self.name, parent) token = self.make_token(lexer, self.constant, self.name, parent)
lexer.add_token(token) lexer.add_token(token)
lexer.x += self.length lexer.x += self.length
@ -102,7 +105,8 @@ class PatternRule(Rule):
lexer.add_token(token) lexer.add_token(token)
lexer.x += len(s) lexer.x += len(s)
def match(self, lexer, parent): def match(self, lexer, parent):
m = self.re.match(lexer.lines[lexer.y], lexer.x) line = self._get_line(lexer)
m = self.re.match(line, lexer.x)
if m: if m:
self._match(lexer, parent, m) self._match(lexer, parent, m)
return True return True
@ -127,7 +131,8 @@ class ContextPatternRule(PatternRule):
r = re.compile(self.pattern % parent.matchd) r = re.compile(self.pattern % parent.matchd)
except KeyError: except KeyError:
r = self.fallback_re r = self.fallback_re
m = r.match(lexer.lines[lexer.y], lexer.x) line = self._get_line(lexer)
m = r.match(line, lexer.x)
if m: if m:
self._match(lexer, parent, m) self._match(lexer, parent, m)
return True return True
@ -157,7 +162,8 @@ class RegionRule(Rule):
return True return True
def match(self, lexer, parent): def match(self, lexer, parent):
m = self.start_re.match(lexer.lines[lexer.y], lexer.x) line = self._get_line(lexer)
m = self.start_re.match(line, lexer.x)
if m: if m:
self._match(lexer, parent, m, []) self._match(lexer, parent, m, [])
return True return True
@ -208,15 +214,16 @@ class RegionRule(Rule):
# if this line is empty, then we skip it, but here we insert # if this line is empty, then we skip it, but here we insert
# an empty null token just so we have something # an empty null token just so we have something
if not reenter and len(lexer.lines[lexer.y]) == 0: #if not reenter and len(lexer.lines[lexer.y]) == 0:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) # null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t) # lexer.add_token(null_t)
null_t = None # null_t = None
# ok, as long as we haven't found the end token, and have more # ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens # data on the current line to read, we will process tokens
while (not done and lexer.y == old_y and #while (not done and lexer.y == old_y and
lexer.x < len(lexer.lines[lexer.y])): # lexer.x < len(lexer.lines[lexer.y])):
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence # if we are reentering mid-parse, then that takes precedence
if reenter: if reenter:
reenter = False reenter = False
@ -225,14 +232,17 @@ class RegionRule(Rule):
null_t = None null_t = None
if lexer.y >= len(lexer.lines): if lexer.y >= len(lexer.lines):
return True return True
elif lexer.x >= len(lexer.lines[lexer.y]): #elif lexer.x >= len(lexer.lines[lexer.y]):
elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
lexer.y += 1 lexer.y += 1
lexer.x = 0 lexer.x = 0
line = self._get_line(lexer)
# if we are looking for an end token, then see if we've # if we are looking for an end token, then see if we've
# found it. if so, then we are done! # found it. if so, then we are done!
if self.end: if self.end:
m = end_re.match(lexer.lines[lexer.y], lexer.x) m = end_re.match(line, lexer.x)
if m: if m:
self._add_from_regex('end', lexer, parent, m, {}) self._add_from_regex('end', lexer, parent, m, {})
done = True done = True
@ -247,6 +257,7 @@ class RegionRule(Rule):
null_t = None null_t = None
break break
# if we never found a token, then we need to add another # if we never found a token, then we need to add another
# character to the current null token (which we should # character to the current null token (which we should
# create if it isn't set). # create if it isn't set).
@ -254,8 +265,10 @@ class RegionRule(Rule):
if null_t is None: if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t) lexer.add_token(null_t)
if len(lexer.lines[lexer.y]) > lexer.x: #if len(lexer.lines[lexer.y]) > lexer.x:
null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) if lexer.x < len(line):
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1 lexer.x += 1
# ok, since we're soon going to be on a different line (or # ok, since we're soon going to be on a different line (or
@ -311,7 +324,8 @@ class DualRegionRule(Rule):
return True return True
def match(self, lexer, parent): def match(self, lexer, parent):
# see if we can match our start token # see if we can match our start token
m = self.start_re.match(lexer.lines[lexer.y], lexer.x) line = self._get_line(lexer)
m = self.start_re.match(line, lexer.x)
if m: if m:
t1 = self._add_from_regex('start', lexer, parent, m, m.groupdict()) t1 = self._add_from_regex('start', lexer, parent, m, m.groupdict())
t2 = self._match_first(lexer, t1, []) t2 = self._match_first(lexer, t1, [])
@ -341,14 +355,15 @@ class DualRegionRule(Rule):
# if this line is empty, then we will skip it, but here we insert # if this line is empty, then we will skip it, but here we insert
# an empty null token just so we have something # an empty null token just so we have something
if len(lexer.lines[lexer.y]) == 0: #if len(lexer.lines[lexer.y]) == 0:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) # null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t) # lexer.add_token(null_t)
null_t = None # null_t = None
# ok, as long as we haven't found the end token, and have more # ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens # data on the current line to read, we will process tokens
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): #while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]):
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence # if we are reentering mid-parse, then that takes precedence
if reenter: if reenter:
raise Exception, "aw damn1" raise Exception, "aw damn1"
@ -361,9 +376,11 @@ class DualRegionRule(Rule):
#null_t = None #null_t = None
#break #break
line = self._get_line(lexer)
# see if we have found the middle token. if so, we can then # see if we have found the middle token. if so, we can then
# proceed to "stage 2" # proceed to "stage 2"
m2 = middle_re.match(lexer.lines[lexer.y], lexer.x) m2 = middle_re.match(line, lexer.x)
if m2: if m2:
d2 = dict(d1.items() + m2.groupdict().items()) d2 = dict(d1.items() + m2.groupdict().items())
t2 = self._add_from_regex('middle', lexer, parent, m2, d2) t2 = self._add_from_regex('middle', lexer, parent, m2, d2)
@ -386,7 +403,8 @@ class DualRegionRule(Rule):
if null_t is None: if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t) lexer.add_token(null_t)
null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) #null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1 lexer.x += 1
# ok, since we're soon going to be on a different line (or # ok, since we're soon going to be on a different line (or
@ -433,17 +451,18 @@ class DualRegionRule(Rule):
# if this line is empty, then we will skip it, but here we insert # if this line is empty, then we will skip it, but here we insert
# an empty null token just so we have something # an empty null token just so we have something
if len(lexer.lines[lexer.y]) == 0: #if len(lexer.lines[lexer.y]) == 0:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) # null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t) # lexer.add_token(null_t)
null_t = None # null_t = None
# ok, as long as we haven't found the end token, and have more # ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens # data on the current line to read, we will process tokens
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# see if we have found the middle token. if so, we can then # see if we have found the middle token. if so, we can then
# proceed to "stage 2" # proceed to "stage 2"
m3 = end_re.match(lexer.lines[lexer.y], lexer.x) line = self._get_line(lexer)
m3 = end_re.match(line, lexer.x)
if m3: if m3:
t3 = self._add_from_regex('end', lexer, parent, m3, {}) t3 = self._add_from_regex('end', lexer, parent, m3, {})
done = True done = True
@ -465,7 +484,8 @@ class DualRegionRule(Rule):
if null_t is None: if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent) null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t) lexer.add_token(null_t)
null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) #null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1 lexer.x += 1
# ok, since we're soon going to be on a different line (or # ok, since we're soon going to be on a different line (or
@ -556,7 +576,8 @@ class Lexer:
return self.tokens.pop(0) return self.tokens.pop(0)
while self.y < len(self.lines): while self.y < len(self.lines):
line = self.lines[self.y] #line = self.lines[self.y]
line = self.lines[self.y] + '\n'
while self.x < len(line): while self.x < len(line):
curr_t = None curr_t = None
for rule in self.grammar.rules: for rule in self.grammar.rules:
@ -566,6 +587,8 @@ class Lexer:
if null_t is None: if null_t is None:
null_t = Token('null', None, self.y, self.x, '') null_t = Token('null', None, self.y, self.x, '')
self.add_token(null_t) self.add_token(null_t)
#assert line[self.x] != '\n', "DAMN"
#assert line[self.x] != '$', "DAMN"
null_t.add_to_string(line[self.x]) null_t.add_to_string(line[self.x])
self.x += 1 self.x += 1
null_t = None null_t = None

View File

@ -2,7 +2,7 @@ import commands, os.path, sets, string
import color, completer, default, mode2, lex2, method, regex, tab2 import color, completer, default, mode2, lex2, method, regex, tab2
import ctag_python import ctag_python
from point2 import Point from point2 import Point
from lex2 import Grammar, PatternRule, RegionRule from lex2 import Grammar, PatternRule, RegionRule, ConstantRule
class StringGrammar(Grammar): class StringGrammar(Grammar):
rules = [ rules = [
@ -33,7 +33,8 @@ class PythonGrammar(Grammar):
RegionRule(r'string', r'"', StringGrammar, r'"'), RegionRule(r'string', r'"', StringGrammar, r'"'),
RegionRule(r'string', r"'", StringGrammar, r"'"), RegionRule(r'string', r"'", StringGrammar, r"'"),
PatternRule(r'comment', r'#.*$'), PatternRule(r'comment', r'#.*$'),
PatternRule(r'continuation', r'\\$'), PatternRule(r'continuation', r'\\\n$'),
PatternRule(r'eol', r'\n$'),
] ]
class PythonTabber(tab2.StackTabber): class PythonTabber(tab2.StackTabber):
@ -125,7 +126,7 @@ class PythonTabber(tab2.StackTabber):
# since we're done with the string, resume our indentation level # since we're done with the string, resume our indentation level
self._opt_pop('string') self._opt_pop('string')
elif fqname == 'delimiter': elif fqname == 'delimiter':
# we only reall care about a colon as part of a one-line statement, # we only really care about a colon as part of a one-line statement,
# i.e. "while ok: foo()" or "if True: print 3" # i.e. "while ok: foo()" or "if True: print 3"
if token.string == ':': if token.string == ':':
if self.markers and self.markers[-1].name in ('[', '{'): if self.markers and self.markers[-1].name in ('[', '{'):
@ -183,7 +184,6 @@ class Python(mode2.Fundamental):
# highlighting # highlighting
self.colors = { self.colors = {
'keyword': color.build('cyan', 'default'), 'keyword': color.build('cyan', 'default'),
#'reserved': color.build('cyan', 'default'),
'reserved': color.build('magenta', 'default'), 'reserved': color.build('magenta', 'default'),
'builtin': color.build('cyan', 'default'), 'builtin': color.build('cyan', 'default'),
'functionname': color.build('blue', 'default'), 'functionname': color.build('blue', 'default'),

View File

@ -1,86 +0,0 @@
import tab, point
class CTabber(tab.TokenStackTabber):
    """Adjust an indentation stack, token by token, for C-like source.

    Entries on ``self.tab_stack`` are ``(marker, depth)`` pairs.  The markers
    pushed by this class are ``"c comment"``, ``"block"``, ``"cont"`` and the
    opening delimiters ``(``, ``[`` and ``{``.

    ``stack_append``, ``stack_pop``, ``tab_stack``, ``line_depth``,
    ``errors``, ``y`` and ``mode`` are not defined here; presumably they are
    provided by ``tab.TokenStackTabber`` -- verify against that class.

    NOTE(review): the nesting in ``handle_token`` was rebuilt from a listing
    whose indentation had been flattened.  Two placements are a best guess
    and should be confirmed against the original file: the end-of-line check
    in the ``"c comment"`` branch (placed after the if/else), and the
    ``stack_pop_const("block")`` call for ``{`` (placed after the inner if).
    """

    # Maps each closing delimiter to the opening delimiter it must match.
    close_tags = {')': '(',
                  ']': '[',
                  '}': '{'}

    def stack_append_const(self, c):
        """Push marker ``c`` four columns deeper than the current top."""
        self.stack_append((c, self.tab_stack[-1][1] + 4))

    def stack_append_unique_const(self, c):
        """Like ``stack_append_const``, but skip if ``c`` is already on top."""
        if self.tab_stack[-1][0] != c:
            self.stack_append((c, self.tab_stack[-1][1] + 4))

    def stack_pop_const(self, *c_args):
        """Pop the top entry once if its marker is one of ``c_args``."""
        if self.tab_stack[-1][0] in c_args:
            self.stack_pop()

    def stack_pop_all_const(self, *c_args):
        """Pop entries for as long as the top marker is one of ``c_args``."""
        while self.tab_stack[-1][0] in c_args:
            self.stack_pop()

    def handle_token(self, prev_token, token, next_token, y=None):
        """Update ``tab_stack`` / ``line_depth`` for a single token.

        prev_token -- token before ``token``, or None (presumably None means
                      ``token`` is the first on its line; confirm with caller)
        token      -- the token to process; ``name``, ``string`` and ``end``
                      are read
        next_token -- token after ``token``, or None (presumably None means
                      ``token`` is the last on its line; confirm with caller)
        y          -- accepted but not used by this method
        """
        buf = self.mode.window.buffer
        name = token.name
        s = token.string

        if name == "c comment":
            if self.tab_stack[-1][0] != "c comment":
                # first comment token: push a marker at the current depth
                self.stack_append(("c comment", self.tab_stack[-1][1]))
            else:
                self.line_depth += 1
            # pop the marker when the token ends at or before the end of the
            # current line, or when another token follows it
            p = point.Point(len(buf.lines[self.y]), self.y)
            offset = buf.get_point_offset(p)
            if token.end <= offset or next_token is not None:
                self.stack_pop()
        elif name == "macro":
            self.line_depth -= 4
        elif name == "operator" and next_token is None:
            # The original called stack_append_const_unique, which this class
            # does not define; the helper is named stack_append_unique_const.
            self.stack_append_unique_const("cont")
        elif name == "label":
            self.line_depth -= 4
        elif name == "keyword":
            if s in ("do", "else", "for", "if", "while"):
                self.stack_append_const("block")
            elif s == "case":
                if prev_token is None:
                    self.line_depth -= 4
        elif name == "delimiter":
            if s in ("{", "(", "["):
                if s == "{":
                    if prev_token is None and self.tab_stack[-1][0] == "block":
                        self.line_depth -= 4
                    self.stack_pop_const("block")
                else:
                    self.stack_pop_const("cont")
                if next_token is None:
                    # nothing follows the opener: indent by a fixed amount
                    self.stack_append((s, self.tab_stack[-1][1] + 4))
                else:
                    # otherwise use the x position where the next token starts
                    p = buf.get_offset_point(next_token.start)
                    self.stack_append((s, p.x))
            elif s in ("}", ")", "]"):
                if s == "}":
                    self.stack_pop_all_const("block", "cont")
                else:
                    self.stack_pop_all_const("cont")
                if self.tab_stack[-1][0] == self.close_tags[s]:
                    self.stack_pop()
                    if prev_token is None:
                        self.line_depth = self.tab_stack[-1][1]
                elif self.errors is False:
                    # report only the first mismatch
                    err = "tag mismatch, line %d: expected %r, got %r" % \
                        (self.y, self.tab_stack[-1][0], s)
                    self.mode.window.application.set_error(err)
                    self.errors = True
                if s == "}":
                    self.stack_pop_all_const("block", "cont")
            elif (s == "=" or s == "?") and next_token is None:
                # same misnamed-helper fix as in the operator branch above
                self.stack_append_unique_const("cont")
            elif s == ',':
                self.stack_pop_all_const("cont")
            elif s == ';':
                self.stack_pop_all_const("block", "cont")