branch : pmacs2
This commit is contained in:
moculus 2007-07-14 14:21:22 +00:00
parent b681024851
commit cb663fa934
13 changed files with 130 additions and 139 deletions

26
IDEAS
View File

@@ -1,10 +1,20 @@
2007/07/11:
2007/07/14:
We need a way to have a rule that matches the end of the line. Many languages
have regions whose "end" token is merely the end of the line. In those cases,
our grammars employ hacks (with varying degrees of success) to work around the
fact that rules must match one or more characters from the buffer.
The rules are also currently implemented in a confusing way, and perform poorly
when used in deeply nested grammars.
One solution would be to artificially include a newline character at the end of
the line, which could be matched in regexes. Another would be to create a new
type of rule and write some special-case code in the region rules.
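(For illustration, a minimal sketch of the first approach; the word/eol rules
and the tokens() helper below are hypothetical, not lex2's API. The lex2.py
changes in this commit appear to take this route: Lexer.next() appends '\n' to
each line, and the rule loops extend their bounds by one to cover it.)

import re

word_re = re.compile(r'[a-zA-Z_]+')
eol_re = re.compile(r'\n')

def tokens(line):
    s = line + '\n'  # virtual newline: visible to the regexes,
    x = 0            # but never part of the real buffer
    while x < len(s):
        m = word_re.match(s, x)
        if m:
            yield ('word', m.group(0))
            x = m.end()
            continue
        m = eol_re.match(s, x)
        if m:
            yield ('eol', m.group(0))
            x = m.end()
            continue
        x += 1       # no rule matched; skip this character

# list(tokens('foo bar')) == [('word', 'foo'), ('word', 'bar'), ('eol', '\n')]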
We need to refactor lex2 so that rules have two methods:
1. match():
This method should return whether or not the rule can match the lexer's
current input. If its result is true, the result will be passed (along with
the lexer, etc.) to the rule's lex() method; otherwise, the next rule will be
tried.
2. lex():
This method is a generator, and is expected to yield one or more tokens. In
addition to the arguments given to match(), it will be passed the result of the
call to match() (which is guaranteed to be true, and will most often be a regex
match object). Like all generators, this method will raise StopIteration when
there are no more tokens to return, and will raise LexError if there are other
problems.
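(A rough sketch of what the proposed protocol might look like; it reuses
lex2's Token class (name, rule, y, x, string, parent), but PatternRule2 and
the driver loop are hypothetical illustrations, not the actual
implementation.)

import re
from lex2 import Token

class PatternRule2(object):
    def __init__(self, name, pattern):
        self.name = name
        self.re = re.compile(pattern)
    def match(self, lexer, parent):
        # cheap test: a true result (here, a regex match object) is
        # handed on to lex(); None means "try the next rule"
        return self.re.match(lexer.lines[lexer.y], lexer.x)
    def lex(self, lexer, parent, m):
        # generator: yields one or more tokens, then raises StopIteration
        yield Token(self.name, self, lexer.y, lexer.x, m.group(0), parent)
        lexer.x = m.end()

# a lexer's inner loop would then be roughly:
#     for rule in grammar.rules:
#         m = rule.match(lexer, parent)
#         if m:
#             for t in rule.lex(lexer, parent, m):
#                 lexer.add_token(t)
#             break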

View File

@@ -1,5 +1,7 @@
package TBB::Reporting2;
my $bar =~ s/foob/blag/g;
my $foo = {
'foo',
'bar',

View File

@@ -1,5 +1,5 @@
import sys
import lex2
from lex2 import Token
color_list = []
color_list.extend(['\033[3%dm' % x for x in range(0, 8)])
@@ -274,10 +274,10 @@ class Highlighter:
post_change_list.append(t2)
# add in the new data
newtokens[y1].append(lex2.Token('new', '', y1, x1, newlines[0]))
newtokens[y1].append(Token('new', '', y1, x1, newlines[0]))
for i in range(1, len(newlines)):
yi = y1 + i
newtokens[yi].append(lex2.Token('new', '', yi, 0, newlines[i]))
newtokens[yi].append(Token('new', '', yi, 0, newlines[i]))
# add the post-change tokens back
for t in post_change_list:

133
lex2.py
View File

@@ -1,6 +1,8 @@
import re
import util
valid_name_re = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$')
full_name_re = re.compile('^([a-zA-Z_]+)([0-9]*)$')
reserved_names = ['start', 'middle', 'end', 'null']
class Token(object):
@@ -59,7 +61,10 @@ class Token(object):
return "<Token(%r, %r, %d, %d, %r)>" % fields
class Rule:
name = 'abstract'
def __init__(self, name):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
def match(self, lexer, parent):
raise Exception, "%s rule cannot match!" % self.name
def make_token(self, lexer, s, name, parent=None, matchd={}):
@@ -74,9 +79,7 @@ class Rule:
class ConstantRule(Rule):
def __init__(self, name, constant, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.constant = constant
self.length = len(self.constant)
self._set_group(group)
@@ -92,9 +95,7 @@ class ConstantRule(Rule):
class PatternRule(Rule):
def __init__(self, name, pattern, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.pattern = pattern
self._compile()
self._set_group(group)
@@ -120,9 +121,7 @@ class NocasePatternRule(PatternRule):
class ContextPatternRule(PatternRule):
def __init__(self, name, pattern, fallback, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.pattern = pattern
self.fallback = fallback
self.fallback_re = re.compile(fallback)
@@ -142,9 +141,7 @@ class ContextPatternRule(PatternRule):
class RegionRule(Rule):
def __init__(self, name, start, grammar, end, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.start = start
self.grammar = grammar
self.end = end
@@ -157,7 +154,6 @@ class RegionRule(Rule):
return re.compile(self.end % d)
def resume(self, lexer, toresume):
#raise Exception, "%r %r" % (lexer, toresume) #XYZ
assert toresume, "can't resume without tokens to resume!"
self._match(lexer, None, None, toresume)
return True
@@ -204,7 +200,6 @@ class RegionRule(Rule):
# reference named groups from the start token. if we have no end,
# well, then, we're never getting out of here alive!
if self.end:
#end_re = re.compile(self.end % d)
end_re = self._compile_end(d)
# ok, so as long as we aren't done (we haven't found an end token),
@@ -213,17 +208,8 @@ class RegionRule(Rule):
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we skip it, but here we insert
# an empty null token just so we have something
#if not reenter and len(lexer.lines[lexer.y]) == 0:
# null_t = Token('null', None, lexer.y, lexer.x, '', parent)
# lexer.add_token(null_t)
# null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
#while (not done and lexer.y == old_y and
# lexer.x < len(lexer.lines[lexer.y])):
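# (the +1 below admits the virtual newline appended to each line, so that
# end-of-line rules can match it)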
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
if reenter:
@@ -233,7 +219,6 @@ class RegionRule(Rule):
null_t = None
if lexer.y >= len(lexer.lines):
return True
#elif lexer.x >= len(lexer.lines[lexer.y]):
elif lexer.x >= len(lexer.lines[lexer.y]) + 1:
lexer.y += 1
lexer.x = 0
@@ -266,9 +251,7 @@ class RegionRule(Rule):
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
#if len(lexer.lines[lexer.y]) > lexer.x:
if lexer.x < len(line):
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1
@@ -294,15 +277,12 @@ class NocaseRegionRule(RegionRule):
class DualRegionRule(Rule):
def __init__(self, name, start, grammar1, middle, grammar2, end, group=None):
assert valid_name_re.match(name), 'invalid name %r' % name
assert name not in reserved_names, "reserved rule name: %r" % name
self.name = name
Rule.__init__(self, name)
self.start = start
self.grammar1 = grammar1
self.middle = middle
self.grammar2 = grammar2
self.end = end
#self.start_re = re.compile(start)
self.start_re = self._compile_start()
self._set_group(group)
@@ -353,7 +333,6 @@ class DualRegionRule(Rule):
d1 = parent.matchd
assert parent.name == 'start'
null_t = None
#middle_re = re.compile(self.middle % d1)
middle_re = self._compile_middle(d1)
d2 = {}
@@ -364,28 +343,15 @@ class DualRegionRule(Rule):
while not done and lexer.y < len(lexer.lines):
old_y = lexer.y
# if this line is empty, then we will skip it, but here we insert
# an empty null token just so we have something
#if len(lexer.lines[lexer.y]) == 0:
# null_t = Token('null', None, lexer.y, lexer.x, '', parent)
# lexer.add_token(null_t)
# null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
#while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]):
while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]) + 1:
# if we are reentering mid-parse, then that takes precedence
if reenter:
raise Exception, "aw damn1"
#reenter = False
#xrule = rulecontext[0].rule
#xd = rulecontext[0].matchd
#assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \
# "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:])
#found = True
#null_t = None
#break
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
line = self._get_line(lexer)
@@ -414,7 +380,6 @@ class DualRegionRule(Rule):
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1
@@ -436,10 +401,8 @@ class DualRegionRule(Rule):
if reenter:
assert parent is toresume[0]
assert parent.name == 'middle'
#assert parent.name == 'middle'
d3 = parent.matchd
null_t = None
#end_re = re.compile(self.end % d3)
end_re = self._compile_end(d3)
# ok, so as long as we aren't done (we haven't found an end token),
@@ -451,22 +414,10 @@ class DualRegionRule(Rule):
# if we are reentering mid-parse, then that takes precedence
if reenter:
raise Exception, "aw damn2"
#reenter = False
#xrule = rulecontext[0].rule
#xd = rulecontext[0].matchd
#assert rule2.resume(lexer, xcontext, xd, rulecontext[1:]), \
# "%r %r %r %r" % (lexer, xcontext, xd, rulecontext[1:])
#found = True
#null_t = None
#break
# if this line is empty, then we will skip it, but here we insert
# an empty null token just so we have something
#if len(lexer.lines[lexer.y]) == 0:
# null_t = Token('null', None, lexer.y, lexer.x, '', parent)
# lexer.add_token(null_t)
# null_t = None
reenter = False
rule2 = toresume[1].rule
rule2.resume(lexer, toresume[1:])
null_t = None
# ok, as long as we haven't found the end token, and have more
# data on the current line to read, we will process tokens
@@ -496,7 +447,6 @@ class DualRegionRule(Rule):
if null_t is None:
null_t = Token('null', None, lexer.y, lexer.x, '', parent)
lexer.add_token(null_t)
#null_t.add_to_string(lexer.lines[lexer.y][lexer.x])
null_t.add_to_string(line[lexer.x])
lexer.x += 1
@@ -525,30 +475,7 @@ class Grammar:
rule.grammar = self
if hasattr(rule, 'grammar2') and rule.grammar2 is None:
rule.grammar2 = self
grammars = {}
grammars['null'] = Grammar()
crash = False
def add(name, grammar):
global crash, grammars
if crash and name in grammars:
raise Exception, "oh no! already have a grammar for %r" %name
else:
grammars[name] = grammar
def get(name):
global crash, grammars
try:
return grammars[name]
except KeyError:
if crash:
raise
elif name == 'null':
return Grammar()
else:
return get('null')
grammar = Grammar()
class Lexer:
def __init__(self, name, grammar):
@@ -569,16 +496,22 @@ class Lexer:
self.tokens = []
def resume(self, lines, y, x, token):
#raise Exception, "%r %r" % (self, token) #XYZ
self.y = y
self.x = x
self.lines = lines
self.tokens = []
toresume = token.parents()
# this is a special case for the "middle" rule of a dual region rule
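# (the dual region rule resumes from its 'middle' token directly, so the
# preceding 'start' frame would be redundant)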
i = 0
while i < len(toresume):
if i > 0 and toresume[i].name == 'middle' and toresume[i-1].name == 'start':
del toresume[i-1]
else:
i += 1
if toresume:
toresume[0].rule.resume(self, toresume)
#else:
# raise Exception, "dammmmit"
def __iter__(self):
if self.lines is None:
@@ -586,13 +519,10 @@
return self
def next(self):
null_t = None
if self.tokens:
return self.tokens.pop(0)
while self.y < len(self.lines):
#line = self.lines[self.y]
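# append a virtual newline so that rules can match the end of the line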
line = self.lines[self.y] + '\n'
while self.x < len(line):
curr_t = None
@@ -603,14 +533,11 @@ class Lexer:
if null_t is None:
null_t = Token('null', None, self.y, self.x, '')
self.add_token(null_t)
#assert line[self.x] != '\n', "DAMN"
#assert line[self.x] != '$', "DAMN"
null_t.add_to_string(line[self.x])
self.x += 1
null_t = None
self.y += 1
self.x = 0
if self.tokens:
return self.tokens.pop(0)
else:

View File

@@ -1,5 +1,6 @@
import os, sets, string
import color, lex2, method
import color, method
from lex2 import Lexer
DEBUG = False
@@ -156,7 +157,7 @@ class Fundamental(Handler):
# lexing for highlighting, etc.
if self.grammar:
self.lexer = lex2.Lexer(self.name(), self.grammar)
self.lexer = Lexer(self.name(), self.grammar)
# tab handling
if self.tabbercls:

View File

@@ -1,4 +1,4 @@
import color, lex2, mode2
import color, mode2
from lex2 import Grammar, PatternRule, RegionRule
from mode_perl import PerlGrammar
from mode_xml import OpenTagGrammar

View File

@@ -1,12 +1,17 @@
import color, mode2
from lex2 import Grammar, PatternRule
from lex2 import Grammar, PatternRule, RegionRule
from mode_python import StringGrammar
class ConsoleGrammar(Grammar):
rules = [
PatternRule(name=r'mesg', pattern=r'^[A-Za-z].*$'),
PatternRule(name=r'input', pattern=r'^>>>.*$'),
PatternRule(name=r'input', pattern=r'^-->.*$'),
PatternRule(name=r'output', pattern=r'^ .*$'),
PatternRule(r'mesg', r'^[A-Za-z].*$'),
PatternRule(r'input', r'^>>>.*$'),
PatternRule(r'input', r'^-->.*$'),
#PatternRule(r'output', r'^ .*$'),
RegionRule(r'string', r'"', StringGrammar, r'"'),
RegionRule(r'string', r"'", StringGrammar, r"'"),
PatternRule(r'bareword', r'[a-zA-Z_][a-zA-Z0-9_]*'),
]
class Console(mode2.Fundamental):
grammar = ConsoleGrammar()
@@ -16,6 +21,12 @@ class Console(mode2.Fundamental):
'mesg': color.build('blue', 'default'),
'input': color.build('cyan', 'default'),
'output': color.build('default', 'default'),
'string.start': color.build('green', 'default'),
'string.octal': color.build('magenta', 'default'),
'string.escaped': color.build('magenta', 'default'),
'string.null': color.build('green', 'default'),
'string.end': color.build('green', 'default'),
}
def name(self):
return "Console"

View File

@@ -1,5 +1,5 @@
import re, sets, string, sys
import color, commands, default, lex2, method, mode2, regex, tab2
import color, commands, default, method, mode2, regex, tab2
from point2 import Point
from lex2 import Grammar, ConstantRule, PatternRule, ContextPatternRule, \
RegionRule, DualRegionRule

View File

@@ -1,5 +1,5 @@
import re, sets, string, sys
import color, commands, default, lex2, method, mode2, regex, tab2
import color, commands, default, method, mode2, regex, tab2
from point2 import Point
from lex2 import Grammar, PatternRule, ContextPatternRule, RegionRule, DualRegionRule
from method import Argument, Method
@@ -11,11 +11,6 @@ class PodGrammar(Grammar):
RegionRule(r'entry', r'(?<=^=item) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$', Grammar, '^\n$'),
RegionRule(r'entry', r'(?<=^=encoding) +.*$', Grammar, '^\n$'),
#PatternRule(r'entry', r'(?<=^=head[1-4]) +.*$'),
#PatternRule(r'entry', r'(?<=^=over) +.*$'),
#PatternRule(r'entry', r'(?<=^=item) +.*$'),
#PatternRule(r'entry', r'(?:(?<=^=begin)|(?<=^=end)) +.*$'),
#PatternRule(r'entry', r'(?<=^=encoding) +.*$'),
]
class StringGrammar(Grammar):
@@ -168,12 +163,12 @@ class PerlTabber(tab2.StackTabber):
return currlvl
class Perl(mode2.Fundamental):
tabbercls = PerlTabber
grammar = PerlGrammar
opentokens = ('delimiter',)
opentags = {'(': ')', '[': ']', '{': '}'}
closetoken = ('delimiter',)
closetags = {')': '(', ']': '[', '}': '{'}
tabbercls = PerlTabber
grammar = PerlGrammar
opentokens = ('delimiter',)
opentags = {'(': ')', '[': ']', '{': '}'}
closetokens = ('delimiter',)
closetags = {')': '(', ']': '[', '}': '{'}
def __init__(self, w):
mode2.Fundamental.__init__(self, w)
@@ -183,7 +178,7 @@ class Perl(mode2.Fundamental):
#self.add_action_and_bindings(PerlHashCleanup2(), ('C-c h',))
self.add_action_and_bindings(PerlViewModulePerldoc(), ('C-c v',))
self.add_action_and_bindings(PerlViewWordPerldoc(), ('C-c p',))
#self.add_action_and_bindings(PerlWrapLine(), ('M-q',))
self.add_action_and_bindings(PerlWrapLine(), ('M-q',))
self.add_action_and_bindings(PerlGotoFunction(), ('C-c M-g',))
self.add_action_and_bindings(PerlWhichFunction(), ('C-c w',))
self.add_action_and_bindings(PerlListFunctions(), ('C-c W',))
@@ -497,6 +492,46 @@ class PerlHashCleanup(Method):
window.kill(start_p, end_p)
window.insert_string(start_p, data)
class PerlWrapLine(Method):
'''Wrap Comments and POD'''
margin = 80
comment_re = re.compile('(#+)( *)(.*)')
def _is_newline(self, t):
return t.name == 'eol'
def _is_space(self, t):
return t.name == 'null' and regex.space.match(t.string)
def _detect_line_type(self, w, y):
highlighter = w.buffer.highlights[w.mode.name()]
ltype = None
for t in highlighter.tokens[y]:
if self._is_space(t):
pass
elif t.name == 'comment':
if ltype:
return None
else:
ltype = 'comment'
elif t.name == 'eol':
return ltype
else:
return None
def _execute(self, w, **vargs):
c = w.logical_cursor()
ltype = self._detect_line_type(w, c.y)
if ltype == 'comment':
return self._fix_comments(c, w)
elif ltype == 'pod':
return self._fix_pod(c, w)
else:
w.set_error("did not detect comment or pod lines")
return
def _fix_comments(self, c, w):
w.set_error("comment!")
def _fix_pod(self, c, w):
pass
#class PerlWrapLine(Method):
# '''Wrap lines, comments, POD'''
# margin = 80

View File

@@ -1,5 +1,5 @@
import commands, os.path, sets, string
import color, completer, default, mode2, lex2, method, regex, tab2
import color, completer, default, mode2, method, regex, tab2
import ctag_python
from point2 import Point
from lex2 import Grammar, PatternRule, RegionRule, ConstantRule

View File

@@ -136,4 +136,3 @@ def _end(w):
w.application.last_search = w.buffer.make_string()
w.buffer.method.old_cursor = None
w.buffer.method.old_window = None
w.buffer.method.is_literal = None

View File

@@ -1,4 +1,4 @@
import color, lex2, mode2
import color, mode2
from lex2 import Grammar, PatternRule, RegionRule
class OpenTagGrammar(Grammar):

View File

@@ -50,3 +50,9 @@ def count_leading_whitespace(s):
m = regex.leading_whitespace.match(s)
assert m, "count leading whitespace failed somehow"
return m.end() - m.start()
def dump(x):
d = {}
for name in dir(x):
d[name] = getattr(x, name)
return '%s: %r' % (x, d)
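# usage sketch: call dump(obj) on any object (e.g. a lexer or a token) to get
# a one-line summary of its attributes while debugging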