diff --git a/lex2.py b/lex2.py index 98bd4bc..f4463a6 100755 --- a/lex2.py +++ b/lex2.py @@ -78,7 +78,6 @@ class RegionRule(Rule): m = self.start_re.match(lexer.lines[lexer.y], lexer.x) if m: self._add_from_regex(context, 'start', lexer, m) - null_t_name = '.'.join(context + [self.name, 'null']) null_t = None @@ -86,14 +85,18 @@ class RegionRule(Rule): end_re = re.compile(self.end % m.groupdict()) done = False + # NOTE: need to better handle matches that might consume more than + # one line of input. #### also, seems like some "region" matching isn't + # working, and finally, like the end token(s) might not be handled correctly while not done and lexer.y < len(lexer.lines): - line = lexer.lines[lexer.y] - if len(line) == 0: + old_y = lexer.y + if len(lexer.lines[lexer.y]) == 0: null_t = Token(null_t_name, lexer.y, lexer.x, '') lexer.add_token(null_t) - while not done and lexer.x < len(line): + null_t = None + while not done and lexer.y == old_y and lexer.x < len(lexer.lines[lexer.y]): if self.end: - m = end_re.match(line, lexer.x) + m = end_re.match(lexer.lines[lexer.y], lexer.x) if m: self._add_from_regex(context, 'end', lexer, m) done = True @@ -109,17 +112,19 @@ class RegionRule(Rule): if null_t is None: null_t = Token(null_t_name, lexer.y, lexer.x, '') lexer.add_token(null_t) - null_t.add_to_string(line[lexer.x]) + null_t.add_to_string(lexer.lines[lexer.y][lexer.x]) lexer.x += 1 - + null_t = None - if not done: + if not done and old_y == lexer.y: lexer.y += 1 lexer.x = 0 return True else: return False +# NOTE: this needs to get synced up with RegionRule's changes... +# right now, it has at least 2-3 different bugs. suck! class DualRegionRule(Rule): def __init__(self, name, start, grammar1, middle, grammar2, end): assert valid_name_re.match(name), 'invalid name %r' % name diff --git a/lex2_perl.py b/lex2_perl.py index f7f0469..9650842 100755 --- a/lex2_perl.py +++ b/lex2_perl.py @@ -116,7 +116,7 @@ class PerlGrammar(Grammar): ), PatternRule( name=r'bareword_hash_index', - pattern=r'(?<={) *[A-Za-z0-9_]+(?=})', + pattern=r'(?<={)[A-Za-z0-9_]+(?=})', ), PatternRule( name=r'bareword_hash_key', diff --git a/test3.py b/test3.py index 2da3a67..b78d2b8 100644 --- a/test3.py +++ b/test3.py @@ -17,64 +17,64 @@ for i in range(0, len(color_list)): color_dict[color_names[i]] = color_list[i] token_colors = { - 'null': 'white', - 'delimiter': 'white', - 'pod.start': 'lred', - 'pod.null': 'lred', - 'pod.end': 'lred', - 'pod.header': 'lpurple', - 'sub': 'lcyan', - 'number': 'white', - 'operator': 'white', - 'heredoc': 'lgreen', - 'endblock': 'lred', - 'pod': 'lred', - 'comment': 'lred', - 'string1': 'lgreen', - 'string1.start': 'lgreen', - 'string1.null': 'lgreen', - 'string1.escaped': 'lpurple', - 'string1.scalar': 'yellow', - 'string1.system_scalar': 'yellow', - 'string1.hash_deref': 'yellow', - 'string1.hash_bareword_index': 'lgreen', - 'string1.end': 'lgreen', - 'string2': 'lgreen', - 'string2.start': 'lgreen', - 'string2.null': 'lgreen', - 'string2.end': 'lgreen', - 'evalstring': 'lcyan', - 'default_string': 'lgreen', - 'keyword': 'lpurple', - 'length_scalar': 'yellow', - 'system_scalar': 'yellow', - 'system_array': 'yellow', - 'scalar': 'yellow', - 'dereference': 'yellow', - 'array': 'yellow', - 'hash': 'yellow', - 'hash_bareword_index': 'lgreen', - 'quoted_region': 'lcyan', - 'match_regex': 'lcyan', - 'replace_regex': 'lcyan', - 'literal_hash_bareword_index': 'lgreen', - 'interpolated_scalar': 'yellow', - 'interpolated_system_scalar': 'yellow', - 'interpolated_array': 'yellow', - 'interpolated_system_array': 'yellow', - 'interpolated_hash': 'yellow', - 'label': 'lcyan', - 'package': 'lcyan', - 'use': 'lcyan', - 'method': 'lcyan', - 'methodref': 'lcyan', - 'method_declaration': 'lcyan', - 'instance_method': 'lcyan', - 'static_method': 'lcyan', - 'builtin_method': 'lpurple', - 'bareword_method': 'lcyan', - 'bareword': 'yellow', - 'bizzaro': 'lpurple', + 'null': 'white', + 'delimiter': 'white', + 'pod.start': 'lred', + 'pod.null': 'lred', + 'pod.end': 'lred', + 'pod.header': 'lpurple', + 'sub': 'lcyan', + 'number': 'white', + 'operator': 'white', + 'heredoc': 'lgreen', + 'endblock': 'lred', + 'pod': 'lred', + 'comment': 'lred', + #'string1': 'lgreen', + 'string1.start': 'lgreen', + 'string1.null': 'lgreen', + 'string1.escaped': 'lpurple', + #'string1.scalar': 'yellow', + #'string1.system_scalar': 'yellow', + 'string1.hash_deref': 'yellow', + #'string1.hash_bareword_index': 'lgreen', + 'string1.end': 'lgreen', + #'string2': 'lgreen', + 'string2.start': 'lgreen', + 'string2.null': 'lgreen', + 'string2.end': 'lgreen', + 'evalstring': 'lcyan', + 'default_string': 'lgreen', + 'keyword': 'lpurple', + 'length_scalar': 'yellow', + 'system_scalar': 'yellow', + 'system_array': 'yellow', + 'scalar': 'yellow', + 'dereference': 'yellow', + 'array': 'yellow', + 'hash': 'yellow', + 'bareword_hash_index': 'lgreen', + 'quoted_region': 'lcyan', + 'match_regex': 'lcyan', + 'replace_regex': 'lcyan', + 'bareword_hash_key': 'lgreen', + 'interpolated_scalar': 'yellow', + 'interpolated_system_scalar': 'yellow', + 'interpolated_array': 'yellow', + 'interpolated_system_array': 'yellow', + 'interpolated_hash': 'yellow', + 'label': 'lcyan', + 'package': 'lcyan', + 'use': 'lcyan', + 'method': 'lcyan', + 'methodref': 'lcyan', + 'method_declaration': 'lcyan', + 'instance_method': 'lcyan', + 'static_method': 'lcyan', + 'builtin_method': 'lpurple', + 'bareword_method': 'lcyan', + 'bareword': 'yellow', + 'bizzaro': 'lpurple', } paths = sys.argv[1:]