From 5e799b381a474625f20bc6d7b41cc13c7a9003c9 Mon Sep 17 00:00:00 2001 From: d6 Date: Thu, 3 Feb 2022 01:32:35 -0500 Subject: [PATCH] get + and ? working in regex parser --- regex.tal | 59 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/regex.tal b/regex.tal index c11f4c2..e3e8453 100644 --- a/regex.tal +++ b/regex.tal @@ -52,6 +52,8 @@ @stack-is-empty "stack 20 "is 20 "empty 00 @arena-is-full "arena 20 "is 20 "full 00 @star-invariant "star 20 "invariant 20 "failed 00 +@plus-invariant "plus 20 "invariant 20 "failed 00 +@qmark-invariant "question 20 "mark 20 "invariant 20 "failed 00 ( REGEX MATCHING ) @@ -152,15 +154,21 @@ @peek-to-star ( -> is-star^ ) ;pos LDA2 LDA LIT '* EQU JMP2r +( is pos currently pointing to a plus? ) +@peek-to-plus ( -> is-plus^ ) + ;pos LDA2 LDA LIT '+ EQU JMP2r + +( is pos currently pointing to a qmark? ) +@peek-to-qmark ( -> is-qmark^ ) + ;pos LDA2 LDA LIT '? EQU JMP2r + ( just increment pos ) @skip ;pos LDA2 INC2 ;pos STA2 JMP2r ( TODO: ) ( 1. character groups: [] and [^] ) -( 2. one-or-more: + ) -( 3. zero-or-one: ? ) -( 4. symbolic escapes, e.g. \n ) +( 2. symbolic escapes, e.g. \n ) ( STRETCH GOALS: ) ( a. ^ and $ ) @@ -205,6 +213,8 @@ DUP LIT ') EQU ;c-rpar JCN2 DUP LIT '\ EQU ;c-esc JCN2 DUP LIT '* EQU ;c-star JCN2 + DUP LIT '+ EQU ;c-plus JCN2 + DUP LIT '? EQU ;c-qmark JCN2 ;c-char JMP2 ( either finalize the given r0/r1 or else wrap it in ) @@ -213,9 +223,13 @@ ( we use this look-ahead approach rather than compiling ) ( star nodes directly since the implementation is simpler. ) @c-peek-and-finalize ( r0* r1* -> r2* ) - ;peek-to-star JSR2 ( r0 r1 next-is-star? ) - ,&next-is-star JCN ,&finally JMP ( r0 r1 ) - &next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ( star ) + ;peek-to-star JSR2 ( r0 r1 next-is-star? ) ,&next-is-star JCN + ;peek-to-plus JSR2 ( r0 r1 next-is-plus? ) ,&next-is-plus JCN + ;peek-to-qmark JSR2 ( r0 r1 next-is-qmark? ) ,&next-is-qmark JCN + ,&finally JMP ( r0 r1 ) + &next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ,&finally JMP + &next-is-plus ;skip JSR2 POP2 ;alloc-plus JSR2 DUP2 ,&finally JMP + &next-is-qmark ;skip JSR2 POP2 ;alloc-qmark JSR2 DUP2 ,&finally JMP &finally ;push-next JSR2 ;compile-region-loop JMP2 ( called when we reach EOF of the input string ) @@ -302,6 +316,20 @@ POP ;star-invariant ;error! JSR2 +( called if we parse a "+" ) +( ) +( actually calling this means the code broke an invariant somewhere. ) +@c-plus ( c^ -> regex* ) + POP + ;plus-invariant ;error! JSR2 + +( called if we parse a "?" ) +( ) +( actually calling this means the code broke an invariant somewhere. ) +@c-qmark ( c^ -> regex* ) + POP + ;qmark-invariant ;error! JSR2 + ( ALLOCATING REGEX NDOES ) @alloc3 ( mode^ -> r* ) @@ -341,6 +369,25 @@ ;set-next JSR2 ( [r] ) STH2r JMP2r +@alloc-plus ( expr* -> r* ) + #05 ;alloc JSR2 STH2 ( expr [r] ) + #05 STH2kr STA ( expr [r] ) + DUP2 STH2kr INC2 STA2 ( expr [r] ) + #0000 STH2kr #0003 ADD2 STA2 ( expr [r] ) + STH2r SWP2 STH2k ( r expr [expr] ) + ;set-next JSR2 ( [expr] ) + STH2r JMP2r + +@alloc-qmark ( expr* -> r* ) + ;alloc-empty JSR2 STH2k ( expr e [e] ) + OVR2 ;set-next JSR2 ( expr [e] ) + #05 ;alloc JSR2 STH2 ( expr [r e] ) + #04 STH2kr STA ( expr [r e] ) + STH2kr INC2 STA2 ( [r e] ) + SWP2r STH2r STH2kr ( e r [r] ) + #0003 ADD2 STA2 ( [r] ) + STH2r JMP2r + ( if r is 0000, allocate an empty node ) @alloc-if-null ( r* -> r2* ) ORAk ,&return JCN POP2 ;alloc-empty JSR2 &return JMP2r