From d21a8b56f1a5664b1894306b32f1b203925de7c6 Mon Sep 17 00:00:00 2001 From: d6 Date: Sun, 30 Jan 2022 15:11:03 -0500 Subject: [PATCH] star refactor working --- regex.tal | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/regex.tal b/regex.tal index 34d381d..0dabf2d 100644 --- a/regex.tal +++ b/regex.tal @@ -76,6 +76,7 @@ @stack-is-full "stack 20 "is 20 "full 00 @stack-is-empty "stack 20 "is 20 "empty 00 @arena-is-full "arena 20 "is 20 "full 00 +@star-invariant "star 20 "invariant 20 "failed 00 @match ( str* regex* -> bool^ ) ;reset-stack JSR2 @@ -149,7 +150,9 @@ INC2 ( pos s+1 [c] ) SWP2 STA2 ,&return JMP ( [c] ) &is-eof POP2 POP2 - &return STHr ( c ) JMP2r + &return STHr ( c ) + ;pos LDA2 ;emit-short JSR2 LIT '> emit DUP ;emit-byte JSR2 newline + JMP2r ( read pos ) @peek ( -> c^ ) @@ -160,7 +163,10 @@ ( pos += 1 ) @skip - ;pos LDA2 INC2 ;pos STA2 JMP2r + ;pos LDA2 ;emit-short JSR2 LIT '! emit + ;pos LDA2 INC2 ;pos STA2 + ;pos LDA2 ;emit-short JSR2 newline + JMP2r ( TODO: [] + ? ) ( compile an expression string into a regex graph ) @@ -192,14 +198,14 @@ DUP LIT '( EQU ;c-lpar JCN2 DUP LIT ') EQU ;c-rpar JCN2 DUP LIT '\ EQU ;c-esc JCN2 - DUP LIT '* EQU ;c-star JCN2 + DUP LIT '* EQU ;c-star JCN2 ;c-char JMP2 -@c-peek-and-finalize ( r* -> r2* ) - ;peek-to-star JSR2 ( r next-is-star? ) - ,&next-is-star JCN ,&finally JMP ( r ) - &next-is-star ;skip JSR2 ;alloc-star JSR2 ( star ) - &finally DUP2 ;push-next JSR2 ;compile-region-loop JMP2 +@c-peek-and-finalize ( r0* r1* -> r2* ) + ;peek-to-star JSR2 ( r0 r1 next-is-star? ) + ,&next-is-star JCN ,&finally JMP ( r0 r1 ) + &next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ( star ) + &finally ;push-next JSR2 ;compile-region-loop JMP2 @c-done ( c^ -> r2* ) POP @@ -222,18 +228,17 @@ ;parens LDA2 #0000 EQU2 ,&mismatched-parens JCN ;parens LDA2 #0001 SUB2 ;parens STA2 ( parens-- ) ;unroll-stack JSR2 - ;push-next JSR2 - ;compile-region-loop JMP2 + ;c-peek-and-finalize JMP2 &mismatched-parens ;mismatched-parens ;error! JSR2 @c-dot ( c^ -> r2* ) POP ;alloc-dot JSR2 ( dot ) - ;c-peek-and-finalize JMP2 + DUP2 ;c-peek-and-finalize JMP2 @c-char ( c^ -> r2* ) ;alloc-lit JSR2 ( lit ) - ;c-peek-and-finalize JMP2 + DUP2 ;c-peek-and-finalize JMP2 ( TODO: escaping rules not quite right ) @c-esc ( c^ -> r2* ) @@ -241,16 +246,10 @@ ;read JSR2 ;c-char JMP2 +( we don't expect to actually handle this ) @c-star ( c^ -> regex* ) POP - ;pop4 JSR2 SWP2 STH2 STH2k ( x1 [x0 x1] ) - ;alloc-star JSR2 ( r ) - STH2r STH2kr ( r x1 x0 [x0] ) - ;remove-from JSR2 ( r [x0] ) - STH2r OVR2 OVR2 ( r x0 r x0 ) ;set-next JSR2 - OVR2 #0003 ADD2 #0000 SWP2 STA2 ( fixme: manually zeroing next ) - ( r x0 ) SWP2 ;push4 JSR2 - ;compile-region-loop JMP2 + ;star-invariant ;error! JSR2 ( allocate node types ------ )