diff --git a/regex.tal b/regex.tal index 93ade47..f964e99 100644 --- a/regex.tal +++ b/regex.tal @@ -34,6 +34,7 @@ %space { #20 emit } %newline { #0a emit } %print { debug newline } +%error! { #00 #00 DIV } ( test -> ) @@ -80,7 +81,7 @@ LDAk #03 EQU ;do-literal JCN2 LDAk #04 EQU ;do-or JCN2 LDAk #05 EQU ;do-or JCN2 ( same code as the or case ) - ( LIT 'x emit ) #00 #00 DIV ( should not happen ) + error! @goto-backtrack ( -> bool^ ) ;stack-exist JSR2 ,&has-stack JCN ( do we have stack? ) @@ -162,14 +163,10 @@ ( by #ffff #ffff. above that we start with #0000 #0000 ) ( to signal an empty node. ) @compile-region ( -> r2* ) -( #abcd #1234 ;emit-short JSR2 ;emit-short JSR2 space newline ) - #ffff #ffff ;push4 JSR2 - #0000 #0000 ;push4 JSR2 + #ffff #ffff ;push4 JSR2 ( stack delimiter ) + #0000 #0000 ;push4 JSR2 ( stack frame start ) @compile-region-loop ;read JSR2 -( LIT '> emit space DUP ;emit-byte JSR2 space print - ;emit-stack JSR2 - ;emit-arena JSR2 newline ) DUP #00 EQU ;c-done JCN2 DUP LIT '* EQU ;c-star JCN2 DUP LIT '| EQU ;c-or JCN2 @@ -180,79 +177,55 @@ ;c-char JMP2 @c-done ( c^ -> r2* ) -( LIT '$ emit print ) POP ;parens LDA2 #0000 GTH2 ,&mismatched-parens JCN - ;unroll-stack JSR2 POP2 - JMP2r - &mismatched-parens ( LIT 'v emit ) #00 #00 DIV + ;unroll-stack JSR2 POP2 JMP2r + &mismatched-parens error! @c-or ( c^ -> r2* ) -( LIT '| emit newline ) POP #0000 #0000 ;push4 JSR2 ;compile-region-loop JMP2 @c-lpar ( c^ -> r2* ) -( LIT '( emit newline ) POP ;parens LDA2 INC2 ;parens STA2 ( parens++ ) ;compile-region JMP2 @c-rpar ( c^ -> r2* ) -( LIT ') emit newline ) - POP ( print ) - ;parens LDA2 #0000 EQU2 ,&mismatched-parens JCN ( print ) - ;parens LDA2 #0001 SUB2 ;parens STA2 ( parens-- ) ( print ) -( LIT 'x emit newline ) + POP + ;parens LDA2 #0000 EQU2 ,&mismatched-parens JCN + ;parens LDA2 #0001 SUB2 ;parens STA2 ( parens-- ) ;unroll-stack JSR2 -( LIT 'u emit print ) - ;push-next JSR2 ( print ) -( LIT 'z emit newline ) + ;push-next JSR2 ;compile-region-loop JMP2 - &mismatched-parens ( LIT 'Z emit ) #00 #00 DIV + &mismatched-parens error! @c-dot ( c^ -> r2* ) -( LIT '. emit newline ) POP ;alloc-dot JSR2 DUP2 ;push-next JSR2 ;compile-region-loop JMP2 @c-char ( c^ -> r2* ) -( LIT '@ emit print ) -( LIT '@ emit DUP ;emit-byte JSR2 newline ) ;alloc-lit JSR2 -( LIT '# emit print ) -( ;emit-stack JSR2 - ;emit-arena JSR2 ) - DUP2 ;push-next JSR2 ( print ) -( LIT '& emit print ) + DUP2 ;push-next JSR2 ;compile-region-loop JMP2 ( TODO: escaping rules not quite right ) @c-esc ( c^ -> r2* ) -( LIT '\ emit newline ) - POP ;read JSR2 + POP + ;read JSR2 ;c-char JMP2 @c-star ( c^ -> regex* ) -( LIT '* emit print ) - POP ( print ) + POP ;pop4 JSR2 SWP2 STH2 STH2k ( x1 [x0 x1] ) -( LIT '! emit space print ) ;alloc-star JSR2 ( r ) STH2r STH2kr ( r x1 x0 [x0] ) -( LIT '0 emit newline ) -( ;emit-arena JSR2 ) ;remove-from JSR2 ( r [x0] ) -( LIT '1 emit newline - ;emit-arena JSR2 ) STH2r OVR2 OVR2 ( r x0 r x0 ) ;set-next JSR2 - OVR2 #0003 ADD2 #0000 SWP2 STA2 ( fixme ) -( LIT '2 emit print - ;emit-arena JSR2 ) + OVR2 #0003 ADD2 #0000 SWP2 STA2 ( fixme: manually zeroing next ) ( r x0 ) SWP2 ;push4 JSR2 -( LIT '* emit print ) ;compile-region-loop JMP2 @@ -264,7 +237,6 @@ STH2k STA STH2kr INC2 STA2 STH2r -( LIT 'e emit print ) JMP2r @alloc-empty ( -> r* ) @@ -274,13 +246,12 @@ #02 ;alloc3 JMP2 @alloc-lit ( c^ -> r* ) - ( print ) - #03 #0000 SWP2 ( print ) - #04 ;alloc JSR2 ( print ) - STH2k STA ( print ) - STH2kr INC2 STA ( print ) - STH2kr #0002 ADD2 STA2 ( print ) - STH2r ( print ) + #03 #0000 SWP2 + #04 ;alloc JSR2 + STH2k STA + STH2kr INC2 STA + STH2kr #0002 ADD2 STA2 + STH2r JMP2r @alloc-or ( right* left* -> r* ) @@ -291,16 +262,11 @@ STH2r JMP2r @alloc-star ( expr* -> r* ) -( LIT '& emit space print ) -( LIT 'a emit print ) - #05 ;alloc JSR2 STH2 ( print ) ( expr [r] ) - #05 STH2kr STA ( print ) ( expr [r] ) - DUP2 STH2kr INC2 STA2 ( print ) ( expr [r] ) - #0000 STH2kr #0003 ADD2 STA2 ( print ) ( expr [r] ) -( LIT 'a emit ;emit-arena JSR2 ;emit-stack JSR2 ) - - STH2kr SWP2 ( print ) ( r expr [r] ) -( LIT 'x emit print ) + #05 ;alloc JSR2 STH2 ( expr [r] ) + #05 STH2kr STA ( expr [r] ) + DUP2 STH2kr INC2 STA2 ( expr [r] ) + #0000 STH2kr #0003 ADD2 STA2 ( expr [r] ) + STH2kr SWP2 ( r expr [r] ) ;set-next JSR2 ( [r] ) STH2r JMP2r @@ -351,14 +317,14 @@ JMP2r &is-zero POP2 POP2 ;push4 JSR2 JMP2r -( for nodes (other than 'or') read their 'next' pointer ) +( ( for nodes (other than 'or') read their 'next' pointer ) @get-next ( r* -> r.next* ) LDAk #01 NEQ ,&!1 JCN INC2 LDA2 JMP2r &!1 LDAk #02 NEQ ,&!2 JCN INC2 LDA2 JMP2r &!2 LDAk #03 NEQ ,&!3 JCN #0002 ADD2 LDA2 JMP2r &!3 LDAk #04 NEQ ,&!4 JCN INC2 LDA2 JMP2r &!4 LDAk #05 NEQ ,&!5 JCN #0003 ADD2 LDA2 JMP2r - &!5 ( either #04 (or) or ??? ) ( LIT 'q emit ) #00 #00 DIV + &!5 ( either #04 (or) or ??? ) ( LIT 'q emit ) error! ) @set-next-addr ( target* addr* -> ) ( LIT 'Z emit print ) @@ -379,7 +345,7 @@ OVR2 OVR2 INC2 ( LIT '{ emit space ) ;set-next-addr JSR2 #0003 ADD2 ( LIT '} emit space ) ;set-next-addr JSR2 JMP2r &!4 LDAk #05 NEQ ,&!5 JCN #0003 ADD2 ( STA2 ) ;set-next-addr JSR2 JMP2r - &!5 ( LIT '? emit LDAk ;emit-byte JSR2 ) #00 #00 DIV + &!5 ( LIT '? emit LDAk ;emit-byte JSR2 ) error! @remove-addr ( target* addr* -> ) ( LIT 'A emit print ) @@ -401,7 +367,7 @@ ( LIT 'q emit print ) #0003 ADD2 ;remove-addr JSR2 JMP2r &!4 LDAk #05 NEQ ,&!5 JCN #0003 ADD2 ;remove-addr JSR2 JMP2r - &!5 ( LIT '? emit LDAk ;emit-byte JSR2 ) #00 #00 DIV + &!5 ( LIT '? emit LDAk ;emit-byte JSR2 ) error! ( test cases -------- ) @@ -476,9 +442,9 @@ @stack-exist ( -> bool^ ) ;stack-pos LDA2 ;stack-bot GTH2 JMP2r @assert-avail ( -> ) - ;stack-avail JSR2 ,&ok JCN #00 #00 DIV &ok JMP2r + ;stack-avail JSR2 ,&ok JCN error! &ok JMP2r @assert-exist ( -> ) - ;stack-exist JSR2 ,&ok JCN #00 #00 DIV &ok JMP2r + ;stack-exist JSR2 ,&ok JCN error! &ok JMP2r @emit-stack ( -> ) space LIT 'n emit LIT '= emit ;stack-pos LDA2 ;stack-bot SUB2 #0004 DIV2 ;emit-short JSR2 LIT ': emit @@ -529,5 +495,5 @@ &!2 LDAk #03 NEQ ,&!3 JCN #04 ;emit-n JSR2 ,&loop JMP &!3 LDAk #04 NEQ ,&!4 JCN #05 ;emit-n JSR2 ,&loop JMP &!4 LDAk #05 NEQ ,&!5 JCN #05 ;emit-n JSR2 ,&loop JMP - &!5 LDAk ;emit-byte JSR2 LIT '! emit newline #00 #00 DIV + &!5 ( LDAk ;emit-byte JSR2 LIT '! emit newline ) error! \ No newline at end of file