From edfd0b791bdec26f5470107ddd3ac6832de6c3ac Mon Sep 17 00:00:00 2001 From: d6 Date: Mon, 11 Apr 2022 00:16:31 -0400 Subject: [PATCH] fix bug with ^ and $ --- regex.tal | 66 +++++++++++++++++++++----------------------------- repl-regex.tal | 21 ++++++++-------- 2 files changed, 39 insertions(+), 48 deletions(-) diff --git a/regex.tal b/regex.tal index 90c5905..7ce98df 100644 --- a/regex.tal +++ b/regex.tal @@ -115,25 +115,22 @@ ( str* should be a null-terminated string. ) ( ) ( returns true if the string, and false otherwise. ) -@match ( str* regex* -> bool^ ) +@rx-match ( str* regex* -> bool^ ) #01 ;match-multiline STA #00 ;search-mode STA ;reset-stack JSR2 ;loop JMP2 +@rx-search-multiline ( str* regex* -> bool^ ) + #01 ;match-multiline STA + #01 ;search-mode STA + ,rx-search/main JMP + @rx-search ( str* regex* -> bool^ ) #00 ;match-multiline STA #01 ;search-mode STA - ;_search JMP2 - -( @search-multiline ( str* regex* -> bool^ ) - #01 ;match-multiline STA - #01 ;search-mode STA - ;_search JMP2 ) - -@_search ( str* regex* -> bool^ ) - STH2 ( s* [r*] ) - DUP2 ;string-start STA2 ( s* [r*] ) + &main STH2 ( s* [r*] ) + DUP2 ;string-start STA2 ( s* [r*] ) &loop LDAk #00 EQU ,&eof JCN ( s* [r*] ) ;reset-stack JSR2 ( s* [r*] ) DUP2 ;search-start STA2 ( s* [r*] ) @@ -340,13 +337,13 @@ @compile-region-loop ;read JSR2 DUP #00 EQU ;c-done JCN2 - DUP LIT '| EQU ;c-or JCN2 - DUP LIT '. EQU ;c-dot JCN2 - DUP LIT '^ EQU ;c-caret JCN2 - DUP LIT '$ EQU ;c-dollar JCN2 + DUP LIT '| EQU ;c-or JCN2 + DUP LIT '. EQU ;c-dot JCN2 + DUP LIT '^ EQU ;c-caret JCN2 + DUP LIT '$ EQU ;c-dollar JCN2 DUP LIT '( EQU ;c-lpar JCN2 DUP LIT ') EQU ;c-rpar JCN2 - DUP LIT '\ EQU ;c-esc JCN2 + DUP LIT '\ EQU ;c-esc JCN2 DUP LIT '* EQU ;c-star JCN2 DUP LIT '+ EQU ;c-plus JCN2 DUP LIT '? EQU ;c-qmark JCN2 @@ -430,7 +427,7 @@ ( allocates a caret-node and continues. ) @c-caret ( c^ -> r2* ) POP - #06 ;alloc3 JMP2 + #06 ;alloc3 JSR2 DUP2 ;c-peek-and-finalize JMP2 ( called when we read "$" ) @@ -438,7 +435,7 @@ ( allocates a dollar-node and continues. ) @c-dollar ( c^ -> r2* ) POP - #07 ;alloc3 JMP2 + #07 ;alloc3 JSR2 DUP2 ;c-peek-and-finalize JMP2 ( called when we read "\" ) @@ -494,6 +491,10 @@ ( ALLOCATING REGEX NDOES ) +@rx-node-sizes + ( 00 01 02 03 04 05 06 07 08 09 ) + [ 00 03 03 04 ] [ 05 05 03 03 ] [ 05 05 ] + @alloc3 ( mode^ -> r* ) #0000 ROT ( 00 00 mode^ ) #03 ;alloc JSR2 ( 00 00 mode^ addr* ) @@ -588,7 +589,7 @@ ;set-next JSR2 SWP2 ( x0 r1 ) ;push4 JSR2 JMP2r - &is-zero POP2 POP2 ;push4 JSR2 JMP2r + &is-zero POP2 POP2 ;push4 JMP2 ( load the given address: ) ( ) @@ -596,7 +597,7 @@ ( 2. otherwise, call set-next on it ) @set-next-addr ( target* addr* -> ) LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN - LDA2 ;set-next JSR2 JMP2r + LDA2 ;set-next JMP2 &is-zero STA2 JMP2r ( set regex.next to target ) @@ -610,17 +611,16 @@ ( back up we only bother taking the left branch. otherwise ) ( you can end up double-appending things. ) @set-next ( target* regex* -> ) - LDAk #01 LTH ,&unknown JCN - LDAk #07 GTH ,&unknown JCN - LDAk #05 NEQ ,&!5 JCN #0003 ADD2 ,&continue JMP - &!5 LDAk #03 NEQ ,&!3 JCN #0002 ADD2 ,&continue JMP - &!3 INC2 - &continue ;set-next-addr JSR2 JMP2r + LDAk #01 LTH ,&unknown JCN + LDAk #09 GTH ,&unknown JCN + LDAk #00 SWP ;rx-node-sizes ADD2 + LDA #00 SWP ADD2 #0002 SUB2 + ;set-next-addr JMP2 &unknown LDAk ;unknown-node-type ;error!! JSR2 @set-next-or-addr ( target* addr* -> ) LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN - LDA2 ;set-next-or JSR2 JMP2r + LDA2 ;set-next-or JMP2 &is-zero STA2 JMP2r ( this is used when first building or-nodes ) @@ -630,7 +630,7 @@ @set-next-or ( target* regex* -> ) LDAk #04 NEQ ,&!4 JCN OVR2 OVR2 INC2 ;set-next-addr JSR2 - #0003 ADD2 ;set-next-or-addr JSR2 JMP2r + #0003 ADD2 ;set-next-or-addr JMP2 &!4 ;set-next JMP2 ( STACK OPERATIONS ) @@ -663,16 +663,6 @@ STH2r STH2r ( restore str and regex ) JMP2r -( -( -> size^ ) -@frame-size - #00 STH ;stack-pos LDA2 - &loop - #0004 SUB2 LDA2k #ffff EQU2 ,&done JCN - INCr ,&loop JMP - &done - STHr JMP2r ) - ( reset stack pointers ) @reset-stack ( -> ) ;stack-bot ;stack-pos STA2 JMP2r ( pos <- 0 ) diff --git a/repl-regex.tal b/repl-regex.tal index 6e18980..661533e 100644 --- a/repl-regex.tal +++ b/repl-regex.tal @@ -52,7 +52,8 @@ #00 ;ptr LDA2 STA ( null terminate string ) ;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? ) ;buffer ;ptr STA2 ( reset ptr ) - ;buffer ;regex LDA2 ;match JSR2 ( match regex ) +( ;buffer ;regex LDA2 ;rx-match JSR2 ( match regex ) ) + ;buffer ;regex LDA2 ;rx-search JSR2 ( search regex ) ;emit-byte JSR2 nl ( print result ) STHr ,&was-empty JCN ;s-prompt ;println JSR2 @@ -96,13 +97,13 @@ @emit-arena ( -> ) ;arena-bot &loop - DUP2 ;arena-pos LDA2 LTH2 ,&ok JCN POP2 JMP2r + DUP2 ;arena-pos LDA2 LTH2 ,&ok JCN POP2 JMP2r &ok - DUP2 ;emit-short JSR2 - LIT ': emit! space - LDAk #01 NEQ ,&!1 JCN #03 ;emit-n JSR2 ,&loop JMP - &!1 LDAk #02 NEQ ,&!2 JCN #03 ;emit-n JSR2 ,&loop JMP - &!2 LDAk #03 NEQ ,&!3 JCN #04 ;emit-n JSR2 ,&loop JMP - &!3 LDAk #04 NEQ ,&!4 JCN #05 ;emit-n JSR2 ,&loop JMP - &!4 LDAk #05 NEQ ,&!5 JCN #05 ;emit-n JSR2 ,&loop JMP - &!5 ;unknown-node-type ;error!! JSR2 + DUP2 ;emit-short JSR2 + LIT ': emit! space + LDAk #01 LTH ,&uu JCN + LDAk #09 GTH ,&uu JCN + LDAk #00 SWP ;rx-node-sizes ADD2 + LDA ;emit-n JSR2 ,&loop JMP + &uu + ;unknown-node-type ;error!! JSR2