From f1fec3c767e65f4622b587177cbeb4669246e188 Mon Sep 17 00:00:00 2001 From: d6 Date: Wed, 2 Feb 2022 17:39:08 -0500 Subject: [PATCH] fix regex bugs --- regex.tal | 43 +++++++++++++++----- repl-regex.tal | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ uxnrun | 17 +++++--- 3 files changed, 154 insertions(+), 14 deletions(-) create mode 100644 repl-regex.tal diff --git a/regex.tal b/regex.tal index 80e15c1..c11f4c2 100644 --- a/regex.tal +++ b/regex.tal @@ -80,7 +80,7 @@ LDAk #03 EQU ;do-literal JCN2 LDAk #04 EQU ;do-or JCN2 LDAk #05 EQU ;do-or JCN2 ( same code as the or case ) - ;unknown-node-type ;error! JSR2 + LDAk ;emit-byte JSR2 ( print the unrecognized node-type byte before raising the error. NOTE review: this patch defines emit-byte in repl-regex.tal, so confirm every program that includes regex.tal provides it ) ;unknown-node-type ;error! JSR2 ( used when we hit a dead-end during matching. ) ( ) @@ -341,6 +341,10 @@ ;set-next JSR2 ( [r] ) STH2r JMP2r +( alloc-if-null: return r unchanged when it is non-zero; when r is 0000, drop it and return the result of alloc-empty instead ) +@alloc-if-null ( r* -> r2* ) + ORAk ( r hi-or-lo ) ,&return JCN POP2 ;alloc-empty JSR2 &return JMP2r + ( unroll one region of the parsing stack, returning ) ( a single node consisting of an alternation of ) ( all elements on the stack. ) @@ -350,7 +354,7 @@ @unroll-stack ( -> start* end* ) ;pop4 JSR2 STH2 ( r ) #00 STH ( count items in stack frame ) - DUP2 #0000 NEQ2 ,&loop JCN ;alloc-empty JSR2 + ;alloc-if-null JSR2 ( replace 0000 with empty ) &loop ( r* ) ;pop4 JSR2 POP2 ( r x ) DUP2 #ffff EQU2 ( r x x-is-end? ) ,&done JCN @@ -364,7 +368,7 @@ &is-or POP2r ;alloc-empty JSR2 OVR2 OVR2 SWP2 ( r empty empty r ) - ;set-next JSR2 + ;set-next-or JSR2 ( unlike set-next, set-next-or also follows the right branch of nested or-nodes ) JMP2r ( add r to the top of the stock. 
) @@ -393,13 +397,25 @@ @set-next ( target* regex* -> ) LDAk #01 NEQ ,&!1 JCN INC2 ;set-next-addr JSR2 JMP2r &!1 LDAk #02 NEQ ,&!2 JCN INC2 ;set-next-addr JSR2 JMP2r - &!2 LDAk #03 NEQ ,&!3 JCN #0002 ADD2 ;set-next-addr JSR2 JMP2r - &!3 LDAk #04 NEQ ,&!4 JCN - ( todo: this is probably broken ) - OVR2 OVR2 INC2 ;set-next-addr JSR2 - #0003 ADD2 ;set-next-addr JSR2 JMP2r + &!2 LDAk #03 NEQ ,&!3 JCN #0002 ADD2 ;set-next-addr JSR2 JMP2r + &!3 LDAk #04 NEQ ,&!4 JCN INC2 ;set-next-addr JSR2 JMP2r &!4 LDAk #05 NEQ ,&!5 JCN #0003 ADD2 ;set-next-addr JSR2 JMP2r - &!5 ;unknown-node-type ;error! JSR2 + &!5 LDAk ;emit-byte JSR2 ( print the unrecognized node-type byte before raising the error ) ;unknown-node-type ;error! JSR2 + +@set-next-or-addr ( target* addr* -> ) + LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN + LDA2 ;set-next-or JSR2 JMP2r ( slot already holds a node: recurse into it ) + &is-zero STA2 JMP2r ( slot is empty: store target there ) + +( this is used when first building or-nodes ) +( structure will always be: ) +( [x1, [x2, [x3, ..., [xm, xn]]]] ) +( so we recurse on the right side but not the left. ) +@set-next-or ( target* regex* -> ) + LDAk #04 NEQ ,&!4 JCN + OVR2 OVR2 INC2 ;set-next-addr JSR2 + #0003 ADD2 ;set-next-or-addr JSR2 JMP2r + &!4 ;set-next JMP2 ( STACK OPERATIONS ) ( ) @@ -431,6 +447,15 @@ STH2r STH2r ( restore str and regex ) JMP2r +( frame-size: count the 4-byte entries above the nearest ffff marker, scanning down from stack-pos. -> size^ ) +@frame-size + #00 STH ;stack-pos LDA2 ( addr [count] ) + &loop + #0004 SUB2 LDA2k #ffff EQU2 ( addr is-marker? ) ,&done JCN + INCr ,&loop JMP + &done + POP2 STHr JMP2r ( drop the scan address so that only size^ is returned ) + ( reset stack pointers ) @reset-stack ( -> ) ;stack-bot ;stack-pos STA2 JMP2r ( pos <- 0 ) diff --git a/repl-regex.tal b/repl-regex.tal new file mode 100644 index 0000000..7a7434d --- /dev/null +++ b/repl-regex.tal @@ -0,0 +1,108 @@ +( repl-regex.tal ) + +%dbg { #ff #0e DEO } +%sp { #20 #18 DEO } +%nl { #0a #18 DEO } +%exit { #01 #0f DEO BRK } + +( read in regular expressions ) +( and emit internal structures parsed ) +|0100 + ;r-prompt ;println JSR2 + ;r-read-stdin #10 DEO2 BRK + +( we use two different prompts depending on what mode we're in ) +@r-prompt "enter 20 "regex: 20 00 +@s-prompt "string 20 "to 20 "match: 20 
00 + +@regex $2 ( compiled regex address (if any) ) +@buffer $1000 ( buffer to read user input ) +@ptr :buffer ( next byte to write in buffer ) + +@println ( s* -> ) ( write the zero-terminated string at s to port 18, one byte at a time; no newline is appended ) + &loop LDAk #00 EQU ,&eof JCN + LDAk #18 DEO INC2 ,&loop JMP + &eof POP2 JMP2r + +@r-read-stdin ( -> ) ( regex mode: installed as the port-10 vector; each run handles the single byte read from port 12 ) + #12 DEI #0a EQU ( byte is newline? ) ,&execute JCN + #12 DEI ;ptr LDA2 STA ( store the byte at ptr. NOTE review: nothing here checks ptr against the end of the $1000 bytes reserved for buffer ) + ;ptr LDA2k INC2 SWP2 STA2 ( ptr <- ptr + 1 ) + BRK + &execute + #00 ;ptr LDA2 STA ( null terminate the regex text ) + ;buffer ;ptr STA2 ( reset ptr ) + ;buffer ;compile JSR2 dbg nl + DUP2 ;regex STA2 ( remember the value compile left on the stack ) + ;emit-stack JSR2 nl + ;emit-arena JSR2 nl + ;reset-arena JSR2 + POP2 ( drop the remaining copy ) + ;s-prompt ;println JSR2 + ;s-read-stdin #10 DEO2 BRK ( switch the input vector to string mode ) + BRK ( unreachable: the previous line already ends in BRK ) + +@s-read-stdin ( -> ) ( string mode: buffers input bytes the same way as r-read-stdin ) + #12 DEI #0a EQU ( byte is newline? ) ,&execute JCN + #12 DEI ;ptr LDA2 STA + ;ptr LDA2k INC2 SWP2 STA2 ( ptr <- ptr + 1 ) + BRK + &execute + #00 ;ptr LDA2 STA ( null terminate string ) + ;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? ) + ;buffer ;ptr STA2 ( reset ptr ) + ;buffer ;regex LDA2 ;match JSR2 ( match regex ) + ;emit-byte JSR2 nl ( print result ) + STHr ,&was-empty JCN ( an empty line switches back to regex mode ) + ;s-prompt ;println JSR2 + BRK + &was-empty + ;r-prompt ;println JSR2 + ;r-read-stdin #10 DEO2 BRK + BRK ( unreachable: the previous line already ends in BRK ) + +~regex.tal + +@emit-short ( short* -- ) ( print the high byte, then the low byte, via emit-byte ) + SWP ;emit-byte JSR2 ;emit-byte JSR2 JMP2r + +@emit-byte ( byte^ -- ) ( print two lowercase hex digits: high nibble, then low nibble ) + DUP #04 SFT ,&hex JSR #0f AND ,&hex JMP + &hex #30 ADD DUP #39 GTH #27 MUL ADD emit + JMP2r + +( print stack size, followed by contents ) +@emit-stack ( -> ) + space LIT 'n emit LIT '= emit ;stack-pos LDA2 ;stack-bot SUB2 #0004 DIV2 ;emit-short JSR2 LIT ': emit + ;stack-bot + &loop + DUP2 ;stack-pos LDA2 LTH2 ,&ok JCN + POP2 newline JMP2r + &ok + space LDA2k ;emit-short JSR2 + #0002 ADD2 ,&loop JMP + +( emit n bytes from the given address ) +@emit-n ( addr* count^ -> addr2* ) + DUP #00 GTH ( addr count count>0? ) ,&ok JCN ( addr count ) POP newline JMP2r + &ok + STH ( addr [count] ) space LDAk ;emit-byte JSR2 INC2 ( addr+1 [count] ) + STHr #01 SUB ( addr+1 count-1 ) + ;emit-n JMP2 + +( emit the arena, with one line per node ) +( parses node type, since node size is dynamic (3-5). 
) +@emit-arena ( -> ) + ;arena-bot ( addr starts at the bottom of the arena ) + &loop + DUP2 ;arena-pos LDA2 LTH2 ( addr addr-below-arena-pos? ) ,&ok JCN POP2 JMP2r + &ok + DUP2 ;emit-short JSR2 ( print the node address ) + LIT ': emit space ( sizes used below: types 01 and 02 take 3 bytes, 03 takes 4, 04 and 05 take 5 ) + LDAk #01 NEQ ,&!1 JCN #03 ;emit-n JSR2 ,&loop JMP + &!1 LDAk #02 NEQ ,&!2 JCN #03 ;emit-n JSR2 ,&loop JMP + &!2 LDAk #03 NEQ ,&!3 JCN #04 ;emit-n JSR2 ,&loop JMP + &!3 LDAk #04 NEQ ,&!4 JCN #05 ;emit-n JSR2 ,&loop JMP + &!4 LDAk #05 NEQ ,&!5 JCN #05 ;emit-n JSR2 ,&loop JMP + &!5 ;unknown-node-type ;error! JSR2 diff --git a/uxnrun b/uxnrun index 78a07a5..ae02bf8 100755 --- a/uxnrun +++ b/uxnrun @@ -7,10 +7,17 @@ if [ $# -lt 1 ]; then exit 1 fi -DEST=$( echo "$1" | sed -re 's#\.tal$#.rom#' ) - +RUN='uxnemu' case "$1" in - -c) $BIN/uxnasm $2 $DEST && $BIN/uxncli $DEST;; - -s) $BIN/uxnasm $3 $DEST && $BIN/uxnemu -s $2 $DEST;; - *) $BIN/uxnasm $1 $DEST && $BIN/uxnemu $DEST;; + -c) RUN='uxncli'; shift;; + -s) RUN="uxnemu -s $2"; shift; shift;; esac + +DEST=$( echo "$1" | sed -re 's#\.tal$#.rom#' ) +$BIN/uxnasm "$1" "$DEST" && $BIN/$RUN "$DEST" + +#case "$1" in +# -c) $BIN/uxnasm $2 $DEST && $BIN/uxncli $DEST;; +# -s) $BIN/uxnasm $3 $DEST && $BIN/uxnemu -s $2 $DEST;; +# *) $BIN/uxnasm $1 $DEST && $BIN/uxnemu $DEST;; +#esac