fix regex bugs
This commit is contained in:
parent
4c106ea33c
commit
f1fec3c767
41
regex.tal
41
regex.tal
|
@ -80,7 +80,7 @@
|
||||||
LDAk #03 EQU ;do-literal JCN2
|
LDAk #03 EQU ;do-literal JCN2
|
||||||
LDAk #04 EQU ;do-or JCN2
|
LDAk #04 EQU ;do-or JCN2
|
||||||
LDAk #05 EQU ;do-or JCN2 ( same code as the or case )
|
LDAk #05 EQU ;do-or JCN2 ( same code as the or case )
|
||||||
;unknown-node-type ;error! JSR2
|
LDAk ;emit-byte JSR2 ;unknown-node-type ;error! JSR2
|
||||||
|
|
||||||
( used when we hit a dead-end during matching. )
|
( used when we hit a dead-end during matching. )
|
||||||
( )
|
( )
|
||||||
|
@ -341,6 +341,10 @@
|
||||||
;set-next JSR2 ( [r] )
|
;set-next JSR2 ( [r] )
|
||||||
STH2r JMP2r
|
STH2r JMP2r
|
||||||
|
|
||||||
|
( pass r through unchanged unless it is 0000; in that case )
( drop it and return the result of alloc-empty instead. )
@alloc-if-null ( r* -> r2* )
    DUP2 #0000 NEQ2 ,&keep JCN
    POP2 ;alloc-empty JSR2
    &keep JMP2r
|
||||||
|
|
||||||
( unroll one region of the parsing stack, returning )
|
( unroll one region of the parsing stack, returning )
|
||||||
( a single node consisting of an alternation of )
|
( a single node consisting of an alternation of )
|
||||||
( all elements on the stack. )
|
( all elements on the stack. )
|
||||||
|
@ -350,7 +354,7 @@
|
||||||
@unroll-stack ( -> start* end* )
|
@unroll-stack ( -> start* end* )
|
||||||
;pop4 JSR2 STH2 ( r )
|
;pop4 JSR2 STH2 ( r )
|
||||||
#00 STH ( count items in stack frame )
|
#00 STH ( count items in stack frame )
|
||||||
DUP2 #0000 NEQ2 ,&loop JCN ;alloc-empty JSR2
|
;alloc-if-null JSR2 ( replace 0000 with empty )
|
||||||
&loop ( r* )
|
&loop ( r* )
|
||||||
;pop4 JSR2 POP2 ( r x )
|
;pop4 JSR2 POP2 ( r x )
|
||||||
DUP2 #ffff EQU2 ( r x x-is-end? ) ,&done JCN
|
DUP2 #ffff EQU2 ( r x x-is-end? ) ,&done JCN
|
||||||
|
@ -364,7 +368,7 @@
|
||||||
&is-or
|
&is-or
|
||||||
POP2r
|
POP2r
|
||||||
;alloc-empty JSR2 OVR2 OVR2 SWP2 ( r empty empty r )
|
;alloc-empty JSR2 OVR2 OVR2 SWP2 ( r empty empty r )
|
||||||
;set-next JSR2
|
;set-next-or JSR2
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
( add r to the top of the stack. )
|
( add r to the top of the stack. )
|
||||||
|
@ -394,12 +398,24 @@
|
||||||
LDAk #01 NEQ ,&!1 JCN INC2 ;set-next-addr JSR2 JMP2r
|
LDAk #01 NEQ ,&!1 JCN INC2 ;set-next-addr JSR2 JMP2r
|
||||||
&!1 LDAk #02 NEQ ,&!2 JCN INC2 ;set-next-addr JSR2 JMP2r
|
&!1 LDAk #02 NEQ ,&!2 JCN INC2 ;set-next-addr JSR2 JMP2r
|
||||||
&!2 LDAk #03 NEQ ,&!3 JCN #0002 ADD2 ;set-next-addr JSR2 JMP2r
|
&!2 LDAk #03 NEQ ,&!3 JCN #0002 ADD2 ;set-next-addr JSR2 JMP2r
|
||||||
&!3 LDAk #04 NEQ ,&!4 JCN
|
&!3 LDAk #04 NEQ ,&!4 JCN INC2 ;set-next-addr JSR2 JMP2r
|
||||||
( todo: this is probably broken )
|
|
||||||
OVR2 OVR2 INC2 ;set-next-addr JSR2
|
|
||||||
#0003 ADD2 ;set-next-addr JSR2 JMP2r
|
|
||||||
&!4 LDAk #05 NEQ ,&!5 JCN #0003 ADD2 ;set-next-addr JSR2 JMP2r
|
&!4 LDAk #05 NEQ ,&!5 JCN #0003 ADD2 ;set-next-addr JSR2 JMP2r
|
||||||
&!5 ;unknown-node-type ;error! JSR2
|
&!5 LDAk ;emit-byte JSR2 ;unknown-node-type ;error! JSR2
|
||||||
|
|
||||||
|
( addr is a slot holding a node pointer. an empty slot, 0000, )
( is filled with target directly. otherwise the node already )
( stored in the slot is handed to set-next-or. )
@set-next-or-addr ( target* addr* -> )
    LDA2k ORA ,&follow JCN ( target addr -- slot is non-zero? )
    STA2 JMP2r
    &follow
        LDA2 ;set-next-or JMP2
|
||||||
|
|
||||||
|
( used while or-nodes are first being built. )
( the structure is always right-nested: )
( [x1, [x2, [x3, ..., [xm, xn]]]] )
( so the left branch gets a plain set-next-addr and only )
( the right branch is followed recursively. )
@set-next-or ( target* regex* -> )
    LDAk #04 EQU ,&or JCN
    ;set-next JMP2 ( any other node type: defer to set-next )
    &or
        OVR2 OVR2 INC2 ;set-next-addr JSR2 ( left slot at regex+1 )
        #0003 ADD2 ;set-next-or-addr JMP2 ( right slot at regex+3 )
|
||||||
|
|
||||||
( STACK OPERATIONS )
|
( STACK OPERATIONS )
|
||||||
( )
|
( )
|
||||||
|
@ -431,6 +447,15 @@
|
||||||
STH2r STH2r ( restore str and regex )
|
STH2r STH2r ( restore str and regex )
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
|
( count the entries between stack-pos and the nearest ffff )
( marker below it. the cursor steps down 4 bytes at a time )
( and the first short of each entry is tested. the count is )
( kept on the return stack while walking. )
( -> size^ )
@frame-size
    #00 STH ;stack-pos LDA2 ( cursor* [count^] )
    &loop
        #0004 SUB2 LDA2k #ffff EQU2 ,&done JCN
        INCr ,&loop JMP
    &done
        ( drop the cursor so that only the count is returned, )
        ( as the declared stack effect promises )
        POP2 STHr JMP2r
|
||||||
|
|
||||||
( reset stack pointers )
|
( reset stack pointers )
|
||||||
@reset-stack ( -> )
|
@reset-stack ( -> )
|
||||||
;stack-bot ;stack-pos STA2 JMP2r ( pos <- 0 )
|
;stack-bot ;stack-pos STA2 JMP2r ( pos <- 0 )
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
( repl-regex.tal )
|
||||||
|
|
||||||
|
%dbg { #ff #0e DEO }
|
||||||
|
%sp { #20 #18 DEO }
|
||||||
|
%nl { #0a #18 DEO }
|
||||||
|
%exit { #01 #0f DEO BRK }
|
||||||
|
|
||||||
|
( read in regular expressions )
|
||||||
|
( and emit internal structures parsed )
|
||||||
|
|0100
|
||||||
|
;r-prompt ;println JSR2
|
||||||
|
;r-read-stdin #10 DEO2 BRK
|
||||||
|
|
||||||
|
( we use two different prompts depending on what mode we're in )
|
||||||
|
@r-prompt "enter 20 "regex: 20 00
|
||||||
|
@s-prompt "string 20 "to 20 "match: 20 00
|
||||||
|
|
||||||
|
|
||||||
|
@regex $2 ( compiled regex address (if any) )
|
||||||
|
@buffer $1000 ( buffer to read user input )
|
||||||
|
@ptr :buffer ( next byte to write in buffer )
|
||||||
|
|
||||||
|
( write the zero-terminated string at s to console port 18. )
( no newline is appended. )
@println ( s* -> )
    &next
        LDAk DUP ,&write JCN ( s c -- c is non-zero? )
        POP POP2 JMP2r ( drop terminator and pointer )
    &write
        #18 DEO INC2 ,&next JMP
|
||||||
|
|
||||||
|
( console vector used while a regex is being typed. )
( each call handles one input byte from port 12: bytes are )
( appended to buffer until a newline arrives, then the buffer )
( is compiled, the result is stored in regex, the parse stack )
( and arena are printed, and the console vector is switched )
( to s-read-stdin. )
@r-read-stdin ( -> )
    #12 DEI #0a EQU ,&execute JCN
    ( buffer is 1000 bytes and the last one is kept for the )
    ( terminator. extra input is dropped so that a long line )
    ( cannot write past the end of buffer. )
    ;ptr LDA2 ;buffer #0fff ADD2 LTH2 ,&store JCN
    BRK
    &store
        #12 DEI ;ptr LDA2 STA ( append byte )
        ;ptr LDA2k INC2 SWP2 STA2 ( advance ptr )
        BRK
    &execute
        #00 ;ptr LDA2 STA ( null terminate input )
        ;buffer ;ptr STA2 ( reset ptr )
        ;buffer ;compile JSR2 dbg nl
        DUP2 ;regex STA2 ( remember the compiled regex )
        ;emit-stack JSR2 nl
        ;emit-arena JSR2 nl
        ;reset-arena JSR2
        POP2
        ;s-prompt ;println JSR2
        ;s-read-stdin #10 DEO2 BRK
|
||||||
|
|
||||||
|
( console vector used while a string to match is being typed. )
( each call handles one input byte from port 12: bytes are )
( appended to buffer until a newline arrives, then the buffer )
( is matched against the stored regex and the result byte is )
( printed. an empty line also switches the console vector )
( back to r-read-stdin after its result is printed. )
@s-read-stdin ( -> )
    #12 DEI #0a EQU ,&execute JCN
    ( buffer is 1000 bytes and the last one is kept for the )
    ( terminator. extra input is dropped so that a long line )
    ( cannot write past the end of buffer. )
    ;ptr LDA2 ;buffer #0fff ADD2 LTH2 ,&store JCN
    BRK
    &store
        #12 DEI ;ptr LDA2 STA ( append byte )
        ;ptr LDA2k INC2 SWP2 STA2 ( advance ptr )
        BRK
    &execute
        #00 ;ptr LDA2 STA ( null terminate string )
        ;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? )
        ;buffer ;ptr STA2 ( reset ptr )
        ;buffer ;regex LDA2 ;match JSR2 ( match regex )
        ;emit-byte JSR2 nl ( print result )
        STHr ,&was-empty JCN
        ;s-prompt ;println JSR2
        BRK
    &was-empty
        ;r-prompt ;println JSR2
        ;r-read-stdin #10 DEO2 BRK
|
||||||
|
|
||||||
|
~regex.tal
|
||||||
|
|
||||||
|
( print a short as four hex digits, high byte first. )
@emit-short ( short* -- )
    SWP ;emit-byte JSR2 ( high byte )
    ;emit-byte JMP2 ( low byte, tail call )
|
||||||
|
|
||||||
|
( print a byte as two lowercase hex digits. )
( the high nibble goes through hex as a subroutine, then the )
( low nibble jumps into hex and returns to the caller from it. )
@emit-byte ( byte^ -- )
    DUP #04 SFT ,&hex JSR
    #0f AND ,&hex JMP
    &hex ( nibble^ -- )
        ( values above 9 get 27 added so they land on a-f )
        DUP #09 GTH #27 MUL ADD #30 ADD emit
        JMP2r
|
||||||
|
|
||||||
|
( print the number of 4-byte entries between stack-bot and )
( stack-pos, then every short in that range followed by a )
( newline. )
@emit-stack ( -> )
    space LIT 'n emit LIT '= emit
    ;stack-pos LDA2 ;stack-bot SUB2 #0004 DIV2 ;emit-short JSR2
    LIT ': emit
    ;stack-bot ( cursor* )
    &next
        DUP2 ;stack-pos LDA2 LTH2 #00 EQU ,&done JCN
        space LDA2k ;emit-short JSR2
        INC2 INC2 ,&next JMP
    &done
        POP2 newline JMP2r
|
||||||
|
|
||||||
|
( print count bytes starting at addr, each one preceded by a )
( space, then a newline. returns the address just past the )
( last byte printed. )
@emit-n ( addr* count^ -> addr2* )
    &next
        DUP ,&more JCN ( addr count -- count is non-zero? )
        POP newline JMP2r
    &more
        STH ( addr [count] )
        space LDAk ;emit-byte JSR2 INC2 ( addr+1 [count] )
        STHr #01 SUB ( addr+1 count-1 )
        ,&next JMP
|
||||||
|
|
||||||
|
( emit the arena, with one line per node. )
( each line is the node address, a colon, and the node bytes. )
( the type byte is inspected first because node size varies )
( with type: 3 bytes for types 01 and 02, 4 bytes for type 03, )
( 5 bytes for types 04 and 05. )
@emit-arena ( -> )
    ;arena-bot ( cursor* )
    &loop
        DUP2 ;arena-pos LDA2 LTH2 ,&ok JCN POP2 JMP2r
    &ok
        DUP2 ;emit-short JSR2
        LIT ': emit space
        LDAk #01 NEQ ,&!1 JCN #03 ;emit-n JSR2 ,&loop JMP
    &!1 LDAk #02 NEQ ,&!2 JCN #03 ;emit-n JSR2 ,&loop JMP
    &!2 LDAk #03 NEQ ,&!3 JCN #04 ;emit-n JSR2 ,&loop JMP
    &!3 LDAk #04 NEQ ,&!4 JCN #05 ;emit-n JSR2 ,&loop JMP
    &!4 LDAk #05 NEQ ,&!5 JCN #05 ;emit-n JSR2 ,&loop JMP
    ( print the offending type byte before reporting, as the )
    ( other unknown-node-type sites do )
    &!5 LDAk ;emit-byte JSR2 ;unknown-node-type ;error! JSR2
|
17
uxnrun
17
uxnrun
|
@ -7,10 +7,17 @@ if [ $# -lt 1 ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
DEST=$( echo "$1" | sed -re 's#\.tal$#.rom#' )
|
RUN='uxnemu'
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
-c) $BIN/uxnasm $2 $DEST && $BIN/uxncli $DEST;;
|
-c) RUN='uxncli'; shift;;
|
||||||
-s) $BIN/uxnasm $3 $DEST && $BIN/uxnemu -s $2 $DEST;;
|
-s) RUN="uxnemu -s $2"; shift; shift;;
|
||||||
*) $BIN/uxnasm $1 $DEST && $BIN/uxnemu $DEST;;
|
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
DEST=$( echo "$1" | sed -re 's#\.tal$#.rom#' )
|
||||||
|
$BIN/uxnasm $1 $DEST && $BIN/$RUN $DEST
|
||||||
|
|
||||||
|
#case "$1" in
|
||||||
|
# -c) $BIN/uxnasm $2 $DEST && $BIN/uxncli $DEST;;
|
||||||
|
# -s) $BIN/uxnasm $3 $DEST && $BIN/uxnemu -s $2 $DEST;;
|
||||||
|
# *) $BIN/uxnasm $1 $DEST && $BIN/uxnemu $DEST;;
|
||||||
|
#esac
|
||||||
|
|
Loading…
Reference in New Issue