modernize immediate jumps

This commit is contained in:
~d6 2023-11-28 12:08:42 -05:00
parent 29186973c8
commit 06332929f6
2 changed files with 686 additions and 684 deletions

968
femto.tal

File diff suppressed because it is too large Load Diff

402
regex.tal
View File

@ -97,10 +97,10 @@
( using error! will print the given message before causing ) ( using error! will print the given message before causing )
( the interpreter to halt. ) ( the interpreter to halt. )
@error!! ( msg* -> ) @errorm ( msg* -> )
LIT "! emit! space LIT "! emit! space
&loop LDAk #00 EQU ,&done JCN &loop LDAk #00 EQU ?&done
LDAk emit! INC2 ,&loop JMP LDAk emit! INC2 !&loop
&done POP2 newline #ff0e DEO #010f DEO BRK &done POP2 newline #ff0e DEO #010f DEO BRK
( error messages ) ( error messages )
@ -126,34 +126,34 @@
@rx-match ( str* regex* -> bool^ ) @rx-match ( str* regex* -> bool^ )
#01 ;match-multiline STA #01 ;match-multiline STA
#00 ;search-mode STA #00 ;search-mode STA
;rx-reset JSR2 rx-reset
;loop JMP2 !loop
@rx-search-multiline ( str* regex* -> bool^ ) @rx-search-multiline ( str* regex* -> bool^ )
#01 ;match-multiline STA #01 ;match-multiline STA
#01 ;search-mode STA #01 ;search-mode STA
,rx-search/main JMP !rx-search/main
@rx-search ( str* regex* -> bool^ ) @rx-search ( str* regex* -> bool^ )
#00 ;match-multiline STA #00 ;match-multiline STA
#01 ;search-mode STA #01 ;search-mode STA
&main STH2 ( s* [r*] ) &main STH2 ( s* [r*] )
DUP2 ;string-start STA2 ( s* [r*] ) DUP2 ;string-start STA2 ( s* [r*] )
&loop LDAk #00 EQU ,&eof JCN ( s* [r*] ) &loop LDAk #00 EQU ?&eof ( s* [r*] )
;rx-reset JSR2 ( s* [r*] ) rx-reset ( s* [r*] )
DUP2 ;search-start STA2 ( s* [r*] ) DUP2 ;search-start STA2 ( s* [r*] )
DUP2 STH2kr ;loop JSR2 ( s* b^ [r*] ) DUP2 STH2kr loop ( s* b^ [r*] )
,&found JCN ( s* [r*] ) ?&found ( s* [r*] )
INC2 ,&loop JMP ( s+1* [r*] ) INC2 !&loop ( s+1* [r*] )
&found POP2 POP2r #01 JMP2r ( 01 ) &found POP2 POP2r #01 JMP2r ( 01 )
&eof ;rx-reset JSR2 ( s* [r*] ) &eof rx-reset ( s* [r*] )
DUP2 ;search-start STA2 ( s* [r*] ) DUP2 ;search-start STA2 ( s* [r*] )
STH2r ;loop JMP2 ( b^ ) STH2r !loop ( b^ )
( reset all "runtime" memory allocated during match/search ) ( reset all "runtime" memory allocated during match/search )
@rx-reset ( -> ) @rx-reset ( -> )
;reset-stack JSR2 reset-stack
;subgroup-reset JMP2 !subgroup-reset
( loop used during matching ) ( loop used during matching )
( ) ( )
@ -163,87 +163,87 @@
( return a boolean, which is where the stack ) ( return a boolean, which is where the stack )
( effects signature comes from. ) ( effects signature comes from. )
@loop ( s* r* -> bool^ ) @loop ( s* r* -> bool^ )
LDAk #01 EQU ;do-empty JCN2 LDAk #01 EQU ?do-empty
LDAk #02 EQU ;do-dot JCN2 LDAk #02 EQU ?do-dot
LDAk #03 EQU ;do-literal JCN2 LDAk #03 EQU ?do-literal
LDAk #04 EQU ;do-or JCN2 LDAk #04 EQU ?do-or
LDAk #05 EQU ;do-or JCN2 ( same code as the or case ) LDAk #05 EQU ?do-or ( same code as the or case )
LDAk #06 EQU ;do-caret JCN2 LDAk #06 EQU ?do-caret
LDAk #07 EQU ;do-dollar JCN2 LDAk #07 EQU ?do-dollar
LDAk #08 EQU ;do-lpar JCN2 LDAk #08 EQU ?do-lpar
LDAk #09 EQU ;do-rpar JCN2 LDAk #09 EQU ?do-rpar
LDAk #0a EQU ;do-ccls JCN2 LDAk #0a EQU ?do-ccls
LDAk #0b EQU ;do-ncls JCN2 LDAk #0b EQU ?do-ncls
LDAk #dd ;unknown-node-type ;error!! JSR2 LDAk #dd ;unknown-node-type errorm
( used when we hit a dead-end during matching. ) ( used when we hit a dead-end during matching. )
( ) ( )
( if stack is non-empty we have a point we can resume from. ) ( if stack is non-empty we have a point we can resume from. )
@goto-backtrack ( -> bool^ ) @goto-backtrack ( -> bool^ )
;stack-exist JSR2 ,&has-stack JCN ( do we have stack? ) stack-exist ?&has-stack ( do we have stack? )
#00 JMP2r ( no, return false ) #00 JMP2r ( no, return false )
&has-stack &has-stack
;pop4 JSR2 pop4
;subgroup-backtrack JSR2 subgroup-backtrack
;goto-next JMP2 ( yes, resume from the top ) !goto-next ( yes, resume from the top )
( follow the given address (next*) to continue matching ) ( follow the given address (next*) to continue matching )
@goto-next ( str* next* -> bool^ ) @goto-next ( str* next* -> bool^ )
DUP2 #0000 GTH2 ,&has-next JCN DUP2 #0000 GTH2 ?&has-next
POP2 LDAk #00 EQU ,&end-of-string JCN POP2 LDAk #00 EQU ?&end-of-string
;search-mode LDA ,&end-of-search JCN ;search-mode LDA ?&end-of-search
POP2 ;goto-backtrack JMP2 POP2 !goto-backtrack
&end-of-search DUP2 ;search-end STA2 &end-of-search DUP2 ;search-end STA2
&end-of-string POP2 #01 JMP2r &end-of-string POP2 #01 JMP2r
&has-next ;loop JMP2 &has-next !loop
( handle the empty node -- just follow the next pointer ) ( handle the empty node -- just follow the next pointer )
@do-empty ( str* regex* -> bool^ ) @do-empty ( str* regex* -> bool^ )
INC2 LDA2 ( load next ) INC2 LDA2 ( load next )
;goto-next JMP2 ( jump to next ) !goto-next ( jump to next )
( FIXME: not currently used ) ( FIXME: not currently used )
@do-lpar ( str* regex* -> bool^ ) @do-lpar ( str* regex* -> bool^ )
STH2 DUP2 ( s s [r] ) STH2 DUP2 ( s s [r] )
INC2r LDA2kr STH2r ( s s i [r+1] ) INC2r LDA2kr STH2r ( s s i [r+1] )
;subgroup-start JSR2 ( s [r+1] ) subgroup-start ( s [r+1] )
STH2r INC2 INC2 ( s r+3 ) STH2r INC2 INC2 ( s r+3 )
LDA2 ;goto-next JMP2 ( jump to next ) LDA2 !goto-next ( jump to next )
( FIXME: not currently used ) ( FIXME: not currently used )
@do-rpar ( str* regex* -> bool^ ) @do-rpar ( str* regex* -> bool^ )
STH2 DUP2 ( s s [r] ) STH2 DUP2 ( s s [r] )
INC2r LDA2kr STH2r ( s s i [r+1] ) INC2r LDA2kr STH2r ( s s i [r+1] )
;subgroup-finish JSR2 ( s [r+1] ) subgroup-finish ( s [r+1] )
STH2r INC2 INC2 ( s r+3 ) STH2r INC2 INC2 ( s r+3 )
LDA2 ;goto-next JMP2 ( jump to next ) LDA2 !goto-next ( jump to next )
( handle dot -- match any one character ) ( handle dot -- match any one character )
@do-dot ( str* regex* -> bool^ ) @do-dot ( str* regex* -> bool^ )
INC2 LDA2 STH2 ( load and stash next ) INC2 LDA2 STH2 ( load and stash next )
LDAk #00 NEQ ,&non-empty JCN ( is there a char? ) LDAk #00 NEQ ?&non-empty ( is there a char? )
&backtrack POP2r POP2 ;goto-backtrack JMP2 ( no, clear stacks and backtrack ) &backtrack POP2r POP2 !goto-backtrack ( no, clear stacks and backtrack )
&non-empty LDAk #0a NEQ ,&match JCN ( yes, match unless \n in search-mode ) &non-empty LDAk #0a NEQ ?&match ( yes, match unless \n in search-mode )
;search-mode LDA ,&backtrack JCN ( if \n and search-mode, treat as EOF ) ;search-mode LDA ?&backtrack ( if \n and search-mode, treat as EOF )
&match INC2 STH2r ;goto-next JMP2 ( on match: inc s, restore and jump ) &match INC2 STH2r !goto-next ( on match: inc s, restore and jump )
( hande caret -- match string start (or possibly after newline) without advancing ) ( hande caret -- match string start (or possibly after newline) without advancing )
@do-caret ( str* regex* -> bool^ ) @do-caret ( str* regex* -> bool^ )
INC2 LDA2 STH2 ( load and stash next ) INC2 LDA2 STH2 ( load and stash next )
DUP2 ;string-start LDA2 EQU2 ,&at-start JCN ( at string start? ) DUP2 ;string-start LDA2 EQU2 ?&at-start ( at string start? )
;match-multiline LDA ,&no-match JCN ( are we in multi-line mode? ) ;match-multiline LDA ?&no-match ( are we in multi-line mode? )
DUP2 #0001 SUB2 LDA #0a EQU ,&at-start JCN ( just after newline? ) DUP2 #0001 SUB2 LDA #0a EQU ?&at-start ( just after newline? )
&no-match POP2r POP2 ;goto-backtrack JMP2 ( clear stacks and backtrack ) &no-match POP2r POP2 !goto-backtrack ( clear stacks and backtrack )
&at-start STH2r ;goto-next JMP2 ( go to next without advancing ) &at-start STH2r !goto-next ( go to next without advancing )
( hande dollar -- match string end (or possibly before newline) without advancing ) ( hande dollar -- match string end (or possibly before newline) without advancing )
@do-dollar ( str* regex* -> bool^ ) @do-dollar ( str* regex* -> bool^ )
INC2 LDA2 STH2 ( load and stash next ) INC2 LDA2 STH2 ( load and stash next )
LDAk #00 EQU ,&at-end JCN ( at string end? ) LDAk #00 EQU ?&at-end ( at string end? )
;match-multiline LDA ,&no-match JCN ( are we in multi-line mode? ) ;match-multiline LDA ?&no-match ( are we in multi-line mode? )
LDAk #0a EQU ,&at-end JCN ( at newline? ) LDAk #0a EQU ?&at-end ( at newline? )
&no-match POP2r POP2 ;goto-backtrack JMP2 ( clear stacks and backtrack ) &no-match POP2r POP2 !goto-backtrack ( clear stacks and backtrack )
&at-end STH2r ;goto-next JMP2 ( go to next without advancing ) &at-end STH2r !goto-next ( go to next without advancing )
( handle literal -- match one specific character ) ( handle literal -- match one specific character )
@do-literal ( str* regex* -> bool^ ) @do-literal ( str* regex* -> bool^ )
@ -251,23 +251,23 @@
LDAk STH ( store c ) LDAk STH ( store c )
INC2 LDA2 STH2 ROTr ( store next, move c to top ) INC2 LDA2 STH2 ROTr ( store next, move c to top )
LDAk LDAk
STHr EQU ,&matches JCN ( do we match this char? ) STHr EQU ?&matches ( do we match this char? )
POP2r POP2 ;goto-backtrack JMP2 ( no, clear stacks and backtrack ) POP2r POP2 !goto-backtrack ( no, clear stacks and backtrack )
&matches &matches
INC2 STH2r ;goto-next JMP2 ( yes, inc s, restore and jump ) INC2 STH2r !goto-next ( yes, inc s, restore and jump )
( handle or -- try the left branch but backtrack to the right if needed ) ( handle or -- try the left branch but backtrack to the right if needed )
( ) ( )
( this also handles asteration, since it ends up having the same structure ) ( this also handles asteration, since it ends up having the same structure )
@do-or ( str* regex* -> bool^ ) @do-or ( str* regex* -> bool^ )
INC2 OVR2 OVR2 #0002 ADD2 ( s r+1 s r+3 ) INC2 OVR2 OVR2 #0002 ADD2 ( s r+1 s r+3 )
LDA2 ;push4 JSR2 ( save (s, right) in the stack for possible backtracking ) LDA2 push4 ( save (s, right) in the stack for possible backtracking )
LDA2 ;loop JMP2 ( continue on left branch ) LDA2 !loop ( continue on left branch )
@matches-cls ( str* regex* -> bool^ ) @matches-cls ( str* regex* -> bool^ )
OVR2 LDA ,&not-null JCN OVR2 LDA ?&not-null
( needs to have a character to match ) ( needs to have a character to match )
POP2 POP2 ;goto-backtrack JMP2 POP2 POP2 !goto-backtrack
&not-null &not-null
DUP2 INC2 LDA2 STH2 ( str regex [next] ) DUP2 INC2 LDA2 STH2 ( str regex [next] )
OVR2 INC2 STH2 ( str regex [str+1 next] ) OVR2 INC2 STH2 ( str regex [str+1 next] )
@ -275,24 +275,24 @@
#0003 ADD2 LDAk #00 SWP #0002 MUL2 ( r+3 len*2 [c str+1 next] ) #0003 ADD2 LDAk #00 SWP #0002 MUL2 ( r+3 len*2 [c str+1 next] )
SWP2 INC2 STH2k ADD2 STH2r ( r+4+len*2 r+4 [c str+1 next] ) SWP2 INC2 STH2k ADD2 STH2r ( r+4+len*2 r+4 [c str+1 next] )
&loop ( limit addr [c str+1 next] ) &loop ( limit addr [c str+1 next] )
EQU2k ,&missing JCN EQU2k ?&missing
LDAk STHkr GTH ,&next1 JCN INC2 LDAk STHkr GTH ?&next1 INC2
LDAk STHkr LTH ,&next2 JCN ,&found JMP LDAk STHkr LTH ?&next2 !&found
&next1 INC2 &next1 INC2
&next2 INC2 ,&loop JMP &next2 INC2 !&loop
&missing POP2 POP2 POPr ,&negated LDR ,&match JCN &missing POP2 POP2 POPr ,&negated LDR ?&match
&no-match POP2r POP2r ;goto-backtrack JMP2 &no-match POP2r POP2r !goto-backtrack
&found POP2 POP2 POPr ,&negated LDR ,&no-match JCN &found POP2 POP2 POPr ,&negated LDR ?&no-match
&match STH2r STH2r ;goto-next JMP2 &match STH2r STH2r !goto-next
[ &negated $1 ] [ &negated $1 ]
( ) ( )
@do-ccls ( str* regex* -> bool^ ) @do-ccls ( str* regex* -> bool^ )
#00 ,matches-cls/negated STR ,matches-cls JMP #00 ,matches-cls/negated STR !matches-cls
( ) ( )
@do-ncls ( str* regex* -> bool^ ) @do-ncls ( str* regex* -> bool^ )
#01 ,matches-cls/negated STR ,matches-cls JMP #01 ,matches-cls/negated STR !matches-cls
( REGEX PARSING ) ( REGEX PARSING )
@ -325,9 +325,9 @@
@read ( -> c^ ) @read ( -> c^ )
;pos LDA2k ( pos s ) ;pos LDA2k ( pos s )
LDAk STHk #00 EQU ( pos s c=0 [c] ) LDAk STHk #00 EQU ( pos s c=0 [c] )
,&is-eof JCN ( pos s [c] ) ?&is-eof ( pos s [c] )
INC2 ( pos s+1 [c] ) INC2 ( pos s+1 [c] )
SWP2 STA2 ,&return JMP ( [c] ) SWP2 STA2 !&return ( [c] )
&is-eof POP2 POP2 &is-eof POP2 POP2
&return STHr ( c ) &return STHr ( c )
JMP2r JMP2r
@ -370,8 +370,8 @@
@compile ( expr* -> regex* ) @compile ( expr* -> regex* )
;pos STA2 ;pos STA2
#0000 ;parens STA2 #0000 ;parens STA2
;rx-reset JSR2 rx-reset
;compile-region JMP2 !compile-region
( the basic strategy here is to build a stack of non-or ) ( the basic strategy here is to build a stack of non-or )
( expressions to be joined together at the end of the ) ( expressions to be joined together at the end of the )
@ -385,24 +385,24 @@
( by #ffff #ffff. above that we start with #0000 #0000 ) ( by #ffff #ffff. above that we start with #0000 #0000 )
( to signal an empty node. ) ( to signal an empty node. )
@compile-region ( -> r2* ) @compile-region ( -> r2* )
#ffff #ffff ;push4 JSR2 ( stack delimiter ) #ffff #ffff push4 ( stack delimiter )
#0000 #0000 ;push4 JSR2 ( stack frame start ) #0000 #0000 push4 ( stack frame start )
@compile-region-loop @compile-region-loop
;read JSR2 read
DUP #00 EQU ;c-done JCN2 DUP #00 EQU ?c-done
DUP LIT "| EQU ;c-or JCN2 DUP LIT "| EQU ?c-or
DUP LIT ". EQU ;c-dot JCN2 DUP LIT ". EQU ?c-dot
DUP LIT "^ EQU ;c-caret JCN2 DUP LIT "^ EQU ?c-caret
DUP LIT "$ EQU ;c-dollar JCN2 DUP LIT "$ EQU ?c-dollar
DUP LIT "( EQU ;c-lpar JCN2 DUP LIT "( EQU ?c-lpar
DUP LIT ") EQU ;c-rpar JCN2 DUP LIT ") EQU ?c-rpar
DUP LIT "[ EQU ;c-lbrk JCN2 DUP LIT "[ EQU ?c-lbrk
DUP LIT "] EQU ;c-rbrk JCN2 DUP LIT "] EQU ?c-rbrk
DUP LIT "\ EQU ;c-esc JCN2 DUP LIT "\ EQU ?c-esc
DUP LIT "* EQU ;c-star JCN2 DUP LIT "* EQU ?c-star
DUP LIT "+ EQU ;c-plus JCN2 DUP LIT "+ EQU ?c-plus
DUP LIT "? EQU ;c-qmark JCN2 DUP LIT "? EQU ?c-qmark
;c-char JMP2 !c-char
( either finalize the given r0/r1 or else wrap it in ) ( either finalize the given r0/r1 or else wrap it in )
( a star node if a star is coming up next. ) ( a star node if a star is coming up next. )
@ -410,14 +410,14 @@
( we use this look-ahead approach rather than compiling ) ( we use this look-ahead approach rather than compiling )
( star nodes directly since the implementation is simpler. ) ( star nodes directly since the implementation is simpler. )
@c-peek-and-finalize ( r0* r1* -> r2* ) @c-peek-and-finalize ( r0* r1* -> r2* )
;peek-to-star JSR2 ( r0 r1 next-is-star? ) ,&next-is-star JCN peek-to-star ( r0 r1 next-is-star? ) ?&next-is-star
;peek-to-plus JSR2 ( r0 r1 next-is-plus? ) ,&next-is-plus JCN peek-to-plus ( r0 r1 next-is-plus? ) ?&next-is-plus
;peek-to-qmark JSR2 ( r0 r1 next-is-qmark? ) ,&next-is-qmark JCN peek-to-qmark ( r0 r1 next-is-qmark? ) ?&next-is-qmark
,&finally JMP ( r0 r1 ) !&finally ( r0 r1 )
&next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ,&finally JMP &next-is-star skip POP2 alloc-star DUP2 !&finally
&next-is-plus ;skip JSR2 POP2 ;alloc-plus JSR2 DUP2 ,&finally JMP &next-is-plus skip POP2 alloc-plus DUP2 !&finally
&next-is-qmark ;skip JSR2 POP2 ;alloc-qmark JSR2 DUP2 ,&finally JMP &next-is-qmark skip POP2 alloc-qmark DUP2 !&finally
&finally ;push-next JSR2 ;compile-region-loop JMP2 &finally push-next !compile-region-loop
( called when we reach EOF of the input string ) ( called when we reach EOF of the input string )
( ) ( )
@ -427,9 +427,9 @@
( this is where we detect unclosed parenthesis. ) ( this is where we detect unclosed parenthesis. )
@c-done ( c^ -> r2* ) @c-done ( c^ -> r2* )
POP POP
;parens LDA2 #0000 GTH2 ,&mismatched-parens JCN ;parens LDA2 #0000 GTH2 ?&mismatched-parens
;unroll-stack JSR2 POP2 JMP2r unroll-stack POP2 JMP2r
&mismatched-parens ;mismatched-parens ;error!! JSR2 &mismatched-parens ;mismatched-parens errorm
( called when we read "|" ) ( called when we read "|" )
( ) ( )
@ -437,8 +437,8 @@
( we just start a new stack frame and continue. ) ( we just start a new stack frame and continue. )
@c-or ( c^ -> r2* ) @c-or ( c^ -> r2* )
POP POP
#0000 #0000 ;push4 JSR2 #0000 #0000 push4
;compile-region-loop JMP2 !compile-region-loop
( called when we read left parenthesis ) ( called when we read left parenthesis )
( ) ( )
@ -450,7 +450,7 @@
@c-lpar ( c^ -> r2* ) @c-lpar ( c^ -> r2* )
POP POP
;parens LDA2 INC2 ;parens STA2 ( parens++ ) ;parens LDA2 INC2 ;parens STA2 ( parens++ )
;compile-region JMP2 !compile-region
( called when we read right parenthesis ) ( called when we read right parenthesis )
( ) ( )
@ -463,34 +463,34 @@
( 5. continue parsing ) ( 5. continue parsing )
@c-rpar ( c^ -> r2* ) @c-rpar ( c^ -> r2* )
POP POP
;parens LDA2 #0000 EQU2 ,&mismatched-parens JCN ;parens LDA2 #0000 EQU2 ?&mismatched-parens
;parens LDA2 #0001 SUB2 ;parens STA2 ( parens-- ) ;parens LDA2 #0001 SUB2 ;parens STA2 ( parens-- )
;unroll-stack JSR2 unroll-stack
;c-peek-and-finalize JMP2 !c-peek-and-finalize
&mismatched-parens ;mismatched-parens ;error!! JSR2 &mismatched-parens ;mismatched-parens errorm
( doesn't support weird things like []abc] or [-abc] or similar. ) ( doesn't support weird things like []abc] or [-abc] or similar. )
( doesn't currently handle "special" escapes such as \n ) ( doesn't currently handle "special" escapes such as \n )
@c-lbrk ( c^ -> r2* ) @c-lbrk ( c^ -> r2* )
POP LITr 00 ;pos LDA2 ( pos [0] ) POP LITr 00 ;pos LDA2 ( pos [0] )
LDAk LIT "^ NEQ ,&normal JCN INCr INC2 ( pos [negated?^] ) LDAk LIT "^ NEQ ?&normal INCr INC2 ( pos [negated?^] )
&normal &normal
#0a STHr ADD ( src* type^ ) #0a STHr ADD ( src* type^ )
;arena-pos LDA2 STH2k ( src* type^ dst* [dst*] ) ;arena-pos LDA2 STH2k ( src* type^ dst* [dst*] )
STA LIT2r 0004 ADD2r ( src* [dst+4] ) STA LIT2r 0004 ADD2r ( src* [dst+4] )
&left-parse ( src* [dst*] ) &left-parse ( src* [dst*] )
LDAk LIT "] EQU ,&done JCN LDAk LIT "] EQU ?&done
LDAk LIT "- EQU ,&error JCN LDAk LIT "- EQU ?&error
LDAk LIT "\ NEQ ,&left JCN INC2 LDAk LIT "\ NEQ ?&left INC2
&left &left
LDAk STH2kr STA INC2r LDAk STH2kr STA INC2r
DUP2 INC2 LDA LIT "- NEQ ,&pre-right JCN INC2 INC2 DUP2 INC2 LDA LIT "- NEQ ?&pre-right INC2 INC2
LDAk LIT "] EQU ,&error JCN LDAk LIT "] EQU ?&error
LDAk LIT "- EQU ,&error JCN LDAk LIT "- EQU ?&error
&pre-right &pre-right
LDAk LIT "\ NEQ ,&right JCN INC2 LDAk LIT "\ NEQ ?&right INC2
&right &right
LDAk STH2kr STA INC2 INC2r ,&left-parse JMP LDAk STH2kr STA INC2 INC2r !&left-parse
&done ( src* [dst*] ) &done ( src* [dst*] )
INC2 ;pos STA2 STH2r ( dst* ) INC2 ;pos STA2 STH2r ( dst* )
DUP2 ;arena-pos LDA2 ( dst dst a ) DUP2 ;arena-pos LDA2 ( dst dst a )
@ -498,7 +498,7 @@
;arena-pos LDA2 STH2k #0003 ADD2 STA ( dst [a] ) ;arena-pos LDA2 STH2k #0003 ADD2 STA ( dst [a] )
;arena-pos STA2 STH2r ( a ) ;arena-pos STA2 STH2r ( a )
#0000 OVR2 INC2 STA2 ( a ) #0000 OVR2 INC2 STA2 ( a )
DUP2 ;c-peek-and-finalize JMP2 DUP2 !c-peek-and-finalize
&error &error
#abcd #0000 DIV ( TODO error here ) #abcd #0000 DIV ( TODO error here )
@ -511,24 +511,24 @@
( allocates a dot-node and continues. ) ( allocates a dot-node and continues. )
@c-dot ( c^ -> r2* ) @c-dot ( c^ -> r2* )
POP POP
#02 ;alloc3 JSR2 #02 alloc3
DUP2 ;c-peek-and-finalize JMP2 DUP2 !c-peek-and-finalize
( called when we read "^" ) ( called when we read "^" )
( ) ( )
( allocates a caret-node and continues. ) ( allocates a caret-node and continues. )
@c-caret ( c^ -> r2* ) @c-caret ( c^ -> r2* )
POP POP
#06 ;alloc3 JSR2 #06 alloc3
DUP2 ;c-peek-and-finalize JMP2 DUP2 !c-peek-and-finalize
( called when we read "$" ) ( called when we read "$" )
( ) ( )
( allocates a dollar-node and continues. ) ( allocates a dollar-node and continues. )
@c-dollar ( c^ -> r2* ) @c-dollar ( c^ -> r2* )
POP POP
#07 ;alloc3 JSR2 #07 alloc3
DUP2 ;c-peek-and-finalize JMP2 DUP2 !c-peek-and-finalize
( called when we read "\" ) ( called when we read "\" )
( ) ( )
@ -536,50 +536,50 @@
( ) ( )
( otherwise, allocates a literal of the next character. ) ( otherwise, allocates a literal of the next character. )
@c-esc ( c^ -> r2* ) @c-esc ( c^ -> r2* )
POP ;read JSR2 POP read
DUP LIT "a EQU ,&bel JCN DUP LIT "a EQU ?&bel
DUP LIT "b EQU ,&bs JCN DUP LIT "b EQU ?&bs
DUP LIT "t EQU ,&tab JCN DUP LIT "t EQU ?&tab
DUP LIT "n EQU ,&nl JCN DUP LIT "n EQU ?&nl
DUP LIT "v EQU ,&vtab JCN DUP LIT "v EQU ?&vtab
DUP LIT "f EQU ,&ff JCN DUP LIT "f EQU ?&ff
DUP LIT "r EQU ,&cr JCN DUP LIT "r EQU ?&cr
&default ;c-char JMP2 &default !c-char
&bel POP #07 ,&default JMP &bel POP #07 !&default
&bs POP #08 ,&default JMP &bs POP #08 !&default
&tab POP #09 ,&default JMP &tab POP #09 !&default
&nl POP #0a ,&default JMP &nl POP #0a !&default
&vtab POP #0b ,&default JMP &vtab POP #0b !&default
&ff POP #0c ,&default JMP &ff POP #0c !&default
&cr POP #0d ,&default JMP &cr POP #0d !&default
( called when we read any other character ) ( called when we read any other character )
( ) ( )
( allocates a literal-node and continues. ) ( allocates a literal-node and continues. )
@c-char ( c^ -> r2* ) @c-char ( c^ -> r2* )
;alloc-lit JSR2 ( lit ) alloc-lit ( lit )
DUP2 ;c-peek-and-finalize JMP2 DUP2 !c-peek-and-finalize
( called if we parse a "*" ) ( called if we parse a "*" )
( ) ( )
( actually calling this means the code broke an invariant somewhere. ) ( actually calling this means the code broke an invariant somewhere. )
@c-star ( c^ -> regex* ) @c-star ( c^ -> regex* )
POP POP
;star-invariant ;error!! JSR2 ;star-invariant errorm
( called if we parse a "+" ) ( called if we parse a "+" )
( ) ( )
( actually calling this means the code broke an invariant somewhere. ) ( actually calling this means the code broke an invariant somewhere. )
@c-plus ( c^ -> regex* ) @c-plus ( c^ -> regex* )
POP POP
;plus-invariant ;error!! JSR2 ;plus-invariant errorm
( called if we parse a "?" ) ( called if we parse a "?" )
( ) ( )
( actually calling this means the code broke an invariant somewhere. ) ( actually calling this means the code broke an invariant somewhere. )
@c-qmark ( c^ -> regex* ) @c-qmark ( c^ -> regex* )
POP POP
;qmark-invariant ;error!! JSR2 ;qmark-invariant errorm
( ALLOCATING REGEX NDOES ) ( ALLOCATING REGEX NDOES )
@ -589,51 +589,51 @@
@alloc3 ( mode^ -> r* ) @alloc3 ( mode^ -> r* )
#0000 ROT ( 00 00 mode^ ) #0000 ROT ( 00 00 mode^ )
#03 ;alloc JSR2 ( 00 00 mode^ addr* ) #03 alloc ( 00 00 mode^ addr* )
STH2k STA ( addr <- mode ) STH2k STA ( addr <- mode )
STH2kr INC2 STA2 ( addr+1 <- 0000 ) STH2kr INC2 STA2 ( addr+1 <- 0000 )
STH2r JMP2r ( return addr ) STH2r JMP2r ( return addr )
@alloc-empty ( -> r* ) @alloc-empty ( -> r* )
#01 ;alloc3 JMP2 #01 !alloc3
@alloc-lit ( c^ -> r* ) @alloc-lit ( c^ -> r* )
#03 #0000 SWP2 ( 0000 c^ 03 ) #03 #0000 SWP2 ( 0000 c^ 03 )
#04 ;alloc JSR2 ( 0000 c^ 03 addr* ) #04 alloc ( 0000 c^ 03 addr* )
STH2k STA ( addr <- 03 ) STH2k STA ( addr <- 03 )
STH2kr INC2 STA ( addr+1 <- c ) STH2kr INC2 STA ( addr+1 <- c )
STH2kr #0002 ADD2 STA2 ( addr+2 <- 0000 ) STH2kr #0002 ADD2 STA2 ( addr+2 <- 0000 )
STH2r JMP2r ( return addr ) STH2r JMP2r ( return addr )
@alloc-or ( right* left* -> r* ) @alloc-or ( right* left* -> r* )
#05 ;alloc JSR2 STH2 ( r l [x] ) #05 alloc STH2 ( r l [x] )
#04 STH2kr STA ( r l [x] ) #04 STH2kr STA ( r l [x] )
STH2kr INC2 STA2 ( r [x] ) STH2kr INC2 STA2 ( r [x] )
STH2kr #0003 ADD2 STA2 ( [x] ) STH2kr #0003 ADD2 STA2 ( [x] )
STH2r JMP2r STH2r JMP2r
@alloc-star ( expr* -> r* ) @alloc-star ( expr* -> r* )
#05 ;alloc JSR2 STH2 ( expr [r] ) #05 alloc STH2 ( expr [r] )
#05 STH2kr STA ( expr [r] ) #05 STH2kr STA ( expr [r] )
DUP2 STH2kr INC2 STA2 ( expr [r] ) DUP2 STH2kr INC2 STA2 ( expr [r] )
#0000 STH2kr #0003 ADD2 STA2 ( expr [r] ) #0000 STH2kr #0003 ADD2 STA2 ( expr [r] )
STH2kr SWP2 ( r expr [r] ) STH2kr SWP2 ( r expr [r] )
;set-next JSR2 ( [r] ) set-next ( [r] )
STH2r JMP2r STH2r JMP2r
@alloc-plus ( expr* -> r* ) @alloc-plus ( expr* -> r* )
#05 ;alloc JSR2 STH2 ( expr [r] ) #05 alloc STH2 ( expr [r] )
#05 STH2kr STA ( expr [r] ) #05 STH2kr STA ( expr [r] )
DUP2 STH2kr INC2 STA2 ( expr [r] ) DUP2 STH2kr INC2 STA2 ( expr [r] )
#0000 STH2kr #0003 ADD2 STA2 ( expr [r] ) #0000 STH2kr #0003 ADD2 STA2 ( expr [r] )
STH2r SWP2 STH2k ( r expr [expr] ) STH2r SWP2 STH2k ( r expr [expr] )
;set-next JSR2 ( [expr] ) set-next ( [expr] )
STH2r JMP2r STH2r JMP2r
@alloc-qmark ( expr* -> r* ) @alloc-qmark ( expr* -> r* )
;alloc-empty JSR2 STH2k ( expr e [e] ) alloc-empty STH2k ( expr e [e] )
OVR2 ;set-next JSR2 ( expr [e] ) OVR2 set-next ( expr [e] )
#05 ;alloc JSR2 STH2 ( expr [r e] ) #05 alloc STH2 ( expr [r e] )
#04 STH2kr STA ( expr [r e] ) #04 STH2kr STA ( expr [r e] )
STH2kr INC2 STA2 ( [r e] ) STH2kr INC2 STA2 ( [r e] )
SWP2r STH2r STH2kr ( e r [r] ) SWP2r STH2r STH2kr ( e r [r] )
@ -642,7 +642,7 @@
( if r is 0000, allocate an empty node ) ( if r is 0000, allocate an empty node )
@alloc-if-null ( r* -> r2* ) @alloc-if-null ( r* -> r2* )
ORAk ,&return JCN POP2 ;alloc-empty JSR2 &return JMP2r ORAk ?&return POP2 alloc-empty &return JMP2r
( unroll one region of the parsing stack, returning ) ( unroll one region of the parsing stack, returning )
( a single node consisting of an alternation of ) ( a single node consisting of an alternation of )
@ -651,23 +651,23 @@
( this unrolls until it hits #ffff #ffff, which it ) ( this unrolls until it hits #ffff #ffff, which it )
( also removes from the stack. ) ( also removes from the stack. )
@unroll-stack ( -> start* end* ) @unroll-stack ( -> start* end* )
;pop4 JSR2 STH2 ( r ) pop4 STH2 ( r )
#00 STH ( count items in stack frame ) #00 STH ( count items in stack frame )
;alloc-if-null JSR2 ( replace 0000 with empty ) alloc-if-null ( replace 0000 with empty )
&loop ( r* ) &loop ( r* )
;pop4 JSR2 POP2 ( r x ) pop4 POP2 ( r x )
DUP2 #ffff EQU2 ( r x x-is-end? ) ,&done JCN DUP2 #ffff EQU2 ( r x x-is-end? ) ?&done
INCr ( items++ ) INCr ( items++ )
;alloc-or JSR2 ( r|x ) ,&loop JMP alloc-or ( r|x ) !&loop
&done &done
( r ffff ) ( r ffff )
POP2 POP2
STHr ,&is-or JCN STHr ?&is-or
STH2r JMP2r STH2r JMP2r
&is-or &is-or
POP2r POP2r
;alloc-empty JSR2 OVR2 OVR2 SWP2 ( r empty empty r ) alloc-empty OVR2 OVR2 SWP2 ( r empty empty r )
;set-next-or JSR2 set-next-or
JMP2r JMP2r
( add r to the top of the stock. ) ( add r to the top of the stock. )
@ -675,21 +675,21 @@
( in particular, this will write r into tail.next ) ( in particular, this will write r into tail.next )
( before replacing tail with r. ) ( before replacing tail with r. )
@push-next ( r0 r1 -> ) @push-next ( r0 r1 -> )
;pop4 JSR2 ( r0 r1 x0 x1 ) pop4 ( r0 r1 x0 x1 )
DUP2 #0000 EQU2 ( r0 r1 x0 x1 x1=0? ) ,&is-zero JCN DUP2 #0000 EQU2 ( r0 r1 x0 x1 x1=0? ) ?&is-zero
STH2 ROT2 STH2r ( r1 x0 r0 x1 ) STH2 ROT2 STH2r ( r1 x0 r0 x1 )
;set-next JSR2 SWP2 ( x0 r1 ) set-next SWP2 ( x0 r1 )
;push4 JSR2 push4
JMP2r JMP2r
&is-zero POP2 POP2 ;push4 JMP2 &is-zero POP2 POP2 !push4
( load the given address: ) ( load the given address: )
( ) ( )
( 1. if it points to 0000, update it to target ) ( 1. if it points to 0000, update it to target )
( 2. otherwise, call set-next on it ) ( 2. otherwise, call set-next on it )
@set-next-addr ( target* addr* -> ) @set-next-addr ( target* addr* -> )
LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN LDA2k #0000 EQU2 ( target addr addr=0? ) ?&is-zero
LDA2 ;set-next JMP2 LDA2 !set-next
&is-zero STA2 JMP2r &is-zero STA2 JMP2r
( set regex.next to target ) ( set regex.next to target )
@ -703,18 +703,18 @@
( back up we only bother taking the left branch. otherwise ) ( back up we only bother taking the left branch. otherwise )
( you can end up double-appending things. ) ( you can end up double-appending things. )
@set-next ( target* regex* -> ) @set-next ( target* regex* -> )
LDAk #01 LTH ,&unknown JCN LDAk #01 LTH ?&unknown
LDAk #0b GTH ,&unknown JCN LDAk #0b GTH ?&unknown
LDAk #09 GTH ,&cc JCN LDAk #09 GTH ?&cc
LDAk #00 SWP ;rx-node-sizes ADD2 LDAk #00 SWP ;rx-node-sizes ADD2
LDA #00 SWP ADD2 #0002 SUB2 LDA #00 SWP ADD2 #0002 SUB2
;set-next-addr JMP2 !set-next-addr
&cc INC2 ;set-next-addr JMP2 &cc INC2 !set-next-addr
&unknown LDAk #ee ;unknown-node-type ;error!! JSR2 &unknown LDAk #ee ;unknown-node-type errorm
@set-next-or-addr ( target* addr* -> ) @set-next-or-addr ( target* addr* -> )
LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN LDA2k #0000 EQU2 ( target addr addr=0? ) ?&is-zero
LDA2 ;set-next-or JMP2 LDA2 !set-next-or
&is-zero STA2 JMP2r &is-zero STA2 JMP2r
( this is used when first building or-nodes ) ( this is used when first building or-nodes )
@ -722,10 +722,10 @@
( [x1, [x2, [x3, ..., [xm, xn]]]] ) ( [x1, [x2, [x3, ..., [xm, xn]]]] )
( so we recurse on the right side but not the left. ) ( so we recurse on the right side but not the left. )
@set-next-or ( target* regex* -> ) @set-next-or ( target* regex* -> )
LDAk #04 NEQ ,&!4 JCN LDAk #04 NEQ ?&!4
OVR2 OVR2 INC2 ;set-next-addr JSR2 OVR2 OVR2 INC2 set-next-addr
#0003 ADD2 ;set-next-or-addr JMP2 #0003 ADD2 !set-next-or-addr
&!4 ;set-next JMP2 &!4 !set-next
( STACK OPERATIONS ) ( STACK OPERATIONS )
( ) ( )
@ -741,7 +741,7 @@
( push 4 bytes onto the stack ) ( push 4 bytes onto the stack )
@push4 ( str* regex* -> ) @push4 ( str* regex* -> )
;assert-stack-avail JSR2 ( check for space ) assert-stack-avail ( check for space )
;stack-pos LDA2 #0002 ADD2 STA2 ( cell[2:3] <- regex ) ;stack-pos LDA2 #0002 ADD2 STA2 ( cell[2:3] <- regex )
;stack-pos LDA2 STA2 ( cell[0:1] <- str ) ;stack-pos LDA2 STA2 ( cell[0:1] <- str )
;stack-pos LDA2 #0004 ADD2 ;stack-pos STA2 ( pos += 4 ) ;stack-pos LDA2 #0004 ADD2 ;stack-pos STA2 ( pos += 4 )
@ -749,7 +749,7 @@
( pop 4 bytes from the stack ) ( pop 4 bytes from the stack )
@pop4 ( -> str* regex* ) @pop4 ( -> str* regex* )
;assert-stack-exist JSR2 ( check for space ) assert-stack-exist ( check for space )
;stack-pos LDA2 ( load stack-pos ) ;stack-pos LDA2 ( load stack-pos )
#0002 SUB2 LDA2k STH2 ( pop and stash regex ) #0002 SUB2 LDA2k STH2 ( pop and stash regex )
#0002 SUB2 LDA2k STH2 ( pop and stash str ) #0002 SUB2 LDA2k STH2 ( pop and stash str )
@ -771,11 +771,11 @@
( error if stack is full ) ( error if stack is full )
@assert-stack-avail ( -> ) @assert-stack-avail ( -> )
;stack-avail JSR2 ,&ok JCN ;stack-is-full ;error!! JSR2 &ok JMP2r stack-avail ?&ok ;stack-is-full errorm &ok JMP2r
( error is stack is empty ) ( error is stack is empty )
@assert-stack-exist ( -> ) @assert-stack-exist ( -> )
;stack-exist JSR2 ,&ok JCN ;stack-is-empty ;error!! JSR2 &ok JMP2r stack-exist ?&ok ;stack-is-empty errorm &ok JMP2r
( stack-pos points to the next free stack position (or the top if full). ) ( stack-pos points to the next free stack position (or the top if full). )
@stack-pos :stack-bot ( the next position to insert at ) @stack-pos :stack-bot ( the next position to insert at )
@ -810,10 +810,10 @@
#00 SWP ( size* ) #00 SWP ( size* )
;arena-pos LDA2 STH2k ADD2 ( pos+size* [pos] ) ;arena-pos LDA2 STH2k ADD2 ( pos+size* [pos] )
DUP2 ;arena-top GTH2 ( pos+size pos+size>top? [pos] ) DUP2 ;arena-top GTH2 ( pos+size pos+size>top? [pos] )
,&error JCN ( pos+size [pos] ) ?&error ( pos+size [pos] )
;arena-pos STA2 ( pos += size [pos] ) ;arena-pos STA2 ( pos += size [pos] )
STH2r JMP2r ( pos ) STH2r JMP2r ( pos )
&error POP2 POP2r ;arena-is-full ;error!! JSR2 &error POP2 POP2r ;arena-is-full errorm
@arena-pos :arena-bot ( the next position to allocate ) @arena-pos :arena-bot ( the next position to allocate )
@arena-bot $400 @arena-top ( holds up to 1024 bytes ) @arena-bot $400 @arena-top ( holds up to 1024 bytes )
@ -870,7 +870,7 @@
@subgroup-start ( s* i^ -> ) @subgroup-start ( s* i^ -> )
;subgroup-pos LDA2 STH2k ( s* i^ pos* [pos*] ) ;subgroup-pos LDA2 STH2k ( s* i^ pos* [pos*] )
;subgroup-top LTH2 ,&next JCN #0000 DIV ( too many subgroups ) ;subgroup-top LTH2 ?&next #0000 DIV ( too many subgroups )
&next ( s* i^ [pos*] ) &next ( s* i^ [pos*] )
STH2kr STA STH2kr STA
STH2r INC2 STA2 STH2r INC2 STA2
@ -878,9 +878,9 @@
@subgroup-finish ( s* i^ -> ) @subgroup-finish ( s* i^ -> )
;subgroup-pos LDA2 STH2k ( s* i^ pos* [pos*] ) ;subgroup-pos LDA2 STH2k ( s* i^ pos* [pos*] )
;subgroup-top LTH2 ,&next JCN #0000 DIV ( too many subgroups ) ;subgroup-top LTH2 ?&next #0000 DIV ( too many subgroups )
&next ( s* i^ [pos*] ) &next ( s* i^ [pos*] )
STH2kr LDA EQU ,&ok JCN #0000 DIV ( mismatched subgroups ) STH2kr LDA EQU ?&ok #0000 DIV ( mismatched subgroups )
&ok ( s* [pos*] ) &ok ( s* [pos*] )
STH2kr #0003 ADD2 STA2 STH2kr #0003 ADD2 STA2
STH2r #0005 ADD2 ;subgroup-pos STA2 STH2r #0005 ADD2 ;subgroup-pos STA2
@ -888,7 +888,7 @@
@subgroup-branch ( -> ) @subgroup-branch ( -> )
;subgroup-pos LDA2 STH2k ( pos* [pos*] ) ;subgroup-pos LDA2 STH2k ( pos* [pos*] )
;subgroup-top LTH2 ,&next JCN #0000 DIV ( too many subgroups ) ;subgroup-top LTH2 ?&next #0000 DIV ( too many subgroups )
&next &next
#00 STH2kr STA ( [*pos] ) #00 STH2kr STA ( [*pos] )
STH2r #0005 ADD2 ;subgroup-pos STA2 STH2r #0005 ADD2 ;subgroup-pos STA2
@ -897,9 +897,9 @@
@subgroup-backtrack ( -> ) @subgroup-backtrack ( -> )
;subgroup-bot ;subgroup-pos LDA2 ( bot* pos* ) ;subgroup-bot ;subgroup-pos LDA2 ( bot* pos* )
&loop ( bot* pos* ) &loop ( bot* pos* )
EQU2k ,&done JCN EQU2k ?&done
LDAk #00 EQU ,&done JCN LDAk #00 EQU ?&done
#0005 SUB2 ,&loop JMP #0005 SUB2 !&loop
&done ( bot* pos* ) &done ( bot* pos* )
NIP2 ;subgroup-pos STA2 NIP2 ;subgroup-pos STA2
JMP2r JMP2r