more comments + refactoring
This commit is contained in:
parent
8173f99b41
commit
0024bc4882
114
regex.tal
114
regex.tal
|
@ -33,73 +33,8 @@
|
||||||
%emit { #18 DEO }
|
%emit { #18 DEO }
|
||||||
%space { #20 emit }
|
%space { #20 emit }
|
||||||
%newline { #0a emit }
|
%newline { #0a emit }
|
||||||
%print { debug newline }
|
|
||||||
%quit! { #01 #0f DEO BRK }
|
%quit! { #01 #0f DEO BRK }
|
||||||
|
|
||||||
( TESTING )
|
|
||||||
|
|
||||||
(
|
|
||||||
|0100
|
|
||||||
;expr1 ;compile JSR2 print
|
|
||||||
;emit-stack JSR2 newline
|
|
||||||
;emit-arena JSR2 newline
|
|
||||||
|
|
||||||
;test1 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test2 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test3 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test4 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test5 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test6 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test7 OVR2k ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test8 OVR2k ;match JSR2 ;emit-byte JSR2 newline
|
|
||||||
|
|
||||||
;test1 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test2 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test3 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test4 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test5 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test6 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test7 ;graph1 ;match JSR2 ;emit-byte JSR2 space
|
|
||||||
;test8 ;graph1 ;match JSR2 ;emit-byte JSR2 newline
|
|
||||||
quit!
|
|
||||||
)
|
|
||||||
( TEST DATA )
|
|
||||||
|
|
||||||
(
|
|
||||||
( corresponds to regex: a(b|c)d* )
|
|
||||||
@expr1 "a(b|c)d* 00
|
|
||||||
|
|
||||||
( corresponds to regex: a(b|c)d* )
|
|
||||||
( accepts "ab" or "ac" followed by any number of d's )
|
|
||||||
@graph1
|
|
||||||
03 'a :x1
|
|
||||||
@x1 04 :x2 :x3
|
|
||||||
@x2 03 'b :x4
|
|
||||||
@x3 03 'c :x4
|
|
||||||
@x4 05 :x5 0000
|
|
||||||
@x5 03 'd :x4
|
|
||||||
|
|
||||||
( test case strings to try matching )
|
|
||||||
@test1 "ab 00 ( yes )
|
|
||||||
@test2 "acdd 00 ( yes )
|
|
||||||
@test3 "add 00 ( no )
|
|
||||||
@test4 "abd 00 ( yes )
|
|
||||||
@test5 "acddddddddddd 00 ( yes )
|
|
||||||
@test6 "bd 00 ( no )
|
|
||||||
@test7 "z 00 ( no )
|
|
||||||
@test8 00 ( no )
|
|
||||||
)
|
|
||||||
|
|
||||||
( PRINTING DATA )
|
|
||||||
|
|
||||||
@emit-short ( byte -- )
|
|
||||||
SWP ;emit-byte JSR2 ;emit-byte JSR2 JMP2r
|
|
||||||
|
|
||||||
@emit-byte ( byte -- )
|
|
||||||
DUP #04 SFT ,&hex JSR #0f AND ,&hex JMP
|
|
||||||
&hex #30 ADD DUP #39 GTH #27 MUL ADD emit
|
|
||||||
JMP2r
|
|
||||||
|
|
||||||
( ERROR HANDLING )
|
( ERROR HANDLING )
|
||||||
|
|
||||||
( using error! will print the given message before causing )
|
( using error! will print the given message before causing )
|
||||||
|
@ -221,7 +156,19 @@
|
||||||
@skip
|
@skip
|
||||||
;pos LDA2 INC2 ;pos STA2 JMP2r
|
;pos LDA2 INC2 ;pos STA2 JMP2r
|
||||||
|
|
||||||
( TODO: [] + ? )
|
( TODO: )
|
||||||
|
( 1. character groups: [] and [^] )
|
||||||
|
( 2. one-or-more: + )
|
||||||
|
( 3. zero-or-one: ? )
|
||||||
|
( 4. symbolic escapes, e.g. \n )
|
||||||
|
|
||||||
|
( STRETCH GOALS: )
|
||||||
|
( a. ^ and $ )
|
||||||
|
( b. counts: {n} and {m,n} )
|
||||||
|
( c. substring matching, i.e. searching )
|
||||||
|
( d. subgroup extraction )
|
||||||
|
( e. back-references, e.g. \1 )
|
||||||
|
|
||||||
( compile an expression string into a regex graph )
|
( compile an expression string into a regex graph )
|
||||||
( )
|
( )
|
||||||
( the regex will be allocated in the arena; if there is not )
|
( the regex will be allocated in the arena; if there is not )
|
||||||
|
@ -500,17 +447,6 @@
|
||||||
@assert-stack-exist ( -> )
|
@assert-stack-exist ( -> )
|
||||||
;stack-exist JSR2 ,&ok JCN ;stack-is-empty ;error! JSR2 &ok JMP2r
|
;stack-exist JSR2 ,&ok JCN ;stack-is-empty ;error! JSR2 &ok JMP2r
|
||||||
|
|
||||||
( print stack size, followed by contents )
|
|
||||||
@emit-stack ( -> )
|
|
||||||
space LIT 'n emit LIT '= emit ;stack-pos LDA2 ;stack-bot SUB2 #0004 DIV2 ;emit-short JSR2 LIT ': emit
|
|
||||||
;stack-bot
|
|
||||||
&loop
|
|
||||||
DUP2 ;stack-pos LDA2 LTH2 ,&ok JCN
|
|
||||||
POP2 newline JMP2r
|
|
||||||
&ok
|
|
||||||
space LDA2k ;emit-short JSR2
|
|
||||||
#0002 ADD2 ,&loop JMP
|
|
||||||
|
|
||||||
( stack-pos points to the next free stack position (or the top if full). )
|
( stack-pos points to the next free stack position (or the top if full). )
|
||||||
@stack-pos :stack-bot ( the next position to insert at )
|
@stack-pos :stack-bot ( the next position to insert at )
|
||||||
|
|
||||||
|
@ -548,27 +484,3 @@
|
||||||
|1ffe
|
|1ffe
|
||||||
@arena-pos :arena-bot ( the next position to allocate )
|
@arena-pos :arena-bot ( the next position to allocate )
|
||||||
@arena-bot $400 @arena-top ( holds up to 1024 bytes )
|
@arena-bot $400 @arena-top ( holds up to 1024 bytes )
|
||||||
|
|
||||||
( emit n bytes from the given address )
|
|
||||||
@emit-n ( addr* count^ -> addr2* )
|
|
||||||
DUP #00 GTH ( addr count count>0? ) ,&ok JCN ( addr count ) POP newline JMP2r
|
|
||||||
&ok
|
|
||||||
STH ( addr [count] ) space LDAk ;emit-byte JSR2 INC2 ( addr+1 [count] )
|
|
||||||
STHr #01 SUB ( addr+1 count-1 )
|
|
||||||
;emit-n JMP2
|
|
||||||
|
|
||||||
( emit the arena, with one line per node )
|
|
||||||
( parses node type, since node size is dynamic (3-5). )
|
|
||||||
@emit-arena ( -> )
|
|
||||||
;arena-bot
|
|
||||||
&loop
|
|
||||||
DUP2 ;arena-pos LDA2 LTH2 ,&ok JCN POP2 JMP2r
|
|
||||||
&ok
|
|
||||||
DUP2 ;emit-short JSR2
|
|
||||||
LIT ': emit space
|
|
||||||
LDAk #01 NEQ ,&!1 JCN #03 ;emit-n JSR2 ,&loop JMP
|
|
||||||
&!1 LDAk #02 NEQ ,&!2 JCN #03 ;emit-n JSR2 ,&loop JMP
|
|
||||||
&!2 LDAk #03 NEQ ,&!3 JCN #04 ;emit-n JSR2 ,&loop JMP
|
|
||||||
&!3 LDAk #04 NEQ ,&!4 JCN #05 ;emit-n JSR2 ,&loop JMP
|
|
||||||
&!4 LDAk #05 NEQ ,&!5 JCN #05 ;emit-n JSR2 ,&loop JMP
|
|
||||||
&!5 ;unknown-node-type ;error! JSR2
|
|
||||||
|
|
|
@ -54,3 +54,45 @@
|
||||||
|
|
||||||
~regex.tal
|
~regex.tal
|
||||||
|
|
||||||
|
@emit-short ( short* -- ) ( emit a 16-bit value as four hex digits, via two emit-byte calls )
|
||||||
|
SWP ;emit-byte JSR2 ;emit-byte JSR2 JMP2r ( SWP puts the high byte on top so it is emitted first, then the low byte )
|
||||||
|
|
||||||
|
@emit-byte ( byte^ -- ) ( emit a byte as two lowercase hex digits, high nibble first )
|
||||||
|
DUP #04 SFT ,&hex JSR #0f AND ,&hex JMP ( high nibble is printed by calling &hex; low nibble jumps into &hex, so its JMP2r returns to our caller )
|
||||||
|
&hex #30 ADD DUP #39 GTH #27 MUL ADD emit ( nibble + #30 gives '0'..'9'; when the result is past '9', add #27 to land on 'a'..'f' )
|
||||||
|
JMP2r
|
||||||
|
|
||||||
|
( print stack size, followed by contents )
|
||||||
|
@emit-stack ( -> ) ( debug dump: a hex count, then every short between stack-bot and stack-pos, on one line )
|
||||||
|
space LIT 'n emit LIT '= emit ;stack-pos LDA2 ;stack-bot SUB2 #0004 DIV2 ;emit-short JSR2 LIT ': emit ( header " n=XXXX:" where XXXX = [stack-pos - stack-bot] / 4 in hex )
|
||||||
|
;stack-bot ( cursor* starts at the bottom of the stack )
|
||||||
|
&loop
|
||||||
|
DUP2 ;stack-pos LDA2 LTH2 ,&ok JCN ( keep going while cursor < stack-pos )
|
||||||
|
POP2 newline JMP2r ( done: drop the cursor, end the line, return )
|
||||||
|
&ok
|
||||||
|
space LDA2k ;emit-short JSR2 ( print a space, then the short stored at the cursor )
|
||||||
|
#0002 ADD2 ,&loop JMP ( advance the cursor by one short. NOTE review: the header counts 4-byte units while this loop steps 2 bytes - presumably each entry is two shorts; confirm )
|
||||||
|
|
||||||
|
( emit n bytes from the given address )
|
||||||
|
@emit-n ( addr* count^ -> addr2* ) ( addr2 is the address just past the last byte emitted )
|
||||||
|
DUP #00 GTH ( addr count count>0? ) ,&ok JCN ( addr count ) POP newline JMP2r ( count is zero: drop it, end the line, return with addr on the stack )
|
||||||
|
&ok
|
||||||
|
STH ( addr [count] ) space LDAk ;emit-byte JSR2 INC2 ( addr+1 [count] ) ( count waits on the return stack while a space and one hex byte are printed )
|
||||||
|
STHr #01 SUB ( addr+1 count-1 )
|
||||||
|
;emit-n JMP2 ( jump, not call: loop back to the top for the remaining bytes without growing the return stack )
|
||||||
|
|
||||||
|
( emit the arena, with one line per node )
|
||||||
|
( parses node type, since node size is dynamic (3-5). )
|
||||||
|
@emit-arena ( -> ) ( debug dump: for each node from arena-bot up to arena-pos, print its address and its bytes on one line )
|
||||||
|
;arena-bot ( cursor* starts at the first node )
|
||||||
|
&loop
|
||||||
|
DUP2 ;arena-pos LDA2 LTH2 ,&ok JCN POP2 JMP2r ( keep going while cursor < arena-pos; otherwise drop the cursor and return )
|
||||||
|
&ok
|
||||||
|
DUP2 ;emit-short JSR2 ( print the node address as four hex digits )
|
||||||
|
LIT ': emit space
|
||||||
|
LDAk #01 NEQ ,&!1 JCN #03 ;emit-n JSR2 ,&loop JMP ( first byte is the node type; type 01 prints 3 bytes. emit-n leaves the cursor just past the node )
|
||||||
|
&!1 LDAk #02 NEQ ,&!2 JCN #03 ;emit-n JSR2 ,&loop JMP ( type 02 prints 3 bytes )
|
||||||
|
&!2 LDAk #03 NEQ ,&!3 JCN #04 ;emit-n JSR2 ,&loop JMP ( type 03 prints 4 bytes )
|
||||||
|
&!3 LDAk #04 NEQ ,&!4 JCN #05 ;emit-n JSR2 ,&loop JMP ( type 04 prints 5 bytes )
|
||||||
|
&!4 LDAk #05 NEQ ,&!5 JCN #05 ;emit-n JSR2 ,&loop JMP ( type 05 prints 5 bytes )
|
||||||
|
&!5 ;unknown-node-type ;error! JSR2 ( any other type byte is reported through error!. NOTE review: nothing follows this call and the cursor is still on the stack - presumably error! never returns; confirm )
|
||||||
|
|
Loading…
Reference in New Issue