commit asma.rom
This commit is contained in:
parent
5d90cf6a3d
commit
cae3ce4a7c
123
regex.tal
123
regex.tal
|
@ -23,8 +23,12 @@
|
||||||
( )
|
( )
|
||||||
( when searching, ^ matches the beginning of the string OR a line. )
|
( when searching, ^ matches the beginning of the string OR a line. )
|
||||||
( $ matches the end of a line OR the end of the entire string. )
|
( $ matches the end of a line OR the end of the entire string. )
|
||||||
( (the ^ and $ operators aren't yet supported.) the dot nodes will )
|
( the dot nodes will not match newline characters, which must be )
|
||||||
( not match newline characters, which must be matched explicitly. )
|
( matched explicitly. )
|
||||||
|
( )
|
||||||
|
( finally, search-multiline will cause ^ and $ to use the matching )
|
||||||
|
( behavior (i.e. only matching the beginning or end of a string). )
|
||||||
|
( however dot nodes will still not match newline characters. )
|
||||||
( )
|
( )
|
||||||
( search returns 01 if the string was matched and 00 otherwise. )
|
( search returns 01 if the string was matched and 00 otherwise. )
|
||||||
( additionally, the @search-start and @search-end addresses will )
|
( additionally, the @search-start and @search-end addresses will )
|
||||||
|
@ -44,6 +48,10 @@
|
||||||
( dollar matches end of line/string [ #07 next* ] )
|
( dollar matches end of line/string [ #07 next* ] )
|
||||||
( lpar starts subgroup region [ #08 i* next* ] )
|
( lpar starts subgroup region [ #08 i* next* ] )
|
||||||
( rpar ends subgroup region [ #09 i* next* ] )
|
( rpar ends subgroup region [ #09 i* next* ] )
|
||||||
|
( class character class, e.g. [a-z] [ #0a next* n^ ... ] )
|
||||||
|
( (NOTE: n is the number of pairs in ...) )
|
||||||
|
( nclass negative class, e.g. [^a-z] [ #0b next* n^ ... ] )
|
||||||
|
( (NOTE: n is the number of pairs in ...) )
|
||||||
( )
|
( )
|
||||||
( `or` and `star` have the same structure and are handled by the )
|
( `or` and `star` have the same structure and are handled by the )
|
||||||
( same code (;do-or). however, the node types are kept different )
|
( same code (;do-or). however, the node types are kept different )
|
||||||
|
@ -93,7 +101,7 @@
|
||||||
LIT '! emit! space
|
LIT '! emit! space
|
||||||
&loop LDAk #00 EQU ,&done JCN
|
&loop LDAk #00 EQU ,&done JCN
|
||||||
LDAk emit! INC2 ,&loop JMP
|
LDAk emit! INC2 ,&loop JMP
|
||||||
&done POP2 newline #010f DEO BRK
|
&done POP2 newline #ff0e DEO #010f DEO BRK
|
||||||
|
|
||||||
( error messages )
|
( error messages )
|
||||||
@unknown-node-type "unknown 20 "node 20 "type 00
|
@unknown-node-type "unknown 20 "node 20 "type 00
|
||||||
|
@ -159,7 +167,9 @@
|
||||||
LDAk #07 EQU ;do-dollar JCN2
|
LDAk #07 EQU ;do-dollar JCN2
|
||||||
LDAk #08 EQU ;do-lpar JCN2
|
LDAk #08 EQU ;do-lpar JCN2
|
||||||
LDAk #09 EQU ;do-rpar JCN2
|
LDAk #09 EQU ;do-rpar JCN2
|
||||||
LDAk ;unknown-node-type ;error!! JSR2
|
LDAk #0a EQU ;do-ccls JCN2
|
||||||
|
LDAk #0b EQU ;do-ncls JCN2
|
||||||
|
LDAk #dd ;unknown-node-type ;error!! JSR2
|
||||||
|
|
||||||
( used when we hit a dead-end during matching. )
|
( used when we hit a dead-end during matching. )
|
||||||
( )
|
( )
|
||||||
|
@ -244,6 +254,36 @@
|
||||||
LDA2 ;push4 JSR2 ( save (s, right) in the stack for possible backtracking )
|
LDA2 ;push4 JSR2 ( save (s, right) in the stack for possible backtracking )
|
||||||
LDA2 ;loop JMP2 ( continue on left branch )
|
LDA2 ;loop JMP2 ( continue on left branch )
|
||||||
|
|
||||||
|
@matches-cls ( str* regex* -> bool^ )
|
||||||
|
OVR2 LDA ,¬-null JCN
|
||||||
|
( needs to have a character to match )
|
||||||
|
POP2 POP2 ;goto-backtrack JMP2
|
||||||
|
¬-null
|
||||||
|
DUP2 INC2 LDA2 STH2 ( str regex [next] )
|
||||||
|
OVR2 INC2 STH2 ( str regex [str+1 next] )
|
||||||
|
SWP2 LDA STH ( regex [c str+1 next] )
|
||||||
|
#0003 ADD2 LDAk #00 SWP #0002 MUL2 ( r+3 len*2 [c str+1 next] )
|
||||||
|
SWP2 INC2 STH2k ADD2 STH2r ( r+4+len*2 r+4 [c str+1 next] )
|
||||||
|
&loop ( limit addr [c str+1 next] )
|
||||||
|
EQU2k ,&missing JCN
|
||||||
|
LDAk STHkr GTH ,&next1 JCN INC2
|
||||||
|
LDAk STHkr LTH ,&next2 JCN ,&found JMP
|
||||||
|
&next1 INC2
|
||||||
|
&next2 INC2 ,&loop JMP
|
||||||
|
&missing POP2 POP2 POPr ,&negated LDR ,&match JCN
|
||||||
|
&no-match POP2r POP2r ;goto-backtrack JMP2
|
||||||
|
&found POP2 POP2 POPr ,&negated LDR ,&no-match JCN
|
||||||
|
&match STH2r STH2r ;goto-next JMP2
|
||||||
|
[ &negated $1 ]
|
||||||
|
|
||||||
|
( )
|
||||||
|
@do-ccls ( str* regex* -> bool^ )
|
||||||
|
#00 ,matches-cls/negated STR ,matches-cls JMP
|
||||||
|
|
||||||
|
( )
|
||||||
|
@do-ncls ( str* regex* -> bool^ )
|
||||||
|
#01 ,matches-cls/negated STR ,matches-cls JMP
|
||||||
|
|
||||||
( REGEX PARSING )
|
( REGEX PARSING )
|
||||||
|
|
||||||
( do we match across lines? )
|
( do we match across lines? )
|
||||||
|
@ -343,6 +383,8 @@
|
||||||
DUP LIT '$ EQU ;c-dollar JCN2
|
DUP LIT '$ EQU ;c-dollar JCN2
|
||||||
DUP LIT '( EQU ;c-lpar JCN2
|
DUP LIT '( EQU ;c-lpar JCN2
|
||||||
DUP LIT ') EQU ;c-rpar JCN2
|
DUP LIT ') EQU ;c-rpar JCN2
|
||||||
|
DUP LIT '[ EQU ;c-lbrk JCN2
|
||||||
|
DUP LIT '] EQU ;c-rbrk JCN2
|
||||||
DUP LIT '\ EQU ;c-esc JCN2
|
DUP LIT '\ EQU ;c-esc JCN2
|
||||||
DUP LIT '* EQU ;c-star JCN2
|
DUP LIT '* EQU ;c-star JCN2
|
||||||
DUP LIT '+ EQU ;c-plus JCN2
|
DUP LIT '+ EQU ;c-plus JCN2
|
||||||
|
@ -414,6 +456,42 @@
|
||||||
;c-peek-and-finalize JMP2
|
;c-peek-and-finalize JMP2
|
||||||
&mismatched-parens ;mismatched-parens ;error!! JSR2
|
&mismatched-parens ;mismatched-parens ;error!! JSR2
|
||||||
|
|
||||||
|
( doesn't support weird things like []abc] or [-abc] or similar. )
|
||||||
|
( doesn't currently handle "special" escapes such as \n )
|
||||||
|
@c-lbrk ( c^ -> r2* )
|
||||||
|
POP LITr 00 ;pos LDA2 ( pos [0] )
|
||||||
|
LDAk LIT '^ NEQ ,&normal JCN INCr INC2 ( pos [negated?^] )
|
||||||
|
&normal
|
||||||
|
#0a STHr ADD ( src* type^ )
|
||||||
|
;arena-pos LDA2 STH2k ( src* type^ dst* [dst*] )
|
||||||
|
STA LIT2r 0004 ADD2r ( src* [dst+4] )
|
||||||
|
&left-parse ( src* [dst*] )
|
||||||
|
LDAk LIT '] EQU ,&done JCN
|
||||||
|
LDAk LIT '- EQU ,&error JCN
|
||||||
|
LDAk LIT '\ NEQ ,&left JCN INC2
|
||||||
|
&left
|
||||||
|
LDAk STH2kr STA INC2r
|
||||||
|
DUP2 INC2 LDA LIT '- NEQ ,&pre-right JCN INC2 INC2
|
||||||
|
LDAk LIT '] EQU ,&error JCN
|
||||||
|
LDAk LIT '- EQU ,&error JCN
|
||||||
|
&pre-right
|
||||||
|
LDAk LIT '\ NEQ ,&right JCN INC2
|
||||||
|
&right
|
||||||
|
LDAk STH2kr STA INC2 INC2r ,&left-parse JMP
|
||||||
|
&done ( src* [dst*] )
|
||||||
|
INC2 ;pos STA2 STH2r ( dst* )
|
||||||
|
DUP2 ;arena-pos LDA2 ( dst dst a )
|
||||||
|
#0004 ADD2 SUB2 #0002 DIV2 NIP ( dst (dst-(a+4))/2 )
|
||||||
|
;arena-pos LDA2 STH2k #0003 ADD2 STA ( dst [a] )
|
||||||
|
;arena-pos STA2 STH2r ( a )
|
||||||
|
DUP2 ;c-peek-and-finalize JMP2
|
||||||
|
&error
|
||||||
|
#abcd #0000 DIV ( TODO error here )
|
||||||
|
|
||||||
|
@c-rbrk ( c^ -> r2* )
|
||||||
|
POP
|
||||||
|
#0000 DIV ( invariant: should never be seen )
|
||||||
|
|
||||||
( called when we read "." )
|
( called when we read "." )
|
||||||
( )
|
( )
|
||||||
( allocates a dot-node and continues. )
|
( allocates a dot-node and continues. )
|
||||||
|
@ -492,8 +570,8 @@
|
||||||
( ALLOCATING REGEX NDOES )
|
( ALLOCATING REGEX NDOES )
|
||||||
|
|
||||||
@rx-node-sizes
|
@rx-node-sizes
|
||||||
( 00 01 02 03 04 05 06 07 08 09 )
|
( 00 01 02 03 04 05 06 07 08 09 0a 0b )
|
||||||
[ 00 03 03 04 ] [ 05 05 03 03 ] [ 05 05 ]
|
[ 00 03 03 04 ] [ 05 05 03 03 ] [ 05 05 00 00 ]
|
||||||
|
|
||||||
@alloc3 ( mode^ -> r* )
|
@alloc3 ( mode^ -> r* )
|
||||||
#0000 ROT ( 00 00 mode^ )
|
#0000 ROT ( 00 00 mode^ )
|
||||||
|
@ -612,11 +690,13 @@
|
||||||
( you can end up double-appending things. )
|
( you can end up double-appending things. )
|
||||||
@set-next ( target* regex* -> )
|
@set-next ( target* regex* -> )
|
||||||
LDAk #01 LTH ,&unknown JCN
|
LDAk #01 LTH ,&unknown JCN
|
||||||
LDAk #09 GTH ,&unknown JCN
|
LDAk #0b GTH ,&unknown JCN
|
||||||
|
LDAk #09 GTH ,&cc JCN
|
||||||
LDAk #00 SWP ;rx-node-sizes ADD2
|
LDAk #00 SWP ;rx-node-sizes ADD2
|
||||||
LDA #00 SWP ADD2 #0002 SUB2
|
LDA #00 SWP ADD2 #0002 SUB2
|
||||||
;set-next-addr JMP2
|
;set-next-addr JMP2
|
||||||
&unknown LDAk ;unknown-node-type ;error!! JSR2
|
&cc INC2 ;set-next-addr JMP2
|
||||||
|
&unknown LDAk #ee ;unknown-node-type ;error!! JSR2
|
||||||
|
|
||||||
@set-next-or-addr ( target* addr* -> )
|
@set-next-or-addr ( target* addr* -> )
|
||||||
LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN
|
LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN
|
||||||
|
@ -776,30 +856,3 @@
|
||||||
|
|
||||||
@subgroup-pos :subgroup-bot ( the position of the first unallocated subgroup )
|
@subgroup-pos :subgroup-bot ( the position of the first unallocated subgroup )
|
||||||
@subgroup-bot $400 @subgroup-top ( holds up to 256 subgroups (1024 bytes) )
|
@subgroup-bot $400 @subgroup-top ( holds up to 256 subgroups (1024 bytes) )
|
||||||
|
|
||||||
( INTERVAL OPERATIONS )
|
|
||||||
( )
|
|
||||||
( not baked yet )
|
|
||||||
(
|
|
||||||
@min ( first* last* -> min-addr* )
|
|
||||||
SWP2 STH2k ,&incr JMP ( last first [first] )
|
|
||||||
&loop LDAk LDAkr STHr LTH ,&replace JCN ,&incr JMP ( last a [c] )
|
|
||||||
&replace POP2r STH2k ( last a [a] )
|
|
||||||
&incr EQUk ,&done JCN INC2 ,&loop JMP ( last a+1 [c] )
|
|
||||||
&done POP2 POP2 STH2r JMP2r ( c )
|
|
||||||
|
|
||||||
@sort ( first* last* -> )
|
|
||||||
SWP2 ( last first )
|
|
||||||
&loop ;min JSR2 NEQk ,&swap JCN POP2 ,&incr JMP
|
|
||||||
&swap STH2 LDA2k ( last first fx [min] ) STH2kr STA STH2r SWP2 ( last min first )
|
|
||||||
STH2 LDA2 ( last mx [first] ) STH2kr STA STH2r ( last first )
|
|
||||||
&incr EQUk ,&done JCN INC2 ,&loop JMP
|
|
||||||
&done POP2 POP2 JMP2r
|
|
||||||
|
|
||||||
@iv-in-range ( c^ b0^ b1^ -> bool^ )
|
|
||||||
ROT STHk LTH ,&above JCN
|
|
||||||
STHr GTH ,&below JCN #01 JMP2r
|
|
||||||
&above POPr POP &below #00 JMP2r
|
|
||||||
|
|
||||||
@iv-find ( c^ iv* -> bool^ )
|
|
||||||
)
|
|
||||||
|
|
|
@ -52,8 +52,8 @@
|
||||||
#00 ;ptr LDA2 STA ( null terminate string )
|
#00 ;ptr LDA2 STA ( null terminate string )
|
||||||
;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? )
|
;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? )
|
||||||
;buffer ;ptr STA2 ( reset ptr )
|
;buffer ;ptr STA2 ( reset ptr )
|
||||||
( ;buffer ;regex LDA2 ;rx-match JSR2 ( match regex ) )
|
;buffer ;regex LDA2 ;rx-match JSR2 ( match regex )
|
||||||
;buffer ;regex LDA2 ;rx-search JSR2 ( search regex )
|
( ;buffer ;regex LDA2 ;rx-search JSR2 ( search regex ) )
|
||||||
;emit-byte JSR2 nl ( print result )
|
;emit-byte JSR2 nl ( print result )
|
||||||
STHr ,&was-empty JCN
|
STHr ,&was-empty JCN
|
||||||
;s-prompt ;println JSR2
|
;s-prompt ;println JSR2
|
||||||
|
@ -102,8 +102,12 @@
|
||||||
DUP2 ;emit-short JSR2
|
DUP2 ;emit-short JSR2
|
||||||
LIT ': emit! space
|
LIT ': emit! space
|
||||||
LDAk #01 LTH ,&uu JCN
|
LDAk #01 LTH ,&uu JCN
|
||||||
LDAk #09 GTH ,&uu JCN
|
LDAk #0b GTH ,&uu JCN
|
||||||
|
LDAk #09 GTH ,&cc JCN
|
||||||
LDAk #00 SWP ;rx-node-sizes ADD2
|
LDAk #00 SWP ;rx-node-sizes ADD2
|
||||||
LDA ;emit-n JSR2 ,&loop JMP
|
LDA ;emit-n JSR2 ,&loop JMP
|
||||||
|
&cc
|
||||||
|
DUP2 #0003 ADD2 LDA #02 MUL #04 ADD
|
||||||
|
;emit-n JSR2 ,&loop JMP
|
||||||
&uu
|
&uu
|
||||||
;unknown-node-type ;error!! JSR2
|
LDAk #cc ;unknown-node-type ;error!! JSR2
|
||||||
|
|
Loading…
Reference in New Issue