commit asma.rom

This commit is contained in:
~d6 2022-04-15 01:40:52 -04:00
parent 5d90cf6a3d
commit cae3ce4a7c
3 changed files with 96 additions and 39 deletions

BIN
asma.rom Normal file

Binary file not shown.

123
regex.tal
View File

@ -23,8 +23,12 @@
( ) ( )
( when searching, ^ matches the beginning of the string OR a line. ) ( when searching, ^ matches the beginning of the string OR a line. )
( $ matches the end of a line OR the end of the entire string. ) ( $ matches the end of a line OR the end of the entire string. )
( (the ^ and $ operators aren't yet supported.) the dot nodes will ) ( the dot nodes will not match newline characters, which must be )
( not match newline characters, which must be matched explicitly. ) ( matched explicitly. )
( )
( finally, search-multiline will cause ^ and $ to use the matching )
( behavior (i.e. only matching the beginning or end of a string). )
( however dot nodes will still not match newline characters. )
( ) ( )
( search returns 01 if the string was matched and 00 otherwise. ) ( search returns 01 if the string was matched and 00 otherwise. )
( additionally, the @search-start and @search-end addresses will ) ( additionally, the @search-start and @search-end addresses will )
@ -44,6 +48,10 @@
( dollar matches end of line/string [ #07 next* ] ) ( dollar matches end of line/string [ #07 next* ] )
( lpar starts subgroup region [ #08 i* next* ] ) ( lpar starts subgroup region [ #08 i* next* ] )
( rpar ends subgroup region [ #09 i* next* ] ) ( rpar ends subgroup region [ #09 i* next* ] )
( class character class, e.g. [a-z] [ #0a next* n^ ... ] )
( (NOTE: n is the number of pairs in ...) )
( nclass negative class, e.g. [^a-z] [ #0b next* n^ ... ] )
( (NOTE: n is the number of pairs in ...) )
( ) ( )
( `or` and `star` have the same structure and are handled by the ) ( `or` and `star` have the same structure and are handled by the )
( same code (;do-or). however, the node types are kept different ) ( same code (;do-or). however, the node types are kept different )
@ -93,7 +101,7 @@
LIT '! emit! space LIT '! emit! space
&loop LDAk #00 EQU ,&done JCN &loop LDAk #00 EQU ,&done JCN
LDAk emit! INC2 ,&loop JMP LDAk emit! INC2 ,&loop JMP
&done POP2 newline #010f DEO BRK &done POP2 newline #ff0e DEO #010f DEO BRK
( error messages ) ( error messages )
@unknown-node-type "unknown 20 "node 20 "type 00 @unknown-node-type "unknown 20 "node 20 "type 00
@ -159,7 +167,9 @@
LDAk #07 EQU ;do-dollar JCN2 LDAk #07 EQU ;do-dollar JCN2
LDAk #08 EQU ;do-lpar JCN2 LDAk #08 EQU ;do-lpar JCN2
LDAk #09 EQU ;do-rpar JCN2 LDAk #09 EQU ;do-rpar JCN2
LDAk ;unknown-node-type ;error!! JSR2 LDAk #0a EQU ;do-ccls JCN2
LDAk #0b EQU ;do-ncls JCN2
LDAk #dd ;unknown-node-type ;error!! JSR2
( used when we hit a dead-end during matching. ) ( used when we hit a dead-end during matching. )
( ) ( )
@ -244,6 +254,36 @@
LDA2 ;push4 JSR2 ( save (s, right) in the stack for possible backtracking ) LDA2 ;push4 JSR2 ( save (s, right) in the stack for possible backtracking )
LDA2 ;loop JMP2 ( continue on left branch ) LDA2 ;loop JMP2 ( continue on left branch )
@matches-cls ( str* regex* -> bool^ )
OVR2 LDA ,&not-null JCN
( needs to have a character to match )
POP2 POP2 ;goto-backtrack JMP2
&not-null
DUP2 INC2 LDA2 STH2 ( str regex [next] )
OVR2 INC2 STH2 ( str regex [str+1 next] )
SWP2 LDA STH ( regex [c str+1 next] )
#0003 ADD2 LDAk #00 SWP #0002 MUL2 ( r+3 len*2 [c str+1 next] )
SWP2 INC2 STH2k ADD2 STH2r ( r+4+len*2 r+4 [c str+1 next] )
&loop ( limit addr [c str+1 next] )
EQU2k ,&missing JCN
LDAk STHkr GTH ,&next1 JCN INC2
LDAk STHkr LTH ,&next2 JCN ,&found JMP
&next1 INC2
&next2 INC2 ,&loop JMP
&missing POP2 POP2 POPr ,&negated LDR ,&match JCN
&no-match POP2r POP2r ;goto-backtrack JMP2
&found POP2 POP2 POPr ,&negated LDR ,&no-match JCN
&match STH2r STH2r ;goto-next JMP2
[ &negated $1 ]
( )
@do-ccls ( str* regex* -> bool^ )
#00 ,matches-cls/negated STR ,matches-cls JMP
( )
@do-ncls ( str* regex* -> bool^ )
#01 ,matches-cls/negated STR ,matches-cls JMP
( REGEX PARSING ) ( REGEX PARSING )
( do we match across lines? ) ( do we match across lines? )
@ -343,6 +383,8 @@
DUP LIT '$ EQU ;c-dollar JCN2 DUP LIT '$ EQU ;c-dollar JCN2
DUP LIT '( EQU ;c-lpar JCN2 DUP LIT '( EQU ;c-lpar JCN2
DUP LIT ') EQU ;c-rpar JCN2 DUP LIT ') EQU ;c-rpar JCN2
DUP LIT '[ EQU ;c-lbrk JCN2
DUP LIT '] EQU ;c-rbrk JCN2
DUP LIT '\ EQU ;c-esc JCN2 DUP LIT '\ EQU ;c-esc JCN2
DUP LIT '* EQU ;c-star JCN2 DUP LIT '* EQU ;c-star JCN2
DUP LIT '+ EQU ;c-plus JCN2 DUP LIT '+ EQU ;c-plus JCN2
@ -414,6 +456,42 @@
;c-peek-and-finalize JMP2 ;c-peek-and-finalize JMP2
&mismatched-parens ;mismatched-parens ;error!! JSR2 &mismatched-parens ;mismatched-parens ;error!! JSR2
( doesn't support weird things like []abc] or [-abc] or similar. )
( doesn't currently handle "special" escapes such as \n )
@c-lbrk ( c^ -> r2* )
POP LITr 00 ;pos LDA2 ( pos [0] )
LDAk LIT '^ NEQ ,&normal JCN INCr INC2 ( pos [negated?^] )
&normal
#0a STHr ADD ( src* type^ )
;arena-pos LDA2 STH2k ( src* type^ dst* [dst*] )
STA LIT2r 0004 ADD2r ( src* [dst+4] )
&left-parse ( src* [dst*] )
LDAk LIT '] EQU ,&done JCN
LDAk LIT '- EQU ,&error JCN
LDAk LIT '\ NEQ ,&left JCN INC2
&left
LDAk STH2kr STA INC2r
DUP2 INC2 LDA LIT '- NEQ ,&pre-right JCN INC2 INC2
LDAk LIT '] EQU ,&error JCN
LDAk LIT '- EQU ,&error JCN
&pre-right
LDAk LIT '\ NEQ ,&right JCN INC2
&right
LDAk STH2kr STA INC2 INC2r ,&left-parse JMP
&done ( src* [dst*] )
INC2 ;pos STA2 STH2r ( dst* )
DUP2 ;arena-pos LDA2 ( dst dst a )
#0004 ADD2 SUB2 #0002 DIV2 NIP ( dst (dst-(a+4))/2 )
;arena-pos LDA2 STH2k #0003 ADD2 STA ( dst [a] )
;arena-pos STA2 STH2r ( a )
DUP2 ;c-peek-and-finalize JMP2
&error
#abcd #0000 DIV ( TODO error here )
@c-rbrk ( c^ -> r2* )
POP
#0000 DIV ( invariant: should never be seen )
( called when we read "." ) ( called when we read "." )
( ) ( )
( allocates a dot-node and continues. ) ( allocates a dot-node and continues. )
@ -492,8 +570,8 @@
( ALLOCATING REGEX NDOES ) ( ALLOCATING REGEX NDOES )
@rx-node-sizes @rx-node-sizes
( 00 01 02 03 04 05 06 07 08 09 ) ( 00 01 02 03 04 05 06 07 08 09 0a 0b )
[ 00 03 03 04 ] [ 05 05 03 03 ] [ 05 05 ] [ 00 03 03 04 ] [ 05 05 03 03 ] [ 05 05 00 00 ]
@alloc3 ( mode^ -> r* ) @alloc3 ( mode^ -> r* )
#0000 ROT ( 00 00 mode^ ) #0000 ROT ( 00 00 mode^ )
@ -612,11 +690,13 @@
( you can end up double-appending things. ) ( you can end up double-appending things. )
@set-next ( target* regex* -> ) @set-next ( target* regex* -> )
LDAk #01 LTH ,&unknown JCN LDAk #01 LTH ,&unknown JCN
LDAk #09 GTH ,&unknown JCN LDAk #0b GTH ,&unknown JCN
LDAk #09 GTH ,&cc JCN
LDAk #00 SWP ;rx-node-sizes ADD2 LDAk #00 SWP ;rx-node-sizes ADD2
LDA #00 SWP ADD2 #0002 SUB2 LDA #00 SWP ADD2 #0002 SUB2
;set-next-addr JMP2 ;set-next-addr JMP2
&unknown LDAk ;unknown-node-type ;error!! JSR2 &cc INC2 ;set-next-addr JMP2
&unknown LDAk #ee ;unknown-node-type ;error!! JSR2
@set-next-or-addr ( target* addr* -> ) @set-next-or-addr ( target* addr* -> )
LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN LDA2k #0000 EQU2 ( target addr addr=0? ) ,&is-zero JCN
@ -776,30 +856,3 @@
@subgroup-pos :subgroup-bot ( the position of the first unallocated subgroup ) @subgroup-pos :subgroup-bot ( the position of the first unallocated subgroup )
@subgroup-bot $400 @subgroup-top ( holds up to 256 subgroups (1024 bytes) ) @subgroup-bot $400 @subgroup-top ( holds up to 256 subgroups (1024 bytes) )
( INTERVAL OPERATIONS )
( )
( not baked yet )
(
@min ( first* last* -> min-addr* )
SWP2 STH2k ,&incr JMP ( last first [first] )
&loop LDAk LDAkr STHr LTH ,&replace JCN ,&incr JMP ( last a [c] )
&replace POP2r STH2k ( last a [a] )
&incr EQUk ,&done JCN INC2 ,&loop JMP ( last a+1 [c] )
&done POP2 POP2 STH2r JMP2r ( c )
@sort ( first* last* -> )
SWP2 ( last first )
&loop ;min JSR2 NEQk ,&swap JCN POP2 ,&incr JMP
&swap STH2 LDA2k ( last first fx [min] ) STH2kr STA STH2r SWP2 ( last min first )
STH2 LDA2 ( last mx [first] ) STH2kr STA STH2r ( last first )
&incr EQUk ,&done JCN INC2 ,&loop JMP
&done POP2 POP2 JMP2r
@iv-in-range ( c^ b0^ b1^ -> bool^ )
ROT STHk LTH ,&above JCN
STHr GTH ,&below JCN #01 JMP2r
&above POPr POP &below #00 JMP2r
@iv-find ( c^ iv* -> bool^ )
)

View File

@ -52,8 +52,8 @@
#00 ;ptr LDA2 STA ( null terminate string ) #00 ;ptr LDA2 STA ( null terminate string )
;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? ) ;ptr LDA2 ;buffer EQU2 STH ( stash is-empty? )
;buffer ;ptr STA2 ( reset ptr ) ;buffer ;ptr STA2 ( reset ptr )
( ;buffer ;regex LDA2 ;rx-match JSR2 ( match regex ) ) ;buffer ;regex LDA2 ;rx-match JSR2 ( match regex )
;buffer ;regex LDA2 ;rx-search JSR2 ( search regex ) ( ;buffer ;regex LDA2 ;rx-search JSR2 ( search regex ) )
;emit-byte JSR2 nl ( print result ) ;emit-byte JSR2 nl ( print result )
STHr ,&was-empty JCN STHr ,&was-empty JCN
;s-prompt ;println JSR2 ;s-prompt ;println JSR2
@ -102,8 +102,12 @@
DUP2 ;emit-short JSR2 DUP2 ;emit-short JSR2
LIT ': emit! space LIT ': emit! space
LDAk #01 LTH ,&uu JCN LDAk #01 LTH ,&uu JCN
LDAk #09 GTH ,&uu JCN LDAk #0b GTH ,&uu JCN
LDAk #09 GTH ,&cc JCN
LDAk #00 SWP ;rx-node-sizes ADD2 LDAk #00 SWP ;rx-node-sizes ADD2
LDA ;emit-n JSR2 ,&loop JMP LDA ;emit-n JSR2 ,&loop JMP
&cc
DUP2 #0003 ADD2 LDA #02 MUL #04 ADD
;emit-n JSR2 ,&loop JMP
&uu &uu
;unknown-node-type ;error!! JSR2 LDAk #cc ;unknown-node-type ;error!! JSR2