get + and ? working in regex parser

This commit is contained in:
~d6 2022-02-03 01:32:35 -05:00
parent b1e5142e95
commit 5e799b381a
1 changed files with 53 additions and 6 deletions

View File

@ -52,6 +52,8 @@
( error message strings: each word is raw ASCII, 20 is a space byte )
( between words, and a 00 byte terminates the string. )
( plus-invariant and qmark-invariant are handed to error! by c-plus and c-qmark. )
@stack-is-empty "stack 20 "is 20 "empty 00
@arena-is-full "arena 20 "is 20 "full 00
@star-invariant "star 20 "invariant 20 "failed 00
@plus-invariant "plus 20 "invariant 20 "failed 00
@qmark-invariant "question 20 "mark 20 "invariant 20 "failed 00
( REGEX MATCHING )
@ -152,15 +154,21 @@
@peek-to-star ( -> is-star^ )
    ( push the literal first, then fetch the byte that pos points at; )
    ( EQU is symmetric so the operand order does not matter. )
    LIT '* ;pos LDA2 LDA EQU
    JMP2r
( does the byte addressed by pos equal "+"? pos itself is not moved. )
@peek-to-plus ( -> is-plus^ )
    ( literal first, then the byte under pos; EQU is symmetric )
    LIT '+ ;pos LDA2 LDA EQU
    JMP2r
( does the byte addressed by pos equal "?"? pos itself is not moved. )
@peek-to-qmark ( -> is-qmark^ )
    ( literal first, then the byte under pos; EQU is symmetric )
    LIT '? ;pos LDA2 LDA EQU
    JMP2r
( advance pos by one )
@skip
    ( LDA2k keeps the address of pos on the stack so the same address )
    ( can be reused for the store after the value is incremented. )
    ;pos LDA2k INC2 SWP2 STA2
    JMP2r
( TODO: )
( 1. character groups: [] and [^] )
( 2. one-or-more: + -- done, see alloc-plus )
( 3. zero-or-one: ? -- done, see alloc-qmark )
( 4. symbolic escapes, e.g. \n )
( STRETCH GOALS: )
( a. ^ and $ )
@ -205,6 +213,8 @@
DUP LIT ') EQU ;c-rpar JCN2
DUP LIT '\ EQU ;c-esc JCN2
DUP LIT '* EQU ;c-star JCN2
DUP LIT '+ EQU ;c-plus JCN2
DUP LIT '? EQU ;c-qmark JCN2
;c-char JMP2
( either finalize the given r0/r1 or else wrap it in )
@ -213,9 +223,13 @@
( we use this look-ahead approach rather than compiling )
( star nodes directly since the implementation is simpler. )
@c-peek-and-finalize ( r0* r1* -> r2* )
    ( look one character ahead for a postfix operator. each peek-to-* )
    ( routine pushes a flag byte that JCN consumes, so r0 r1 stay intact. )
    ;peek-to-star JSR2 ,&next-is-star JCN
    ;peek-to-plus JSR2 ,&next-is-plus JCN
    ;peek-to-qmark JSR2 ,&next-is-qmark JCN
    ,&finally JMP ( no operator follows: r0 r1 )
    ( operator found: step past it, drop r1, wrap r0 in the matching node )
    ( and duplicate the result so two pointers are on the stack again. )
    &next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ,&finally JMP
    &next-is-plus ;skip JSR2 POP2 ;alloc-plus JSR2 DUP2 ,&finally JMP
    &next-is-qmark ;skip JSR2 POP2 ;alloc-qmark JSR2 DUP2 ( falls through )
    ( hand the pair to push-next, then resume the compile loop )
    &finally ;push-next JSR2 ;compile-region-loop JMP2
( called when we reach EOF of the input string )
@ -302,6 +316,20 @@
POP
;star-invariant ;error! JSR2
( dispatch target for a bare "+" in the pattern. )
( )
( c-peek-and-finalize is meant to consume "+" by look-ahead, so getting )
( here means an invariant was broken; it is reported through error! )
@c-plus ( c^ -> regex* )
    POP ( discard c )
    ;plus-invariant ;error! JSR2
( dispatch target for a bare "?" in the pattern. )
( )
( c-peek-and-finalize is meant to consume "?" by look-ahead, so getting )
( here means an invariant was broken; it is reported through error! )
@c-qmark ( c^ -> regex* )
    POP ( discard c )
    ;qmark-invariant ;error! JSR2
( ALLOCATING REGEX NODES )
@alloc3 ( mode^ -> r* )
@ -341,6 +369,25 @@
;set-next JSR2 ( [r] )
STH2r JMP2r
( build the node used for "+", one-or-more, around expr. )
( a new 5-byte record r is filled in and passed to set-next together )
( with expr; the pointer returned to the caller is the original expr, )
( not the address of the new record. )
@alloc-plus ( expr* -> r* )
( ask alloc for 05 and park the result r on the return stack. )
( the stores below touch offsets 0 to 4, so 05 is presumably the size in bytes -- TODO confirm )
#05 ;alloc JSR2 STH2 ( expr [r] )
( byte 0 of r gets the tag 05; presumably the node type -- confirm against alloc-star )
#05 STH2kr STA ( expr [r] )
( bytes 1-2 of r get a copy of the expr pointer )
DUP2 STH2kr INC2 STA2 ( expr [r] )
( bytes 3-4 of r are cleared to 0000 )
#0000 STH2kr #0003 ADD2 STA2 ( expr [r] )
( move r to the working stack under expr and keep a copy of expr for the return value )
STH2r SWP2 STH2k ( r expr [expr] )
( set-next receives r then expr; presumably it points the open next links of expr at r -- verify )
;set-next JSR2 ( [expr] )
( return the saved expr pointer )
STH2r JMP2r
( build the node used for "?", zero-or-one, around expr. )
( an empty node e is allocated, then a 5-byte record r that holds both )
( the expr pointer and the e pointer; r is returned. )
@alloc-qmark ( expr* -> r* )
( allocate the empty node e and keep a copy on the return stack )
;alloc-empty JSR2 STH2k ( expr e [e] )
( set-next receives e then a copy of expr; presumably it points the open next links of expr at e -- verify )
OVR2 ;set-next JSR2 ( expr [e] )
( ask alloc for 05 and park the result r on top of e on the return stack )
#05 ;alloc JSR2 STH2 ( expr [r e] )
( byte 0 of r gets the tag 04; presumably the node type for alternation -- TODO confirm )
#04 STH2kr STA ( expr [r e] )
( bytes 1-2 of r get the expr pointer, which is consumed here )
STH2kr INC2 STA2 ( [r e] )
( bring e to the working stack, then a copy of r on top of it )
SWP2r STH2r STH2kr ( e r [r] )
( bytes 3-4 of r get the e pointer )
#0003 ADD2 STA2 ( [r] )
( return r )
STH2r JMP2r
( replace a 0000 pointer with a freshly allocated empty node; )
( any other pointer is handed back unchanged. )
@alloc-if-null ( r* -> r2* )
    ( ORAk ors the two bytes of r without consuming it: nonzero means r is set )
    ORAk ,&return JCN
    ( r was 0000: drop it and tail-call alloc-empty, which returns to our caller )
    POP2 ;alloc-empty JMP2
    &return JMP2r