get + and ? working in regex parser
This commit is contained in:
parent
b1e5142e95
commit
5e799b381a
59
regex.tal
59
regex.tal
|
@ -52,6 +52,8 @@
|
|||
( error message data: each label holds ASCII words joined by raw 20 bytes, i.e. spaces, and ends with a raw 00 byte. )
( the three -invariant labels are the ones handed to error! by c-star, c-plus and c-qmark. )
@stack-is-empty "stack 20 "is 20 "empty 00
|
||||
@arena-is-full "arena 20 "is 20 "full 00
|
||||
@star-invariant "star 20 "invariant 20 "failed 00
|
||||
@plus-invariant "plus 20 "invariant 20 "failed 00
|
||||
@qmark-invariant "question 20 "mark 20 "invariant 20 "failed 00
|
||||
|
||||
( REGEX MATCHING )
|
||||
|
||||
|
@ -152,15 +154,21 @@
|
|||
@peek-to-star ( -> is-star^ )
|
||||
( fetch the address stored at pos, read the byte it points to, and compare that byte with '*'. )
( EQU leaves 01 when they match and 00 otherwise; pos itself is not modified. )
;pos LDA2 LDA LIT '* EQU JMP2r
|
||||
|
||||
( is pos currently pointing to a plus? )
|
||||
@peek-to-plus ( -> is-plus^ )
|
||||
( fetch the address stored at pos, read the byte it points to, and compare that byte with '+'. )
( EQU leaves 01 when they match and 00 otherwise; pos itself is not modified. )
;pos LDA2 LDA LIT '+ EQU JMP2r
|
||||
|
||||
( is pos currently pointing to a qmark? )
|
||||
@peek-to-qmark ( -> is-qmark^ )
|
||||
( fetch the address stored at pos, read the byte it points to, and compare that byte with '?'. )
( EQU leaves 01 when they match and 00 otherwise; pos itself is not modified. )
;pos LDA2 LDA LIT '? EQU JMP2r
|
||||
|
||||
( just increment pos )
|
||||
@skip
|
||||
( read the short stored at pos, add one to it, and write it back to pos. )
( nothing is taken from or left on the working stack. )
;pos LDA2 INC2 ;pos STA2 JMP2r
|
||||
|
||||
( TODO: )
|
||||
( 1. character groups: [] and [^] )
|
||||
( 2. one-or-more: + )
|
||||
( 3. zero-or-one: ? )
|
||||
( 4. symbolic escapes, e.g. \n )
|
||||
( 2. symbolic escapes, e.g. \n )
|
||||
|
||||
( STRETCH GOALS: )
|
||||
( a. ^ and $ )
|
||||
|
@ -205,6 +213,8 @@
|
|||
DUP LIT ') EQU ;c-rpar JCN2
|
||||
DUP LIT '\ EQU ;c-esc JCN2
|
||||
DUP LIT '* EQU ;c-star JCN2
|
||||
DUP LIT '+ EQU ;c-plus JCN2
|
||||
DUP LIT '? EQU ;c-qmark JCN2
|
||||
;c-char JMP2
|
||||
|
||||
( either finalize the given r0/r1 or else wrap it in )
|
||||
|
@ -213,9 +223,13 @@
|
|||
( we use this look-ahead approach rather than compiling )
|
||||
( star nodes directly since the implementation is simpler. )
|
||||
@c-peek-and-finalize ( r0* r1* -> r2* )
|
||||
;peek-to-star JSR2 ( r0 r1 next-is-star? )
|
||||
,&next-is-star JCN ,&finally JMP ( r0 r1 )
|
||||
&next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ( star )
|
||||
;peek-to-star JSR2 ( r0 r1 next-is-star? ) ,&next-is-star JCN
|
||||
;peek-to-plus JSR2 ( r0 r1 next-is-plus? ) ,&next-is-plus JCN
|
||||
;peek-to-qmark JSR2 ( r0 r1 next-is-qmark? ) ,&next-is-qmark JCN
|
||||
,&finally JMP ( r0 r1 )
|
||||
&next-is-star ;skip JSR2 POP2 ;alloc-star JSR2 DUP2 ,&finally JMP
|
||||
&next-is-plus ;skip JSR2 POP2 ;alloc-plus JSR2 DUP2 ,&finally JMP
|
||||
&next-is-qmark ;skip JSR2 POP2 ;alloc-qmark JSR2 DUP2 ,&finally JMP
|
||||
&finally ;push-next JSR2 ;compile-region-loop JMP2
|
||||
|
||||
( called when we reach EOF of the input string )
|
||||
|
@ -302,6 +316,20 @@
|
|||
POP
|
||||
;star-invariant ;error! JSR2
|
||||
|
||||
( called if we parse a "+" )
|
||||
( )
|
||||
( actually calling this means the code broke an invariant somewhere. )
|
||||
@c-plus ( c^ -> regex* )
|
||||
( discard the character byte that was passed in. )
POP
|
||||
( hand the plus-invariant message address to error!. )
( NOTE: nothing follows this call, so error! presumably never returns -- confirm against its definition. )
;plus-invariant ;error! JSR2
|
||||
|
||||
( called if we parse a "?" )
|
||||
( )
|
||||
( actually calling this means the code broke an invariant somewhere. )
|
||||
@c-qmark ( c^ -> regex* )
|
||||
( discard the character byte that was passed in. )
POP
|
||||
( hand the qmark-invariant message address to error!. )
( NOTE: nothing follows this call, so error! presumably never returns -- confirm against its definition. )
;qmark-invariant ;error! JSR2
|
||||
|
||||
( ALLOCATING REGEX NODES )
|
||||
|
||||
@alloc3 ( mode^ -> r* )
|
||||
|
@ -341,6 +369,25 @@
|
|||
;set-next JSR2 ( [r] )
|
||||
STH2r JMP2r
|
||||
|
||||
( build the node used for a one-or-more expression. )
( a node r is obtained from alloc, filled in, and passed to set-next together with expr. )
( NOTE: despite the r* in the signature, the value left on the stack is the expr pointer )
( stashed by STH2k below, not the newly allocated node r. )
@alloc-plus ( expr* -> r* )
|
||||
( request a node from alloc with argument 05 and stash its address r on the return stack. )
#05 ;alloc JSR2 STH2 ( expr [r] )
|
||||
( byte 0 of the node: tag 05 -- presumably the same tag a star node uses; confirm against alloc-star. )
#05 STH2kr STA ( expr [r] )
|
||||
( bytes 1-2 of the node: a copy of the expr pointer. )
DUP2 STH2kr INC2 STA2 ( expr [r] )
|
||||
( bytes 3-4 of the node: 0000. )
#0000 STH2kr #0003 ADD2 STA2 ( expr [r] )
|
||||
( bring r back, order the arguments as r expr, and keep a copy of expr on the return stack. )
STH2r SWP2 STH2k ( r expr [expr] )
|
||||
( presumably makes expr continue into r -- confirm against the definition of set-next. )
;set-next JSR2 ( [expr] )
|
||||
( return the stashed expr pointer. )
STH2r JMP2r
|
||||
|
||||
( build the node used for a zero-or-one expression. )
( an empty node e is created and passed to set-next with expr; then a node r is allocated )
( holding tag 04, the expr pointer and the e pointer. r is returned. )
( in the return-stack notes below, the leftmost item inside the brackets is the top of the return stack. )
@alloc-qmark ( expr* -> r* )
|
||||
( create an empty node e and keep a copy of its address on the return stack. )
;alloc-empty JSR2 STH2k ( expr e [e] )
|
||||
( call set-next with e expr on the stack -- presumably makes expr continue into e; confirm against set-next. )
OVR2 ;set-next JSR2 ( expr [e] )
|
||||
( request a node from alloc with argument 05 and stash its address r above e. )
#05 ;alloc JSR2 STH2 ( expr [r e] )
|
||||
( byte 0 of the node: tag 04 -- presumably the alternation tag; confirm against the other allocators. )
#04 STH2kr STA ( expr [r e] )
|
||||
( bytes 1-2 of the node: the expr pointer. )
STH2kr INC2 STA2 ( [r e] )
|
||||
( swap the return stack so e is on top, pop e, then copy r without removing it. )
SWP2r STH2r STH2kr ( e r [r] )
|
||||
( bytes 3-4 of the node: the e pointer. )
#0003 ADD2 STA2 ( [r] )
|
||||
( return the new node r. )
STH2r JMP2r
|
||||
|
||||
( if r is 0000, allocate an empty node )
|
||||
@alloc-if-null ( r* -> r2* )
|
||||
( ORAk keeps r and pushes the OR of its two bytes, which is zero only when r is 0000. )
( non-zero: jump straight to the return with r untouched. )
( zero: drop r and return whatever alloc-empty produces instead. )
ORAk ,&return JCN POP2 ;alloc-empty JSR2 &return JMP2r
|
||||
|
|
Loading…
Reference in New Issue