neauoire's optimizations

This commit is contained in:
~d6 2022-09-10 13:31:35 -04:00
parent 0f61f352bb
commit 8d7a1675f8
1 changed file with 19 additions and 24 deletions

View File

@ -54,11 +54,6 @@
( - mul32 memory, 12 bytes ) ( - mul32 memory, 12 bytes )
( - _divmod32 memory, 16 bytes ) ( - _divmod32 memory, 16 bytes )
%TOR { ROT ROT } ( a b c -> c a b )
%COMPLEMENT32 { SWP2 #ffff EOR2 SWP2 #ffff EOR2 }
%DUP4 { OVR2 OVR2 }
%POP4 { POP2 POP2 }
( bitcount: number of bits needed to represent number ) ( bitcount: number of bits needed to represent number )
( equivalent to floor[log2[x]] + 1 ) ( equivalent to floor[log2[x]] + 1 )
@ -82,7 +77,7 @@
SWP ;bitcount8 JSR2 ADD ( nhi+nlo ) SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
JMP2r JMP2r
&hi-set &hi-set
SWP POP #08 ADD ( nhi+8 ) NIP #08 ADD ( nhi+8 )
JMP2r JMP2r
@bitcount32 ( x** -> n^ ) @bitcount32 ( x** -> n^ )
@ -90,9 +85,9 @@
;bitcount16 JSR2 ( xlo* nhi ) ;bitcount16 JSR2 ( xlo* nhi )
DUP #00 NEQ ( xlo* nhi nhi!=0 ) DUP #00 NEQ ( xlo* nhi nhi!=0 )
,&hi-set JCN ( xlo* nhi ) ,&hi-set JCN ( xlo* nhi )
TOR ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo ) ROT ROT ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
&hi-set &hi-set
TOR POP2 #10 ADD ( nhi+16 ) ROT ROT POP2 #10 ADD ( nhi+16 )
JMP2r JMP2r
( equality ) ( equality )
@ -113,7 +108,7 @@
( x != 0 ) ( x != 0 )
( tests whether a 32-bit value, held as two shorts, has any bit set )
( left column: ORs the two shorts, then compares the result with #0000 using NEQ2 )
( right column: ORs the two shorts, then ORs the two remaining bytes together )
( NOTE[review]: the right-hand form leaves the OR of all four bytes, which is non-zero but not necessarily #01 - confirm no caller relies on an exact #01 )
@non-zero32 ( x** -> bool^ ) @non-zero32 ( x** -> bool^ )
ORA2 #0000 NEQ2 JMP2r ORA2 ORA JMP2r
( comparisons ) ( comparisons )
@ -165,7 +160,7 @@
( ~x ) ( ~x )
( bitwise NOT of a 32-bit value held as two shorts )
( each short is XORed with #ffff; the first SWP2 brings the high short to the top, the second restores hi/lo order )
( left column uses the COMPLEMENT32 macro, right column spells out the same instruction sequence inline )
@complement32 ( x** -> ~x** ) @complement32 ( x** -> ~x** )
COMPLEMENT32 JMP2r SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r
( temporary registers ) ( temporary registers )
( shared by most operations, except mul32 and div32 ) ( shared by most operations, except mul32 and div32 )
@ -185,20 +180,20 @@
( shift right by 0-7 bits ) ( shift right by 0-7 bits )
( input bytes are x0 x1 x2 x3 from high to low, followed by the shift count n )
( n is kept on the return stack and reused for every step: STHk stashes it, STHkr peeks it, the final STHr consumes it )
( each step after the first appends #00 to the next byte to form a short, shifts that short by n, )
( and ORs in the byte written by the previous step, so bits shifted out of a byte land in the byte below it )
( NOTE[review]: assumes n is in 0-7 so that SFT/SFT2 act as a right shift - see the Uxntal SFT definition )
@rshift32-0 ( x** n^ -> x>>n ) @rshift32-0 ( x** n^ -> x>>n )
STHk SFT ;m32/z3 STA ( write z3 ) STHk SFT ;m32/z3 STA ( write z3 )
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 ) #00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
#00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 ) #00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 )
#00 STHr SFT2 #00 ;m32/z1 LDA ORA2 ( compute z0,z1 ) #00 STHr SFT2 #00 ;m32/z1 LDA ORA2 ( compute z0,z1 )
( z0,z1 stay on the stack; the low short z2,z3 is loaded back from memory to complete the result )
;m32/z2 LDA2 ;m32/z2 LDA2
JMP2r JMP2r
( shift right by 8-15 bits ) ( shift right by 8-15 bits )
( input bytes are x0 x1 x2 x3 from high to low, followed by the shift count n )
@rshift32-1 ( x** n^ -> x>>n ) @rshift32-1 ( x** n^ -> x>>n )
( n-8 is moved to the return stack; POP then drops x3, which is shifted out entirely by a shift of 8 or more )
#08 SUB STH POP #08 SUB STH POP
STHkr SFT ;m32/z3 STA ( write z3 ) STHkr SFT ;m32/z3 STA ( write z3 )
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 ) #00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
#00 STHr SFT2 #00 ;m32/z2 LDA ORA2 ( compute z1,z2 ) #00 STHr SFT2 #00 ;m32/z2 LDA ORA2 ( compute z1,z2 )
( a #00 byte is rotated beneath z1,z2 to become the top byte z0, then z3 is loaded from memory )
( TOR in the left column and ROT ROT in the right column are the same operation: a b c -> c a b )
#00 TOR ;m32/z3 LDA #00 ROT ROT ;m32/z3 LDA
JMP2r JMP2r
( shift right by 16-23 bits ) ( shift right by 16-23 bits )
@ -239,7 +234,7 @@
#00 SWP STHkr SFT2 ;m32/z1 STA2 ( store z1,z2 ) #00 SWP STHkr SFT2 ;m32/z1 STA2 ( store z1,z2 )
#00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 ) #00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
STHr SFT ;m32/z0 LDA ORA ( calculate z0 ) STHr SFT ;m32/z0 LDA ORA ( calculate z0 )
SWP POP ( x0 unused ) NIP ( x0 unused )
;m32/z1 LDA2 #00 ;m32/z1 LDA2 #00
JMP2r JMP2r
@ -256,7 +251,7 @@
( left shift by n where the code subtracts #18 [24] from n first; only the lowest byte x3 contributes to the result )
@lshift32-3 ( x** n^ -> x<<n ) @lshift32-3 ( x** n^ -> x<<n )
( n-24 is moved into the high nibble of the shift byte, which SFT uses as the left-shift count )
#18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 ) #18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
SFT ( x0 x1 x2 x3<<r ) SFT ( x0 x1 x2 x3<<r )
( discard x0 x1 and x2, keep the shifted x3 as the top byte, then push three zero bytes below it )
( left column drops them with SWP2 POP2 SWP POP, right column with NIP2 NIP )
SWP2 POP2 SWP POP #0000 #00 NIP2 NIP #0000 #00
JMP2r JMP2r
( arithmetic ) ( arithmetic )
@ -265,7 +260,7 @@
@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* ) @add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi ) ;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi )
;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi ) ;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi )
#0000 #0000 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo ) #0000 DUP2 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo )
( x3 + y3 => z2z3 ) ( x3 + y3 => z2z3 )
#00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2 #00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2
@ -288,9 +283,9 @@
( -x ) ( -x )
@negate32 ( x** -> -x** ) @negate32 ( x** -> -x** )
COMPLEMENT32 ;complement32 JSR2 ( ~x** )
INC2 ( ~xhi -xlo ) INC2 ( ~xhi -xlo )
DUP2 #0000 NEQ2 ( ~xhi -xlo non-zero? ) DUP2 ORA ( ~xhi -xlo non-zero? )
,&done JCN ( xlo non-zero => don't inc hi ) ,&done JCN ( xlo non-zero => don't inc hi )
SWP2 INC2 SWP2 ( -xhi -xlo ) SWP2 INC2 SWP2 ( -xhi -xlo )
&done &done
@ -380,7 +375,7 @@
#00 DUP2 ( shift 0 shift 0 ) #00 DUP2 ( shift 0 shift 0 )
( 1<<shift -> cur ) ( 1<<shift -> cur )
#0000 #0001 ROT2 POP #0000 INC2k ROT2 POP
;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 ;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
( div<<shift -> div ) ( div<<shift -> div )
@ -412,10 +407,10 @@
( greatest common divisor - euclidean algorithm ) ( greatest common divisor - euclidean algorithm )
@gcd32 ( x** y** -> z** ) @gcd32 ( x** y** -> z** )
&loop ( x y ) &loop ( x y )
DUP4 ( x y y ) OVR2 OVR2 ( x y y )
;is-zero32 JSR2 ( x y y=0? ) ;is-zero32 JSR2 ( x y y=0? )
,&done JCN ( x y ) ,&done JCN ( x y )
DUP4 ( x y y ) OVR2 OVR2 ( x y y )
STH2 STH2 ( x y [y] ) STH2 STH2 ( x y [y] )
;mod32 JSR2 ( r=x%y [y] ) ;mod32 JSR2 ( r=x%y [y] )
STH2r ( rhi rlo yhi [ylo] ) STH2r ( rhi rlo yhi [ylo] )
@ -426,5 +421,5 @@
ROT2 ( yhi ylo rhi rlo ) ROT2 ( yhi ylo rhi rlo )
,&loop JMP ,&loop JMP
&done &done
POP4 ( x ) POP2 POP2 ( x )
JMP2r JMP2r