neauoire's optimizations
This commit is contained in:
parent
0f61f352bb
commit
8d7a1675f8
43
math32.tal
43
math32.tal
|
@ -54,11 +54,6 @@
|
|||
( - mul32 memory, 12 bytes )
|
||||
( - _divmod32 memory, 16 bytes )
|
||||
|
||||
%TOR { ROT ROT } ( a b c -> c a b )
|
||||
%COMPLEMENT32 { SWP2 #ffff EOR2 SWP2 #ffff EOR2 }
|
||||
%DUP4 { OVR2 OVR2 }
|
||||
%POP4 { POP2 POP2 }
|
||||
|
||||
( bitcount: number of bits needed to represent number )
|
||||
( equivalent to floor[log2[x]] + 1 )
|
||||
|
||||
|
@ -82,7 +77,7 @@
|
|||
SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
|
||||
JMP2r
|
||||
&hi-set
|
||||
SWP POP #08 ADD ( nhi+8 )
|
||||
NIP #08 ADD ( nhi+8 )
|
||||
JMP2r
|
||||
|
||||
@bitcount32 ( x** -> n^ )
|
||||
|
@ -90,9 +85,9 @@
|
|||
;bitcount16 JSR2 ( xlo* nhi )
|
||||
DUP #00 NEQ ( xlo* nhi nhi!=0 )
|
||||
,&hi-set JCN ( xlo* nhi )
|
||||
TOR ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
|
||||
ROT ROT ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
|
||||
&hi-set
|
||||
TOR POP2 #10 ADD ( nhi+16 )
|
||||
ROT ROT POP2 #10 ADD ( nhi+16 )
|
||||
JMP2r
|
||||
|
||||
( equality )
|
||||
|
@ -113,7 +108,7 @@
|
|||
|
||||
( x != 0 )
|
||||
@non-zero32 ( x** -> bool^ )
|
||||
ORA2 #0000 NEQ2 JMP2r
|
||||
ORA2 ORA JMP2r
|
||||
|
||||
( comparisons )
|
||||
|
||||
|
@ -165,7 +160,7 @@
|
|||
|
||||
( ~x )
|
||||
@complement32 ( x** -> ~x** )
|
||||
COMPLEMENT32 JMP2r
|
||||
SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r
|
||||
|
||||
( temporary registers )
|
||||
( shared by most operations, except mul32 and div32 )
|
||||
|
@ -185,20 +180,20 @@
|
|||
|
||||
( shift right by 0-7 bits )
|
||||
@rshift32-0 ( x** n^ -> x>>n )
|
||||
STHk SFT ;m32/z3 STA ( write z3 )
|
||||
STHk SFT ;m32/z3 STA ( write z3 )
|
||||
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
|
||||
#00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 )
|
||||
#00 STHr SFT2 #00 ;m32/z1 LDA ORA2 ( compute z0,z1 )
|
||||
#00 STHr SFT2 #00 ;m32/z1 LDA ORA2 ( compute z0,z1 )
|
||||
;m32/z2 LDA2
|
||||
JMP2r
|
||||
|
||||
( shift right by 8-15 bits )
|
||||
@rshift32-1 ( x** n^ -> x>>n )
|
||||
#08 SUB STH POP
|
||||
STHkr SFT ;m32/z3 STA ( write z3 )
|
||||
STHkr SFT ;m32/z3 STA ( write z3 )
|
||||
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
|
||||
#00 STHr SFT2 #00 ;m32/z2 LDA ORA2 ( compute z1,z2 )
|
||||
#00 TOR ;m32/z3 LDA
|
||||
#00 STHr SFT2 #00 ;m32/z2 LDA ORA2 ( compute z1,z2 )
|
||||
#00 ROT ROT ;m32/z3 LDA
|
||||
JMP2r
|
||||
|
||||
( shift right by 16-23 bits )
|
||||
|
@ -239,7 +234,7 @@
|
|||
#00 SWP STHkr SFT2 ;m32/z1 STA2 ( store z1,z2 )
|
||||
#00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
|
||||
STHr SFT ;m32/z0 LDA ORA ( calculate z0 )
|
||||
SWP POP ( x0 unused )
|
||||
NIP ( x0 unused )
|
||||
;m32/z1 LDA2 #00
|
||||
JMP2r
|
||||
|
||||
|
@ -256,7 +251,7 @@
|
|||
@lshift32-3 ( x** n^ -> x<<n )
|
||||
#18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
|
||||
SFT ( x0 x1 x2 x3<<r )
|
||||
SWP2 POP2 SWP POP #0000 #00
|
||||
NIP2 NIP #0000 #00
|
||||
JMP2r
|
||||
|
||||
( arithmetic )
|
||||
|
@ -265,7 +260,7 @@
|
|||
@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
|
||||
;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi )
|
||||
;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi )
|
||||
#0000 #0000 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo )
|
||||
#0000 DUP2 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo )
|
||||
|
||||
( x3 + y3 => z2z3 )
|
||||
#00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2
|
||||
|
@ -288,9 +283,9 @@
|
|||
|
||||
( -x )
|
||||
@negate32 ( x** -> -x** )
|
||||
COMPLEMENT32
|
||||
;complement32 JSR2 ( ~x** )
|
||||
INC2 ( ~xhi -xlo )
|
||||
DUP2 #0000 NEQ2 ( ~xhi -xlo non-zero? )
|
||||
DUP2 ORA ( ~xhi -xlo non-zero? )
|
||||
,&done JCN ( xlo non-zero => don't inc hi )
|
||||
SWP2 INC2 SWP2 ( -xhi -xlo )
|
||||
&done
|
||||
|
@ -380,7 +375,7 @@
|
|||
#00 DUP2 ( shift 0 shift 0 )
|
||||
|
||||
( 1<<shift -> cur )
|
||||
#0000 #0001 ROT2 POP
|
||||
#0000 INC2k ROT2 POP
|
||||
;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
|
||||
|
||||
( div<<shift -> div )
|
||||
|
@ -412,10 +407,10 @@
|
|||
( greatest common divisor - euclidean algorithm )
|
||||
@gcd32 ( x** y** -> z** )
|
||||
&loop ( x y )
|
||||
DUP4 ( x y y )
|
||||
OVR2 OVR2 ( x y y )
|
||||
;is-zero32 JSR2 ( x y y=0? )
|
||||
,&done JCN ( x y )
|
||||
DUP4 ( x y y )
|
||||
OVR2 OVR2 ( x y y )
|
||||
STH2 STH2 ( x y [y] )
|
||||
;mod32 JSR2 ( r=x%y [y] )
|
||||
STH2r ( rhi rlo yhi [ylo] )
|
||||
|
@ -426,5 +421,5 @@
|
|||
ROT2 ( yhi ylo rhi rlo )
|
||||
,&loop JMP
|
||||
&done
|
||||
POP4 ( x )
|
||||
POP2 POP2 ( x )
|
||||
JMP2r
|
||||
|
|
Loading…
Reference in New Issue