neauoire's optimizations
This commit is contained in:
parent
0f61f352bb
commit
8d7a1675f8
43
math32.tal
43
math32.tal
|
@ -54,11 +54,6 @@
|
||||||
( - mul32 memory, 12 bytes )
|
( - mul32 memory, 12 bytes )
|
||||||
( - _divmod32 memory, 16 bytes )
|
( - _divmod32 memory, 16 bytes )
|
||||||
|
|
||||||
%TOR { ROT ROT } ( a b c -> c a b )
|
|
||||||
%COMPLEMENT32 { SWP2 #ffff EOR2 SWP2 #ffff EOR2 }
|
|
||||||
%DUP4 { OVR2 OVR2 }
|
|
||||||
%POP4 { POP2 POP2 }
|
|
||||||
|
|
||||||
( bitcount: number of bits needed to represent number )
|
( bitcount: number of bits needed to represent number )
|
||||||
( equivalent to floor[log2[x]] + 1 )
|
( equivalent to floor[log2[x]] + 1 )
|
||||||
|
|
||||||
|
@ -82,7 +77,7 @@
|
||||||
SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
|
SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
|
||||||
JMP2r
|
JMP2r
|
||||||
&hi-set
|
&hi-set
|
||||||
SWP POP #08 ADD ( nhi+8 )
|
NIP #08 ADD ( nhi+8 )
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
@bitcount32 ( x** -> n^ )
|
@bitcount32 ( x** -> n^ )
|
||||||
|
@ -90,9 +85,9 @@
|
||||||
;bitcount16 JSR2 ( xlo* nhi )
|
;bitcount16 JSR2 ( xlo* nhi )
|
||||||
DUP #00 NEQ ( xlo* nhi nhi!=0 )
|
DUP #00 NEQ ( xlo* nhi nhi!=0 )
|
||||||
,&hi-set JCN ( xlo* nhi )
|
,&hi-set JCN ( xlo* nhi )
|
||||||
TOR ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
|
ROT ROT ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
|
||||||
&hi-set
|
&hi-set
|
||||||
TOR POP2 #10 ADD ( nhi+16 )
|
ROT ROT POP2 #10 ADD ( nhi+16 )
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
( equality )
|
( equality )
|
||||||
|
@ -113,7 +108,7 @@
|
||||||
|
|
||||||
( x != 0 )
|
( x != 0 )
|
||||||
@non-zero32 ( x** -> bool^ )
|
@non-zero32 ( x** -> bool^ )
|
||||||
ORA2 #0000 NEQ2 JMP2r
|
ORA2 ORA JMP2r
|
||||||
|
|
||||||
( comparisons )
|
( comparisons )
|
||||||
|
|
||||||
|
@ -165,7 +160,7 @@
|
||||||
|
|
||||||
( ~x )
|
( ~x )
|
||||||
@complement32 ( x** -> ~x** )
|
@complement32 ( x** -> ~x** )
|
||||||
COMPLEMENT32 JMP2r
|
SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r
|
||||||
|
|
||||||
( temporary registers )
|
( temporary registers )
|
||||||
( shared by most operations, except mul32 and div32 )
|
( shared by most operations, except mul32 and div32 )
|
||||||
|
@ -185,20 +180,20 @@
|
||||||
|
|
||||||
( shift right by 0-7 bits )
|
( shift right by 0-7 bits )
|
||||||
@rshift32-0 ( x** n^ -> x>>n )
|
@rshift32-0 ( x** n^ -> x>>n )
|
||||||
STHk SFT ;m32/z3 STA ( write z3 )
|
STHk SFT ;m32/z3 STA ( write z3 )
|
||||||
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
|
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
|
||||||
#00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 )
|
#00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 )
|
||||||
#00 STHr SFT2 #00 ;m32/z1 LDA ORA2 ( compute z0,z1 )
|
#00 STHr SFT2 #00 ;m32/z1 LDA ORA2 ( compute z0,z1 )
|
||||||
;m32/z2 LDA2
|
;m32/z2 LDA2
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
( shift right by 8-15 bits )
|
( shift right by 8-15 bits )
|
||||||
@rshift32-1 ( x** n^ -> x>>n )
|
@rshift32-1 ( x** n^ -> x>>n )
|
||||||
#08 SUB STH POP
|
#08 SUB STH POP
|
||||||
STHkr SFT ;m32/z3 STA ( write z3 )
|
STHkr SFT ;m32/z3 STA ( write z3 )
|
||||||
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
|
#00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
|
||||||
#00 STHr SFT2 #00 ;m32/z2 LDA ORA2 ( compute z1,z2 )
|
#00 STHr SFT2 #00 ;m32/z2 LDA ORA2 ( compute z1,z2 )
|
||||||
#00 TOR ;m32/z3 LDA
|
#00 ROT ROT ;m32/z3 LDA
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
( shift right by 16-23 bits )
|
( shift right by 16-23 bits )
|
||||||
|
@ -239,7 +234,7 @@
|
||||||
#00 SWP STHkr SFT2 ;m32/z1 STA2 ( store z1,z2 )
|
#00 SWP STHkr SFT2 ;m32/z1 STA2 ( store z1,z2 )
|
||||||
#00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
|
#00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
|
||||||
STHr SFT ;m32/z0 LDA ORA ( calculate z0 )
|
STHr SFT ;m32/z0 LDA ORA ( calculate z0 )
|
||||||
SWP POP ( x0 unused )
|
NIP ( x0 unused )
|
||||||
;m32/z1 LDA2 #00
|
;m32/z1 LDA2 #00
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
|
@ -256,7 +251,7 @@
|
||||||
@lshift32-3 ( x** n^ -> x<<n )
|
@lshift32-3 ( x** n^ -> x<<n )
|
||||||
#18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
|
#18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
|
||||||
SFT ( x0 x1 x2 x3<<r )
|
SFT ( x0 x1 x2 x3<<r )
|
||||||
SWP2 POP2 SWP POP #0000 #00
|
NIP2 NIP #0000 #00
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
( arithmetic )
|
( arithmetic )
|
||||||
|
@ -265,7 +260,7 @@
|
||||||
@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
|
@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
|
||||||
;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi )
|
;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi )
|
||||||
;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi )
|
;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi )
|
||||||
#0000 #0000 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo )
|
#0000 DUP2 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo )
|
||||||
|
|
||||||
( x3 + y3 => z2z3 )
|
( x3 + y3 => z2z3 )
|
||||||
#00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2
|
#00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2
|
||||||
|
@ -288,9 +283,9 @@
|
||||||
|
|
||||||
( -x )
|
( -x )
|
||||||
@negate32 ( x** -> -x** )
|
@negate32 ( x** -> -x** )
|
||||||
COMPLEMENT32
|
;complement32 JSR2 ( ~x** )
|
||||||
INC2 ( ~xhi -xlo )
|
INC2 ( ~xhi -xlo )
|
||||||
DUP2 #0000 NEQ2 ( ~xhi -xlo non-zero? )
|
DUP2 ORA ( ~xhi -xlo non-zero? )
|
||||||
,&done JCN ( xlo non-zero => don't inc hi )
|
,&done JCN ( xlo non-zero => don't inc hi )
|
||||||
SWP2 INC2 SWP2 ( -xhi -xlo )
|
SWP2 INC2 SWP2 ( -xhi -xlo )
|
||||||
&done
|
&done
|
||||||
|
@ -380,7 +375,7 @@
|
||||||
#00 DUP2 ( shift 0 shift 0 )
|
#00 DUP2 ( shift 0 shift 0 )
|
||||||
|
|
||||||
( 1<<shift -> cur )
|
( 1<<shift -> cur )
|
||||||
#0000 #0001 ROT2 POP
|
#0000 INC2k ROT2 POP
|
||||||
;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
|
;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2
|
||||||
|
|
||||||
( div<<shift -> div )
|
( div<<shift -> div )
|
||||||
|
@ -412,10 +407,10 @@
|
||||||
( greatest common divisor - euclidean algorithm )
|
( greatest common divisor - euclidean algorithm )
|
||||||
@gcd32 ( x** y** -> z** )
|
@gcd32 ( x** y** -> z** )
|
||||||
&loop ( x y )
|
&loop ( x y )
|
||||||
DUP4 ( x y y )
|
OVR2 OVR2 ( x y y )
|
||||||
;is-zero32 JSR2 ( x y y=0? )
|
;is-zero32 JSR2 ( x y y=0? )
|
||||||
,&done JCN ( x y )
|
,&done JCN ( x y )
|
||||||
DUP4 ( x y y )
|
OVR2 OVR2 ( x y y )
|
||||||
STH2 STH2 ( x y [y] )
|
STH2 STH2 ( x y [y] )
|
||||||
;mod32 JSR2 ( r=x%y [y] )
|
;mod32 JSR2 ( r=x%y [y] )
|
||||||
STH2r ( rhi rlo yhi [ylo] )
|
STH2r ( rhi rlo yhi [ylo] )
|
||||||
|
@ -426,5 +421,5 @@
|
||||||
ROT2 ( yhi ylo rhi rlo )
|
ROT2 ( yhi ylo rhi rlo )
|
||||||
,&loop JMP
|
,&loop JMP
|
||||||
&done
|
&done
|
||||||
POP4 ( x )
|
POP2 POP2 ( x )
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
Loading…
Reference in New Issue