optimize division
This commit is contained in:
parent
36a5ca5212
commit
436f6dc7bf
118
math32.tal
118
math32.tal
|
@ -27,7 +27,7 @@
|
|||
( div32 x** y** -> q** x / y )
|
||||
( mod32 x** y** -> r** x % y )
|
||||
( divmod32 x** y** -> q** r** x / y, x % y )
|
||||
( gcd32 x** y** -> z** gcd(x, y) )
|
||||
( gcd32 x** y** -> z** gcd[x, y] )
|
||||
( negate32 x** -> z** -x )
|
||||
( lshift32 x** n^ -> z** x<<n )
|
||||
( rshift32 x** n^ -> z** x>>n )
|
||||
|
@ -43,19 +43,12 @@
|
|||
( gt32 x** y** -> bool^ x > y )
|
||||
( lteq32 x** y** -> bool^ x <= y )
|
||||
( gteq32 x** y** -> bool^ x >= y )
|
||||
( bitcount8 x^ -> bool^ floor(log2(x))+1 )
|
||||
( bitcount16 x* -> bool^ floor(log2(x))+1 )
|
||||
( bitcount32 x** -> bool^ floor(log2(x))+1 )
|
||||
( bitcount8 x^ -> bool^ floor[log2[x]]+1 )
|
||||
( bitcount16 x* -> bool^ floor[log2[x]]+1 )
|
||||
( bitcount32 x** -> bool^ floor[log2[x]]+1 )
|
||||
( )
|
||||
( In addition to the code this file uses 44 bytes of registers )
|
||||
( to store temporary state: )
|
||||
( )
|
||||
( - shared memory, 16 bytes )
|
||||
( - mul32 memory, 12 bytes )
|
||||
( - z_divmod32 memory, 16 bytes )
|
||||
|
||||
( bitcount: number of bits needed to represent number )
|
||||
( equivalent to floor[log2[x]] + 1 )
|
||||
( bitcount: number of bits needed to represent the number. )
|
||||
( this is equivalent to floor[log2[x]] + 1 )
|
||||
|
||||
@bitcount8 ( x^ -> n^ )
|
||||
LITr 00 &loop DUP ?{ POP STHr JMP2r } #01 SFT INCr !&loop
|
||||
|
@ -66,7 +59,7 @@
|
|||
@bitcount32 ( x** -> n^ )
|
||||
SWP2 bitcount16 DUP ?{ POP !bitcount16 } #10 NIP2 ADD JMP2r
|
||||
|
||||
( equality )
|
||||
( -- equality )
|
||||
|
||||
( x == y )
|
||||
@eq32 ( xhi* xlo* yhi* ylo* -> bool^ )
|
||||
|
@ -84,7 +77,7 @@
|
|||
@non-zero32 ( x** -> bool^ )
|
||||
ORA2 ORA JMP2r
|
||||
|
||||
( comparisons )
|
||||
( -- comparisons )
|
||||
|
||||
( x < y )
|
||||
@lt32 ( x** y** -> bool^ )
|
||||
|
@ -102,10 +95,10 @@
|
|||
@gteq32 ( x** y** -> bool^ )
|
||||
ROT2 SWP2 LTH2 ?{ LTH2 #00 EQU JMP2r } GTH2 JMP2r
|
||||
|
||||
( bitwise operations )
|
||||
( -- bitwise operations )
|
||||
|
||||
( x & y )
|
||||
@and32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
|
||||
@and32 ( xhi* xlo* yhi* ylo* -> xhi&yhi* xlo&ylo* )
|
||||
ROT2 AND2 STH2 AND2 STH2r JMP2r
|
||||
|
||||
( x | y )
|
||||
|
@ -113,14 +106,14 @@
|
|||
ROT2 ORA2 STH2 ORA2 STH2r JMP2r
|
||||
|
||||
( x ^ y )
|
||||
@xor32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
|
||||
@xor32 ( xhi* xlo* yhi* ylo* -> xhi^yhi* xlo^ylo* )
|
||||
ROT2 EOR2 STH2 EOR2 STH2r JMP2r
|
||||
|
||||
( ~x )
|
||||
@complement32 ( x** -> ~x** )
|
||||
@complement32 ( x** -> ~xhi* ~xlo* )
|
||||
SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r
|
||||
|
||||
( bit shifting )
|
||||
( -- bit-shifting )
|
||||
|
||||
( x >> n )
|
||||
@rshift32 ( x** n^ -> x>>n )
|
||||
|
@ -181,7 +174,7 @@
|
|||
#18 SUB #40 SFT ( stash [n-24]<<4 )
|
||||
SFT NIP2 NIP #0000 #00 JMP2r
|
||||
|
||||
( arithmetic )
|
||||
( -- arithmetic )
|
||||
|
||||
( x + y )
|
||||
@add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* )
|
||||
|
@ -228,70 +221,57 @@
|
|||
LIT2 [ &z0 $2 ] ADD2 ( sum += [x0*y1+x1*y0]<<16 )
|
||||
LIT2 [ &z1 $2 ] JMP2r
|
||||
|
||||
( x / y )
|
||||
@div32 ( x** y** -> q** )
|
||||
z_divmod32 ;z_divmod32/quo0 LDA2 ;z_divmod32/quo1 LDA2 JMP2r
|
||||
|
||||
( x % y )
|
||||
@mod32 ( x** y** -> r** )
|
||||
z_divmod32 ;z_divmod32/rem0 LDA2 ;z_divmod32/rem1 LDA2 JMP2r
|
||||
|
||||
( x / y, x % y )
|
||||
@divmod32 ( x** y** -> q** r** )
|
||||
z_divmod32
|
||||
;z_divmod32/quo0 LDA2 ;z_divmod32/quo1 LDA2
|
||||
;z_divmod32/rem0 LDA2 ;z_divmod32/rem1 LDA2
|
||||
JMP2r
|
||||
|
||||
( calculate and store x / y and x % y )
|
||||
( private: calculate and store x / y and x % y )
|
||||
@z_divmod32 ( x** y** -> )
|
||||
( store y and x for repeated use )
|
||||
,&div1 STR2 ,&div0 STR2 ( y -> div )
|
||||
,&rem1 STR2 ,&rem0 STR2 ( x -> rem )
|
||||
( ; store y and x for repeated use )
|
||||
#0000 DUP2 ,&quo0 STR2 ,&quo1 STR2 ( x** y** ; quo<-0 )
|
||||
STH2k ,&div1 STR2 STH2k ,&div0 STR2 ( x** [ylo* yhi*] ; div<-y )
|
||||
OVR2 OVR2 ,&rem1 STR2 ,&rem0 STR2 ( x** [ylo* yhi*] ; rem<-x )
|
||||
OVR2 OVR2 STH2r STH2r ( x** x** y** )
|
||||
OVR2 OVR2 STH2 STH2 ( x** x** y** [ylo* yhi*] )
|
||||
gteq32 ?{ POP2 POP2 POP2r POP2r JMP2r } ( x** [ylo* yhi*] ; return if x < y )
|
||||
|
||||
( if x < y then the answer is 0 )
|
||||
,&rem0 LDR2 ,&rem1 LDR2
|
||||
,&div0 LDR2 ,&div1 LDR2
|
||||
lt32 ?&is-zero !¬-zero
|
||||
&is-zero
|
||||
#0000 ,&quo0 STR2 #0000 ,&quo1 STR2 JMP2r
|
||||
|
||||
( x >= y so the answer is >= 1 )
|
||||
¬-zero
|
||||
#0000 ,&quo0 STR2 #0000 ,&quo1 STR2 ( 0 -> quo )
|
||||
|
||||
( bitcount[x] - bitcount[y] determines the largest multiple of y to try )
|
||||
,&rem0 LDR2 ,&rem1 LDR2 bitcount32 ( rbits^ )
|
||||
,&div0 LDR2 ,&div1 LDR2 bitcount32 ( rbits^ dbits^ )
|
||||
SUB ( shift=rbits-dits )
|
||||
#00 DUP2 ( shift 0 shift 0 )
|
||||
|
||||
( 1<<shift -> cur )
|
||||
#0000 INC2k ROT2 POP
|
||||
lshift32 ,&cur1 STR2 ,&cur0 STR2
|
||||
|
||||
( div<<shift -> div )
|
||||
,&div0 LDR2 ,&div1 LDR2 ROT2 POP
|
||||
lshift32 ,&div1 STR2 ,&div0 STR2
|
||||
|
||||
!&loop
|
||||
|
||||
[ &div0 $2 &div1 $2
|
||||
&rem0 $2 &rem1 $2
|
||||
&quo0 $2 &quo1 $2
|
||||
&cur0 $2 &cur1 $2 ]
|
||||
( ; bitcount[x] - bitcount[y] determines largest multiple of y to try )
|
||||
bitcount32 STH2r STH2r bitcount32 SUB ( shift=rbits-dits^ )
|
||||
#00 DUP2 ( shift^ 0^ shift^ 0^ )
|
||||
#0000 INC2k ROT2 POP ( shift^ 0^ 0* 1* shift^ )
|
||||
lshift32 ,&cur1 STR2 ,&cur0 STR2 ( shift^ 0^ ; cur<-1<<shift )
|
||||
,&div0 LDR2 ,&div1 LDR2 ROT2 POP ( div** shift^ )
|
||||
lshift32 ,&div1 STR2 ,&div0 STR2 ( ; div<-div<<shift )
|
||||
|
||||
&loop
|
||||
( if rem >= the current divisor, we can subtract it and add to quotient )
|
||||
,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 lt32 ( is rem < div? )
|
||||
?&rem-lt ( if rem < div skip this iteration )
|
||||
|
||||
( since rem >= div, we have found a multiple of y that divides x )
|
||||
,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 sub32 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div )
|
||||
,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 add32 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur )
|
||||
|
||||
&rem-lt
|
||||
,&div0 LDR2 ,&div1 LDR2 #01 rshift32 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 )
|
||||
,&cur0 LDR2 ,&cur1 LDR2 #01 rshift32 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 )
|
||||
,&cur0 LDR2 ,&cur1 LDR2 non-zero32 ?&loop ( if cur>0, loop. else we're done )
|
||||
JMP2r
|
||||
( ; if rem >= cur [current divisor], we can subtract it and add to quotient )
|
||||
( ; otherwise, skip that iteration and reduce cur. )
|
||||
LIT2 [ &rem0 $2 ] LIT2 [ &rem1 $2 ] ,&div0 LDR2 ,&div1 LDR2
|
||||
lt32 ?{
|
||||
( ; since rem >= div, we have found a multiple of y that divides x )
|
||||
,&rem0 LDR2 ,&rem1 LDR2 ( rem** )
|
||||
LIT2 [ &div0 $2 ] LIT2 [ &div1 $2 ] ( rem** div** )
|
||||
sub32 ,&rem1 STR2 ,&rem0 STR2 ( ; rem<-rem-div** )
|
||||
LIT2 [ &quo0 $2 ] LIT2 [ &quo1 $2 ] ( quo** )
|
||||
LIT2 [ &cur0 $2 ] LIT2 [ &cur1 $2 ] ( quo** cur** )
|
||||
add32 ,&quo1 STR2 ,&quo0 STR2 ( ; quo<-quo+cur** )
|
||||
}
|
||||
,&div0 LDR2 ,&div1 LDR2 #01 rshift32 ( div>>1** )
|
||||
,&div1 STR2 ,&div0 STR2 ( ; div<-div>>1 )
|
||||
,&cur0 LDR2 ,&cur1 LDR2 #01 rshift32 ( cur>>1** )
|
||||
OVR2 OVR2 ,&cur1 STR2 ,&cur0 STR2 ( cur>>1** ; cur<-cur>>1 )
|
||||
non-zero32 ?&loop JMP2r ( ; loop if cur>0, else we're done )
|
||||
|
||||
( greatest common divisor - euclidean algorithm )
|
||||
@gcd32 ( x** y** -> z** )
|
||||
|
|
Loading…
Reference in New Issue