diff --git a/math32.tal b/math32.tal index 87ffd7c..0afb5df 100644 --- a/math32.tal +++ b/math32.tal @@ -27,7 +27,7 @@ ( div32 x** y** -> q** x / y ) ( mod32 x** y** -> r** x % y ) ( divmod32 x** y** -> q** r** x / y, x % y ) -( gcd32 x** y** -> z** gcd(x, y) ) +( gcd32 x** y** -> z** gcd[x, y] ) ( negate32 x** -> z** -x ) ( lshift32 x** n^ -> z** x< z** x>>n ) @@ -43,19 +43,12 @@ ( gt32 x** y** -> bool^ x > y ) ( lteq32 x** y** -> bool^ x <= y ) ( gteq32 x** y** -> bool^ x >= y ) -( bitcount8 x^ -> bool^ floor(log2(x))+1 ) -( bitcount16 x* -> bool^ floor(log2(x))+1 ) -( bitcount32 x** -> bool^ floor(log2(x))+1 ) +( bitcount8 x^ -> bool^ floor[log2[x]]+1 ) +( bitcount16 x* -> bool^ floor[log2[x]]+1 ) +( bitcount32 x** -> bool^ floor[log2[x]]+1 ) ( ) -( In addition to the code this file uses 44 bytes of registers ) -( to store temporary state: ) -( ) -( - shared memory, 16 bytes ) -( - mul32 memory, 12 bytes ) -( - z_divmod32 memory, 16 bytes ) - -( bitcount: number of bits needed to represent number ) -( equivalent to floor[log2[x]] + 1 ) +( bitcount: number of bits needed to represent the number. ) +( this is equivalent to floor[log2[x]] + 1 ) @bitcount8 ( x^ -> n^ ) LITr 00 &loop DUP ?{ POP STHr JMP2r } #01 SFT INCr !&loop @@ -66,7 +59,7 @@ @bitcount32 ( x** -> n^ ) SWP2 bitcount16 DUP ?{ POP !bitcount16 } #10 NIP2 ADD JMP2r -( equality ) +( -- equality ) ( x == y ) @eq32 ( xhi* xlo* yhi* ylo* -> bool^ ) @@ -84,7 +77,7 @@ @non-zero32 ( x** -> bool^ ) ORA2 ORA JMP2r -( comparisons ) +( -- comparisons ) ( x < y ) @lt32 ( x** y** -> bool^ ) @@ -102,10 +95,10 @@ @gteq32 ( x** y** -> bool^ ) ROT2 SWP2 LTH2 ?{ LTH2 #00 EQU JMP2r } GTH2 JMP2r -( bitwise operations ) +( -- bitwise operations ) ( x & y ) -@and32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* ) +@and32 ( xhi* xlo* yhi* ylo* -> xhi&yhi* xlo&ylo* ) ROT2 AND2 STH2 AND2 STH2r JMP2r ( x | y ) @@ -113,14 +106,14 @@ ROT2 ORA2 STH2 ORA2 STH2r JMP2r ( x ^ y ) -@xor32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* ) +@xor32 ( xhi* xlo* yhi* ylo* -> xhi^yhi* xlo^ylo* ) ROT2 EOR2 STH2 EOR2 STH2r JMP2r ( ~x ) -@complement32 ( x** -> ~x** ) +@complement32 ( x** -> ~xhi* ~xlo* ) SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r -( bit shifting ) +( -- bit-shifting ) ( x >> n ) @rshift32 ( x** n^ -> x>>n ) @@ -181,7 +174,7 @@ #18 SUB #40 SFT ( stash [n-24]<<4 ) SFT NIP2 NIP #0000 #00 JMP2r -( arithmetic ) +( -- arithmetic ) ( x + y ) @add32 ( xhi* xlo* yhi* ylo* -> zhi* zlo* ) @@ -228,70 +221,57 @@ LIT2 [ &z0 $2 ] ADD2 ( sum += [x0*y1+x1*y0]<<16 ) LIT2 [ &z1 $2 ] JMP2r +( x / y ) @div32 ( x** y** -> q** ) z_divmod32 ;z_divmod32/quo0 LDA2 ;z_divmod32/quo1 LDA2 JMP2r +( x % y ) @mod32 ( x** y** -> r** ) z_divmod32 ;z_divmod32/rem0 LDA2 ;z_divmod32/rem1 LDA2 JMP2r +( x / y, x % y ) @divmod32 ( x** y** -> q** r** ) z_divmod32 ;z_divmod32/quo0 LDA2 ;z_divmod32/quo1 LDA2 ;z_divmod32/rem0 LDA2 ;z_divmod32/rem1 LDA2 JMP2r -( calculate and store x / y and x % y ) +( private: calculate and store x / y and x % y ) @z_divmod32 ( x** y** -> ) - ( store y and x for repeated use ) - ,&div1 STR2 ,&div0 STR2 ( y -> div ) - ,&rem1 STR2 ,&rem0 STR2 ( x -> rem ) + ( ; store y and x for repeated use ) + #0000 DUP2 ,&quo0 STR2 ,&quo1 STR2 ( x** y** ; quo<-0 ) + STH2k ,&div1 STR2 STH2k ,&div0 STR2 ( x** [ylo* yhi*] ; div<-y ) + OVR2 OVR2 ,&rem1 STR2 ,&rem0 STR2 ( x** [ylo* yhi*] ; rem<-x ) + OVR2 OVR2 STH2r STH2r ( x** x** y** ) + OVR2 OVR2 STH2 STH2 ( x** x** y** [ylo* yhi*] ) + gteq32 ?{ POP2 POP2 POP2r POP2r JMP2r } ( x** [ylo* yhi*] ; return if x < y ) - ( if x < y then the answer is 0 ) - ,&rem0 LDR2 ,&rem1 LDR2 - ,&div0 LDR2 ,&div1 LDR2 - lt32 ?&is-zero !¬-zero - &is-zero - #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 JMP2r - - ( x >= y so the answer is >= 1 ) - ¬-zero - #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 ( 0 -> quo ) - - ( bitcount[x] - bitcount[y] determines the largest multiple of y to try ) - ,&rem0 LDR2 ,&rem1 LDR2 bitcount32 ( rbits^ ) - ,&div0 LDR2 ,&div1 LDR2 bitcount32 ( rbits^ dbits^ ) - SUB ( shift=rbits-dits ) - #00 DUP2 ( shift 0 shift 0 ) - - ( 1< cur ) - #0000 INC2k ROT2 POP - lshift32 ,&cur1 STR2 ,&cur0 STR2 - - ( div< div ) - ,&div0 LDR2 ,&div1 LDR2 ROT2 POP - lshift32 ,&div1 STR2 ,&div0 STR2 - - !&loop - - [ &div0 $2 &div1 $2 - &rem0 $2 &rem1 $2 - &quo0 $2 &quo1 $2 - &cur0 $2 &cur1 $2 ] + ( ; bitcount[x] - bitcount[y] determines largest multiple of y to try ) + bitcount32 STH2r STH2r bitcount32 SUB ( shift=rbits-dits^ ) + #00 DUP2 ( shift^ 0^ shift^ 0^ ) + #0000 INC2k ROT2 POP ( shift^ 0^ 0* 1* shift^ ) + lshift32 ,&cur1 STR2 ,&cur0 STR2 ( shift^ 0^ ; cur<-1<= the current divisor, we can subtract it and add to quotient ) - ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 lt32 ( is rem < div? ) - ?&rem-lt ( if rem < div skip this iteration ) - - ( since rem >= div, we have found a multiple of y that divides x ) - ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 sub32 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div ) - ,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 add32 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur ) - - &rem-lt - ,&div0 LDR2 ,&div1 LDR2 #01 rshift32 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 ) - ,&cur0 LDR2 ,&cur1 LDR2 #01 rshift32 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 ) - ,&cur0 LDR2 ,&cur1 LDR2 non-zero32 ?&loop ( if cur>0, loop. else we're done ) - JMP2r + ( ; if rem >= cur [current divisor], we can subtract it and add to quotient ) + ( ; otherwise, skip that iteration and reduce cur. ) + LIT2 [ &rem0 $2 ] LIT2 [ &rem1 $2 ] ,&div0 LDR2 ,&div1 LDR2 + lt32 ?{ + ( ; since rem >= div, we have found a multiple of y that divides x ) + ,&rem0 LDR2 ,&rem1 LDR2 ( rem** ) + LIT2 [ &div0 $2 ] LIT2 [ &div1 $2 ] ( rem** div** ) + sub32 ,&rem1 STR2 ,&rem0 STR2 ( ; rem<-rem-div** ) + LIT2 [ &quo0 $2 ] LIT2 [ &quo1 $2 ] ( quo** ) + LIT2 [ &cur0 $2 ] LIT2 [ &cur1 $2 ] ( quo** cur** ) + add32 ,&quo1 STR2 ,&quo0 STR2 ( ; quo<-quo+cur** ) + } + ,&div0 LDR2 ,&div1 LDR2 #01 rshift32 ( div>>1** ) + ,&div1 STR2 ,&div0 STR2 ( ; div<-div>>1 ) + ,&cur0 LDR2 ,&cur1 LDR2 #01 rshift32 ( cur>>1** ) + OVR2 OVR2 ,&cur1 STR2 ,&cur0 STR2 ( cur>>1** ; cur<-cur>>1 ) + non-zero32 ?&loop JMP2r ( ; loop if cur>0, else we're done ) ( greatest common divisor - euclidean algorithm ) @gcd32 ( x** y** -> z** )