From 186a14d4436921e60d2c0f1b5cccedf7cdea47ad Mon Sep 17 00:00:00 2001 From: d_m Date: Sat, 28 Oct 2023 17:05:34 -0400 Subject: [PATCH] new, improved, optimized --- math32.tal | 296 ++++++++++++++++------------------------------------- 1 file changed, 87 insertions(+), 209 deletions(-) diff --git a/math32.tal b/math32.tal index 8dbc54a..87ffd7c 100644 --- a/math32.tal +++ b/math32.tal @@ -52,55 +52,29 @@ ( ) ( - shared memory, 16 bytes ) ( - mul32 memory, 12 bytes ) -( - _divmod32 memory, 16 bytes ) +( - z_divmod32 memory, 16 bytes ) ( bitcount: number of bits needed to represent number ) ( equivalent to floor[log2[x]] + 1 ) @bitcount8 ( x^ -> n^ ) - #00 SWP ( n x ) - &loop - DUP #00 EQU ( n x x=0 ) - ,&done JCN ( n x ) - #01 SFT ( n x>>1 ) - SWP INC SWP ( n+1 x>>1 ) - ,&loop JMP - &done - POP ( n ) - JMP2r + LITr 00 &loop DUP ?{ POP STHr JMP2r } #01 SFT INCr !&loop @bitcount16 ( x* -> n^ ) - SWP ( xlo xhi ) - ;bitcount8 JSR2 ( xlo nhi ) - DUP #00 NEQ ( xlo nhi nhi!=0 ) - ,&hi-set JCN ( xlo nhi ) - SWP ;bitcount8 JSR2 ADD ( nhi+nlo ) - JMP2r - &hi-set - NIP #08 ADD ( nhi+8 ) - JMP2r + LITr 00 &loop ORAk ?{ POP2 STHr JMP2r } #01 SFT2 INCr !&loop @bitcount32 ( x** -> n^ ) - SWP2 ( xlo* xhi* ) - ;bitcount16 JSR2 ( xlo* nhi ) - DUP #00 NEQ ( xlo* nhi nhi!=0 ) - ,&hi-set JCN ( xlo* nhi ) - ROT ROT ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo ) - &hi-set - ROT ROT POP2 #10 ADD ( nhi+16 ) - JMP2r + SWP2 bitcount16 DUP ?{ POP !bitcount16 } #10 NIP2 ADD JMP2r ( equality ) ( x == y ) @eq32 ( xhi* xlo* yhi* ylo* -> bool^ ) - ROT2 EQU2 STH - EQU2 STHr AND JMP2r + ROT2 EQU2 STH EQU2 STHr AND JMP2r ( x != y ) @ne32 ( xhi* xlo* yhi* ylo* -> bool^ ) - ROT2 NEQ2 STH - NEQ2 STHr ORA JMP2r + ROT2 NEQ2 STH NEQ2 STHr ORA JMP2r ( x == 0 ) @is-zero32 ( x** -> bool^ ) @@ -114,35 +88,19 @@ ( x < y ) @lt32 ( x** y** -> bool^ ) - ROT2 SWP2 ( xhi yhi xlo ylo ) - LTH2 ,<-lo JCN ( xhi yhi ) - LTH2 JMP2r - <-lo - GTH2 #00 EQU JMP2r + ROT2 SWP2 LTH2 ?{ LTH2 JMP2r } GTH2 #00 EQU JMP2r ( x <= y ) @lteq32 ( x** y** -> bool^ ) - ROT2 SWP2 ( xhi yhi xlo ylo ) - GTH2 ,>-lo JCN ( xhi yhi ) - GTH2 #00 EQU JMP2r - >-lo - LTH2 JMP2r + ROT2 SWP2 GTH2 ?{ GTH2 #00 EQU JMP2r } LTH2 JMP2r ( x > y ) @gt32 ( x** y** -> bool^ ) - ROT2 SWP2 ( xhi yhi xlo ylo ) - GTH2 ,>-lo JCN ( xhi yhi ) - GTH2 JMP2r - >-lo - LTH2 #00 EQU JMP2r + ROT2 SWP2 GTH2 ?{ GTH2 JMP2r } LTH2 #00 EQU JMP2r ( x > y ) @gteq32 ( x** y** -> bool^ ) - ROT2 SWP2 ( xhi yhi xlo ylo ) - LTH2 ,<-lo JCN ( xhi yhi ) - LTH2 #00 EQU JMP2r - <-lo - GTH2 JMP2r + ROT2 SWP2 LTH2 ?{ LTH2 #00 EQU JMP2r } GTH2 JMP2r ( bitwise operations ) @@ -162,197 +120,128 @@ @complement32 ( x** -> ~x** ) SWP2 #ffff EOR2 SWP2 #ffff EOR2 JMP2r -( temporary registers ) -( shared by most operations, except mul32 and div32 ) -@m32 [ &x0 $1 &x1 $1 &x2 $1 &x3 $1 - &y0 $1 &y1 $1 &y2 $1 &y3 $1 - &z0 $1 &z1 $1 &z2 $1 &z3 $1 - &w0 $1 &w1 $1 &w2 $2 ] - ( bit shifting ) ( x >> n ) -@rshift32 ( x** n^ -> x< x>>n ) + DUP #08 LTH ?shift32-0 ( x n ) + DUP #10 LTH ?rshift32-1 ( x n ) + DUP #18 LTH ?rshift32-2 ( x n ) + !rshift32-3 ( x n ) -( shift right by 0-7 bits ) -@rshift32-0 ( x** n^ -> x< x>>n ) + STH DUP2 STHkr SFT2 ,&z2 STR2 + POP DUP2 STHkr SFT2 ,&z2 LDR ORA ,&z2 STR ,&z1 STR + POP STHr SFT2 ,&z1 LDR ORA ,&z1 STR + LIT [ &z1 $1 ] LIT2 [ &z2 $2 ] JMP2r ( shift right by 8-15 bits ) -@rshift32-1 ( x** n^ -> x< x>>n ) + #08 SUB STH ( stash [n>>8] ) + POP DUP2 STHkr SFT2 ,&z2 STR2 + POP STHr SFT2 ,&z2 LDR ORA ,&z2 STR + #00 SWP LIT2 [ &z2 $2 ] JMP2r ( shift right by 16-23 bits ) -@rshift32-2 ( x** n^ -> x< x>>n ) + #10 SUB STH ( stash [n>>16] ) + POP2 STHr SFT2 #0000 SWP2 JMP2r ( shift right by 16-23 bits ) -@rshift32-3 ( x** n^ -> x< x>>n ) + #18 SUB STH ( stash [n>>24] ) + POP2 POP STH SWPr SFTr #00 #0000 STHr JMP2r ( x << n ) @lshift32 ( x** n^ -> x< x< x< x< x< zhi* zlo* ) - ;m32/y2 STA2 ;m32/y0 STA2 ( save ylo, yhi ) - ;m32/x2 STA2 ;m32/x0 STA2 ( save xlo, xhi ) - #0000 DUP2 ;m32/z0 STA2 ;m32/z2 STA2 ( reset zhi, zlo ) - - ( x3 + y3 => z2z3 ) - #00 ;m32/x3 LDA #00 ;m32/y3 LDA ADD2 ;m32/z2 STA2 - - ( x2 + y2 + z2 => z1z2 ) - #00 ;m32/x2 LDA ;m32/z1 LDA2 ADD2 ;m32/z1 STA2 - #00 ;m32/y2 LDA ;m32/z1 LDA2 ADD2 ;m32/z1 STA2 - - ( x1 + y1 + z1 => z0z1 ) - #00 ;m32/x1 LDA ;m32/z0 LDA2 ADD2 ;m32/z0 STA2 - #00 ;m32/y1 LDA ;m32/z0 LDA2 ADD2 ;m32/z0 STA2 - - ( x0 + y0 + z0 => z0 ) - ;m32/x0 LDA ;m32/z0 LDA ADD ;m32/z0 STA - ;m32/y0 LDA ;m32/z0 LDA ADD ;m32/z0 STA - - ( load zhi,zlo ) - ;m32/z0 LDA2 ;m32/z2 LDA2 - JMP2r + ROT2 STH2k ADD2 STH2k ROT2 ROT2 GTH2r #00 STHr ADD2 ADD2 SWP2 JMP2r ( -x ) @negate32 ( x** -> -x** ) - ;complement32 JSR2 ( ~x** ) - INC2 ( ~xhi -xlo ) - ORAk ( ~xhi -xlo non-zero? ) - ,&done JCN ( xlo non-zero => don't inc hi ) - SWP2 INC2 SWP2 ( -xhi -xlo ) - &done - JMP2r + complement32 INC2 ORAk ?{ SWP2 INC2 SWP2 } JMP2r ( x - y ) @sub32 ( x** y** -> z** ) - ;negate32 JSR2 ;add32 JMP2 + ROT2 STH2k SWP2 SUB2 STH2k ROT2 ROT2 LTH2r #00 STHr ADD2 SUB2 SWP2 JMP2r ( 16-bit multiplication ) @mul16 ( x* y* -> z** ) - ;m32/y1 STA ;m32/y0 STA ( save ylo, yhi ) - ;m32/x1 STA ;m32/x0 STA ( save xlo, xhi ) - #0000 #00 ;m32/z1 STA2 ;m32/z3 STA ( reset z1,z2,z3 ) - #0000 #00 ;m32/w0 STA2 ;m32/w2 STA ( reset w0,w1,w2 ) + ,&y1 STR ,&y0 STR ( save ylo, yhi ) + ,&x1 STR ,&x0 STR ( save xlo, xhi ) + #0000 ,&z1 STR ,&w0 STR ( reset z1 and w0 ) ( x1 * y1 => z1z2 ) - #00 ;m32/x1 LDA #00 ;m32/y1 LDA MUL2 ;m32/z2 STA2 + LIT2 00 [ &x1 $1 ] LIT2 00 [ &y1 $1 ] MUL2 ,&z3 STR ,&z2 STR ( x0 * y1 => z0z1 ) - #00 ;m32/x0 LDA #00 ;m32/y1 LDA MUL2 ;m32/z1 LDA2 ADD2 ;m32/z1 STA2 + #00 ,&x0 LDR #00 ,&y1 LDR MUL2 ,&z1 LDR2 ADD2 ,&z1 STR2 ( x1 * y0 => w1w2 ) - #00 ;m32/x1 LDA #00 ;m32/y0 LDA MUL2 ;m32/w1 STA2 + #00 ,&x1 LDR #00 ,&y0 LDR MUL2 ,&w2 STR ,&w1 STR ( x0 * y0 => w0w1 ) - #00 ;m32/x0 LDA #00 ;m32/y0 LDA MUL2 ;m32/w0 LDA2 ADD2 ;m32/w0 STA2 + LIT2 00 [ &x0 $1 ] LIT2 00 [ &y0 $1 ] MUL2 ,&w0 LDR2 ADD2 ,&w0 STR2 ( add z and a<<8 ) - #00 ;m32/z1 LDA2 ;m32/z3 LDA - ;m32/w0 LDA2 ;m32/w2 LDA #00 - ;add32 JMP2 + #00 LIT2 [ &z1 $1 &z2 $1 ] LIT [ &z3 $1 ] + LIT2 [ &w0 $1 &w1 $1 ] LIT [ &w2 $1 ] #00 + !add32 ( x * y ) @mul32 ( x** y** -> z** ) - ,&y1 STR2 ,&y0 STR2 ( save ylo, yhi ) - ,&x1 STR2 ,&x0 STR2 ( save xlo, xhi ) - ,&y1 LDR2 ,&x1 LDR2 ;mul16 JSR2 ( [x1*y1] ) - ,&z1 STR2 ,&z0 STR2 ( sum = x1*y1, save zlo, zhi ) - ,&y1 LDR2 ,&x0 LDR2 MUL2 ( [x0*y1]<<16 ) - ,&y0 LDR2 ,&x1 LDR2 MUL2 ( [x1*y0]<<16 ) + ROT2k ( x0* x1* y0* y1* y0* y1* x1* ) + mul16 ,&z1 STR2 ,&z0 STR2 POP2 ( x0* x1* y0* y1* ; sum = [x1*y1] ) + STH2 ROT2 STH2 ( x1* y0* [y1* x0*] ) + MUL2r MUL2 STH2r ADD2 ( x1*y0+y1*x0* ) ( [x0*y0]<<32 will completely overflow ) - ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y1<<16 + x1*y0<<16 ) - ,&z1 LDR2 - JMP2r -[ &x0 $2 &x1 $2 - &y0 $2 &y1 $2 - &z0 $2 &z1 $2 ] + LIT2 [ &z0 $2 ] ADD2 ( sum += [x0*y1+x1*y0]<<16 ) + LIT2 [ &z1 $2 ] JMP2r @div32 ( x** y** -> q** ) - ;_divmod32 JSR2 - ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2 - JMP2r + z_divmod32 ;z_divmod32/quo0 LDA2 ;z_divmod32/quo1 LDA2 JMP2r @mod32 ( x** y** -> r** ) - ;_divmod32 JSR2 - ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2 - JMP2r + z_divmod32 ;z_divmod32/rem0 LDA2 ;z_divmod32/rem1 LDA2 JMP2r @divmod32 ( x** y** -> q** r** ) - ;_divmod32 JSR2 - ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2 - ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2 + z_divmod32 + ;z_divmod32/quo0 LDA2 ;z_divmod32/quo1 LDA2 + ;z_divmod32/rem0 LDA2 ;z_divmod32/rem1 LDA2 JMP2r ( calculate and store x / y and x % y ) -@_divmod32 ( x** y** -> ) +@z_divmod32 ( x** y** -> ) ( store y and x for repeated use ) ,&div1 STR2 ,&div0 STR2 ( y -> div ) ,&rem1 STR2 ,&rem0 STR2 ( x -> rem ) @@ -360,7 +249,7 @@ ( if x < y then the answer is 0 ) ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 - ;lt32 JSR2 ,&is-zero JCN ,¬-zero JMP + lt32 ?&is-zero !¬-zero &is-zero #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 JMP2r @@ -369,20 +258,20 @@ #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 ( 0 -> quo ) ( bitcount[x] - bitcount[y] determines the largest multiple of y to try ) - ,&rem0 LDR2 ,&rem1 LDR2 ;bitcount32 JSR2 ( rbits^ ) - ,&div0 LDR2 ,&div1 LDR2 ;bitcount32 JSR2 ( rbits^ dbits^ ) + ,&rem0 LDR2 ,&rem1 LDR2 bitcount32 ( rbits^ ) + ,&div0 LDR2 ,&div1 LDR2 bitcount32 ( rbits^ dbits^ ) SUB ( shift=rbits-dits ) #00 DUP2 ( shift 0 shift 0 ) ( 1< cur ) #0000 INC2k ROT2 POP - ;lshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 + lshift32 ,&cur1 STR2 ,&cur0 STR2 ( div< div ) ,&div0 LDR2 ,&div1 LDR2 ROT2 POP - ;lshift32 JSR2 ,&div1 STR2 ,&div0 STR2 + lshift32 ,&div1 STR2 ,&div0 STR2 - ,&loop JMP + !&loop [ &div0 $2 &div1 $2 &rem0 $2 &rem1 $2 @@ -391,35 +280,24 @@ &loop ( if rem >= the current divisor, we can subtract it and add to quotient ) - ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;lt32 JSR2 ( is rem < div? ) - ,&rem-lt JCN ( if rem < div skip this iteration ) + ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 lt32 ( is rem < div? ) + ?&rem-lt ( if rem < div skip this iteration ) ( since rem >= div, we have found a multiple of y that divides x ) - ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 ;sub32 JSR2 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div ) - ,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 ;add32 JSR2 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur ) + ,&rem0 LDR2 ,&rem1 LDR2 ,&div0 LDR2 ,&div1 LDR2 sub32 ,&rem1 STR2 ,&rem0 STR2 ( rem -= div ) + ,&quo0 LDR2 ,&quo1 LDR2 ,&cur0 LDR2 ,&cur1 LDR2 add32 ,&quo1 STR2 ,&quo0 STR2 ( quo += cur ) &rem-lt - ,&div0 LDR2 ,&div1 LDR2 #01 ;rshift32 JSR2 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 ) - ,&cur0 LDR2 ,&cur1 LDR2 #01 ;rshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 ) - ,&cur0 LDR2 ,&cur1 LDR2 ;non-zero32 JSR2 ,&loop JCN ( if cur>0, loop. else we're done ) + ,&div0 LDR2 ,&div1 LDR2 #01 rshift32 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 ) + ,&cur0 LDR2 ,&cur1 LDR2 #01 rshift32 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 ) + ,&cur0 LDR2 ,&cur1 LDR2 non-zero32 ?&loop ( if cur>0, loop. else we're done ) JMP2r ( greatest common divisor - euclidean algorithm ) @gcd32 ( x** y** -> z** ) - &loop ( x y ) - OVR2 OVR2 ( x y y ) - ;is-zero32 JSR2 ( x y y=0? ) - ,&done JCN ( x y ) - OVR2 OVR2 ( x y y ) - STH2 STH2 ( x y [y] ) - ;mod32 JSR2 ( r=x%y [y] ) - STH2r ( rhi rlo yhi [ylo] ) - ROT2 ( rlo yhi rhi [ylo] ) - ROT2 ( yhi rhi rlo [ylo] ) - STH2r ( yhi rhi rlo ylo ) - ROT2 ( yhi rlo ylo rhi ) - ROT2 ( yhi ylo rhi rlo ) - ,&loop JMP - &done - POP2 POP2 ( x ) - JMP2r + &loop OVR2 OVR2 is-zero32 ?{ ( x** y** ) + OVR2 OVR2 STH2 STH2 ( x** y** [y**] ) + mod32 ( r=x%y** [y**] ) + STH2r ROT2 ROT2 ( yhi* rhi* rlo* [ylo*] ) + STH2r ROT2 ROT2 !&loop ( y** r** ) + } POP2 POP2 JMP2r ( z** )