diff --git a/math32.tal b/math32.tal index cce6606..3f412a2 100644 --- a/math32.tal +++ b/math32.tal @@ -110,6 +110,16 @@ @complement32 ( x** -> ~x** ) COMPLEMENT32 RTN +( temporary registers ) +( used by all operations except div32 ) +@sh [ + &r $1 + &x0 $1 &x1 $1 &x2 $1 &x3 $1 + &y0 $1 &y1 $1 &y2 $1 &y3 $1 + &z0 $1 &z1 $1 &z2 $1 &z3 $1 + &a0 $1 &a1 $1 &a2 $2 +] + ( bit shifting ) @right-shift ( x** n^ -> x< x< r ) - ,&r LDR SFT ,&z3 STR ( write z3 ) - #00 ,&r LDR SFT2 ,&z2 LDR2 ORA2 ,&z2 STR2 ( write z2,z3 ) - #00 ,&r LDR SFT2 ,&z1 LDR2 ORA2 ,&z1 STR2 ( write z1,z2 ) - #00 ,&r LDR SFT2 ,&z0 LDR2 ORA2 ,&z0 STR2 ( write z0,z1 ) - ,&z0 LDR2 ,&z2 LDR2 + #0000 ;sh/z0 STA2 #0000 ;sh/z2 STA2 + ;sh/r STA ( n -> r ) + ;sh/r LDA SFT ;sh/z3 STA ( write z3 ) + #00 ;sh/r LDA SFT2 ;sh/z2 LDA2 ORA2 ;sh/z2 STA2 ( write z2,z3 ) + #00 ;sh/r LDA SFT2 ;sh/z1 LDA2 ORA2 ;sh/z1 STA2 ( write z1,z2 ) + #00 ;sh/r LDA SFT2 ;sh/z0 LDA2 ORA2 ;sh/z0 STA2 ( write z0,z1 ) + ;sh/z0 LDA2 ;sh/z2 LDA2 RTN -[ &r $1 &z0 $1 &z1 $1 &z2 $1 &z3 $1 ] ( shift right by 8-15 bits ) @right-shift1 ( x** n^ -> x< r ) + #00 ;sh/z1 STA #0000 ;sh/z2 STA2 + #08 SUB ;sh/r STA ( n -> r ) POP - ,&r LDR SFT ,&z3 STR ( write z3 ) - #00 ,&r LDR SFT2 ,&z2 LDR2 ORA2 ,&z2 STR2 ( write z2,z3 ) - #00 ,&r LDR SFT2 ,&z1 LDR2 ORA2 ,&z1 STR2 ( write z1,z2 ) - #00 ,&z1 LDR ,&z2 LDR2 + ;sh/r LDA SFT ;sh/z3 STA ( write z3 ) + #00 ;sh/r LDA SFT2 ;sh/z2 LDA2 ORA2 ;sh/z2 STA2 ( write z2,z3 ) + #00 ;sh/r LDA SFT2 ;sh/z1 LDA2 ORA2 ;sh/z1 STA2 ( write z1,z2 ) + #00 ;sh/z1 LDA ;sh/z2 LDA2 RTN -[ &r $1 &z1 $1 &z2 $1 &z3 $1 ] ( shift right by 16-23 bits ) @right-shift2 ( x** n^ -> x< r ) + #0000 ;sh/z2 STA2 + #10 SUB ;sh/r STA ( n -> r ) POP2 - ,&r LDR SFT ,&z3 STR ( write z3 ) - #00 ,&r LDR SFT2 ,&z2 LDR2 ORA2 ,&z2 STR2 ( write z2,z3 ) - #0000 ,&z2 LDR2 + ;sh/r LDA SFT ;sh/z3 STA ( write z3 ) + #00 ;sh/r LDA SFT2 ;sh/z2 LDA2 ORA2 ;sh/z2 STA2 ( write z2,z3 ) + #0000 ;sh/z2 LDA2 RTN -[ &r $1 &z2 $1 &z3 $1 ] ( shift right by 16-23 bits ) @right-shift3 ( x** n^ -> x< r ) + #18 SUB ;sh/r STA ( n -> r ) POP2 POP #00 SWP #0000 SWP2 ( 00 00 00 x0 ) - ,&r LDR SFT + ;sh/r LDA SFT RTN -[ &r $1 ] @left-shift ( x** n^ -> x< x< r ) + #0000 ;sh/z0 STA2 #0000 ;sh/z2 STA2 + #40 SFT ;sh/r STA ( n<<4 -> r ) SWP SWP2 SWP ( x3 x2 x1 x0 ) - ,&r LDR SFT ,&z0 STR ( x3 x2 x1 ) + ;sh/r LDA SFT ;sh/z0 STA ( x3 x2 x1 ) - #00 SWP ,&r LDR SFT2 ( x3 x2 00x1< x< r ) + #0000 ;sh/z0 STA2 #00 ;sh/z2 STA + #08 SUB #40 SFT ;sh/r STA ( n<<4 -> r ) SWP SWP2 SWP POP ( x3 x2 x1 ) - ,&r LDR SFT ,&z0 STR ( x3 x2 ) + ;sh/r LDA SFT ;sh/z0 STA ( x3 x2 ) - #00 SWP ,&r LDR SFT2 ( x3 00x2< x< r ) + #0000 ;sh/z0 STA2 + #10 SUB #40 SFT ;sh/r STA ( n<<4 -> r ) SWP2 POP2 SWP ( x3 x2 ) - ,&r LDR SFT ,&z0 STR ( x3 ) + ;sh/r LDA SFT ;sh/z0 STA ( x3 ) - #00 SWP ,&r LDR SFT2 ( x3< x< zhi* zlo* ) - ,&y2 STR2 ,&y0 STR2 ( save ylo, yhi ) - ,&x2 STR2 ,&x0 STR2 ( save xlo, xhi ) - #0000 #0000 ,&z0 STR2 ,&z2 STR2 ( reset zhi, zlo ) + ;sh/y2 STA2 ;sh/y0 STA2 ( save ylo, yhi ) + ;sh/x2 STA2 ;sh/x0 STA2 ( save xlo, xhi ) + #0000 #0000 ;sh/z0 STA2 ;sh/z2 STA2 ( reset zhi, zlo ) ( x3 + y3 => z2z3 ) - #00 ,&x3 LDR #00 ,&y3 LDR ADD2 ,&z2 STR2 + #00 ;sh/x3 LDA #00 ;sh/y3 LDA ADD2 ;sh/z2 STA2 ( x2 + y2 + z2 => z1z2 ) - #00 ,&x2 LDR ,&z1 LDR2 ADD2 ,&z1 STR2 - #00 ,&y2 LDR ,&z1 LDR2 ADD2 ,&z1 STR2 + #00 ;sh/x2 LDA ;sh/z1 LDA2 ADD2 ;sh/z1 STA2 + #00 ;sh/y2 LDA ;sh/z1 LDA2 ADD2 ;sh/z1 STA2 ( x1 + y1 + z1 => z0z1 ) - #00 ,&x1 LDR ,&z0 LDR2 ADD2 ,&z0 STR2 - #00 ,&y1 LDR ,&z0 LDR2 ADD2 ,&z0 STR2 + #00 ;sh/x1 LDA ;sh/z0 LDA2 ADD2 ;sh/z0 STA2 + #00 ;sh/y1 LDA ;sh/z0 LDA2 ADD2 ;sh/z0 STA2 ( x0 + y0 + z0 => z0 ) - ,&x0 LDR ,&z0 LDR ADD ,&z0 STR - ,&y0 LDR ,&z0 LDR ADD ,&z0 STR + ;sh/x0 LDA ;sh/z0 LDA ADD ;sh/z0 STA + ;sh/y0 LDA ;sh/z0 LDA ADD ;sh/z0 STA ( load zhi,zlo ) - ,&z0 LDR2 ,&z2 LDR2 + ;sh/z0 LDA2 ;sh/z2 LDA2 RTN -( registers for add32 ) -[ &x0 $1 &x1 $1 &x2 $1 &x3 $1 ] -[ &y0 $1 &y1 $1 &y2 $1 &y3 $1 ] -[ &z0 $1 &z1 $1 &z2 $2 ] @negate32 ( x** -> -x** ) COMPLEMENT32 @@ -273,48 +272,44 @@ ;negate32 JSR2 ;add32 JSR2 RTN @mul16 ( x* y* -> z** ) - ,&y1 STR ,&y0 STR ( save ylo, yhi ) - ,&x1 STR ,&x0 STR ( save xlo, xhi ) - #0000 #00 ,&z0 STR2 ,&z2 STR ( reset z0,z1,z2 ) - #0000 #00 ,&a0 STR2 ,&a2 STR ( reset a0,a1,a2 ) + ;sh/y1 STA ;sh/y0 STA ( save ylo, yhi ) + ;sh/x1 STA ;sh/x0 STA ( save xlo, xhi ) + #0000 #00 ;sh/z1 STA2 ;sh/z3 STA ( reset z1,z2,z3 ) + #0000 #00 ;sh/a0 STA2 ;sh/a2 STA ( reset a0,a1,a2 ) ( x1 * y1 => z1z2 ) - #00 ,&x1 LDR #00 ,&y1 LDR MUL2 ,&z1 STR2 + #00 ;sh/x1 LDA #00 ;sh/y1 LDA MUL2 ;sh/z2 STA2 ( x0 * y1 => z0z1 ) - #00 ,&x0 LDR #00 ,&y1 LDR MUL2 ,&z0 LDR2 ADD2 ,&z0 STR2 + #00 ;sh/x0 LDA #00 ;sh/y1 LDA MUL2 ;sh/z1 LDA2 ADD2 ;sh/z1 STA2 ( x1 * y0 => a1a2 ) - #00 ,&x1 LDR #00 ,&y0 LDR MUL2 ,&a1 STR2 + #00 ;sh/x1 LDA #00 ;sh/y0 LDA MUL2 ;sh/a1 STA2 ( x0 * y0 => a0a1 ) - #00 ,&x0 LDR #00 ,&y0 LDR MUL2 ,&a0 LDR2 ADD2 ,&a0 STR2 + #00 ;sh/x0 LDA #00 ;sh/y0 LDA MUL2 ;sh/a0 LDA2 ADD2 ;sh/a0 STA2 ( add z and a<<8 ) - #00 ,&z0 LDR2 ,&z2 LDR - ,&a0 LDR2 ,&a2 LDR #00 + #00 ;sh/z1 LDA2 ;sh/z3 LDA + ;sh/a0 LDA2 ;sh/a2 LDA #00 ;add32 JSR2 RTN -[ &x0 $1 &x1 $1 ] -[ &y0 $1 &y1 $1 ] -[ &z0 $1 &z1 $1 &z2 $1 ] -[ &a0 $1 &a1 $1 &a2 $1 ] @mul32 ( x** y** -> z** ) - ,&y1 STR2 ,&y0 STR2 ( save ylo, yhi ) - ,&x1 STR2 ,&x0 STR2 ( save xlo, xhi ) - ,&y1 LDR2 ,&x1 LDR2 ;mul16 JSR2 ( [x1*y1] ) - ,&z1 STR2 ,&z0 STR2 ( sum = x1*y1, save zlo, zhi ) + ,&y2 STR2 ,&y0 STR2 ( save ylo, yhi ) + ,&x2 STR2 ,&x0 STR2 ( save xlo, xhi ) + ,&y2 LDR2 ,&x2 LDR2 ;mul16 JSR2 ( [x2*y2] ) + ,&z2 STR2 ,&z0 STR2 ( sum = x2*y2, save zlo, zhi ) - ,&y1 LDR2 ,&x0 LDR2 MUL2 ( [x0*y1]<<16 ) - ,&y0 LDR2 ,&x1 LDR2 MUL2 ( [x1*y0]<<16 ) + ,&y2 LDR2 ,&x0 LDR2 MUL2 ( [x0*y2]<<16 ) + ,&y0 LDR2 ,&x2 LDR2 MUL2 ( [x2*y0]<<16 ) ( [x0*y0]<<32 will completely overflow ) - ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y1<<16 + x1*y0<<16 ) - ,&z1 LDR2 + ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y2<<16 + x2*y0<<16 ) + ,&z2 LDR2 RTN -[ &x0 $2 &x1 $2 ] -[ &y0 $2 &y1 $2 ] -[ &z0 $2 &z1 $2 ] +[ &x0 $2 &x2 $2 ] +[ &y0 $2 &y2 $2 ] +[ &z0 $2 &z2 $2 ] @div32 ( x** y** -> q** ) ( store y and x for repeated use )