math clean up

2022-02-09 21:24:36 -05:00 · 2022-02-09 21:24:36 -05:00 · df971a1fc8
parent 99e2ac5a1c
commit df971a1fc8
2 changed files with 99 additions and 103 deletions
--- a/math32.tal
+++ b/math32.tal
@ -1,60 +1,59 @@
-( math32.tal )
-( )
-( This library supports arithmetic on 32-bit unsigned integers, )
-( also known as long values. )
-( )
-( 32-bit long values are represented by two 16-bit short values: )
-( )
-(      decimal  hexadecimal  uxn literals )
-(            0   0x00000000   #0000 #0000 )
-(            1   0x00000001   #0000 #0001 )
-(         4660   0x00001234   #0000 #1234 )
-(        65535   0x0000ffff   #0000 #ffff )
-(        65536   0x00010000   #0001 #0000 )
-(     16777215   0x00ffffff   #00ff #ffff )
-(   4294967295   0xffffffff   #ffff #ffff )
-( )
+( math32.tal                                                      )
+(                                                                 )
+( This library supports arithmetic on 32-bit unsigned integers,   )
+( also known as long values.                                      )
+(                                                                 )
+( 32-bit long values are represented by two 16-bit short values:  )
+(                                                                 )
+(      decimal  hexadecimal  uxn literals                         )
+(            0   0x00000000   #0000 #0000                         )
+(            1   0x00000001   #0000 #0001                         )
+(         4660   0x00001234   #0000 #1234                         )
+(        65535   0x0000ffff   #0000 #ffff                         )
+(        65536   0x00010000   #0001 #0000                         )
+(     16777215   0x00ffffff   #00ff #ffff                         )
+(   4294967295   0xffffffff   #ffff #ffff                         )
+(                                                                 )
 ( The most significant 16-bit, the "high bits", are stored first. )
-( We document long values as x** -- equivalent to xhi* xlo*. )
-( )
-( Operations supported: )
-( )
-(   NAME            STACK EFFECT        DEFINITION       )
-(   add32           x** y** -> z**      x + y            )
-(   sub32           x** y** -> z**      x - y            )
-(   mul16           x*  y*  -> z**      x * y            )
-(   mul32           x** y** -> z**      x * y            )
-(   div32           x** y** -> q**      x / y            )
-(   mod32           x** y** -> r**      x % y            )
-(   divmod32        x** y** -> q** r**  x / y, x % y     )
-(   gcd32           x** y** -> z**      gcd(x, y)        )
-(   negate32        x**     -> z**      -x               )
-(   lshift32        x** n^  -> z**      x<<n             )
-(   rshift32        x** n^  -> z**      x>>n             )
-(   and32           x** y** -> z**      x & y            )
-(   or32            x** y** -> z**      x | y            )
-(   xor32           x** y** -> z**      x ^ y            )
-(   complement32    x**     -> z**      ~x               )
-(   eq32            x** y** -> bool^    x == y           )
-(   ne32            x** y** -> bool^    x != y           )
-(   is-zero32       x**     -> bool^    x == 0           )
-(   non-zero32      x**     -> bool^    x != 0           )
-(   lt32            x** y** -> bool^    x < y            )
-(   gt32            x** y** -> bool^    x > y            )
-(   lteq32          x** y** -> bool^    x <= y           )
-(   gteq32          x** y** -> bool^    x >= y           )
-(   bitcount8       x^      -> bool^    floor(log2(x))+1 )
-(   bitcount16      x*      -> bool^    floor(log2(x))+1 )
-(   bitcount32      x**     -> bool^    floor(log2(x))+1 )
-( )
-( In addition to the code this file uses 44 bytes of registers )
-( to store temporary state: )
-( )
-(   - shared memory, 16 bytes )
-(   - mul32 memory, 12 bytes )
-(   - _divmod32 memory, 16 bytes )
+( We document long values as x** -- equivalent to xhi* xlo*.      )
+(                                                                 )
+( Operations supported:                                           )
+(                                                                 )
+(   NAME            STACK EFFECT        DEFINITION                )
+(   add32           x** y** -> z**      x + y                     )
+(   sub32           x** y** -> z**      x - y                     )
+(   mul16           x*  y*  -> z**      x * y                     )
+(   mul32           x** y** -> z**      x * y                     )
+(   div32           x** y** -> q**      x / y                     )
+(   mod32           x** y** -> r**      x % y                     )
+(   divmod32        x** y** -> q** r**  x / y, x % y              )
+(   gcd32           x** y** -> z**      gcd(x, y)                 )
+(   negate32        x**     -> z**      -x                        )
+(   lshift32        x** n^  -> z**      x<<n                      )
+(   rshift32        x** n^  -> z**      x>>n                      )
+(   and32           x** y** -> z**      x & y                     )
+(   or32            x** y** -> z**      x | y                     )
+(   xor32           x** y** -> z**      x ^ y                     )
+(   complement32    x**     -> z**      ~x                        )
+(   eq32            x** y** -> bool^    x == y                    )
+(   ne32            x** y** -> bool^    x != y                    )
+(   is-zero32       x**     -> bool^    x == 0                    )
+(   non-zero32      x**     -> bool^    x != 0                    )
+(   lt32            x** y** -> bool^    x < y                     )
+(   gt32            x** y** -> bool^    x > y                     )
+(   lteq32          x** y** -> bool^    x <= y                    )
+(   gteq32          x** y** -> bool^    x >= y                    )
+(   bitcount8       x^      -> bool^    floor(log2(x))+1          )
+(   bitcount16      x*      -> bool^    floor(log2(x))+1          )
+(   bitcount32      x**     -> bool^    floor(log2(x))+1          )
+(                                                                 )
+( In addition to the code this file uses 44 bytes of registers    )
+( to store temporary state:                                       )
+(                                                                 )
+(   - shared memory, 16 bytes                                     )
+(   - mul32 memory, 12 bytes                                      )
+(   - _divmod32 memory, 16 bytes                                  )

-%RTN  { JMP2r }
 %TOR { ROT ROT } ( a b c -> c a b )
 %COMPLEMENT32 { SWP2 #ffff EOR2 SWP2 #ffff EOR2 }
 %DUP4 { OVR2 OVR2 }
@ -73,7 +72,7 @@
    ,&loop JMP
    &done
    POP ( n )
-    RTN
+    JMP2r

@bitcount16 ( x* -> n^ )
    SWP ( xlo xhi )
@ -81,40 +80,40 @@
    DUP #00 NEQ ( xlo nhi nhi!=0 )
    ,&hi-set JCN ( xlo nhi )
    SWP ;bitcount8 JSR2 ADD ( nhi+nlo )
-    RTN 
+    JMP2r 
    &hi-set
    SWP POP #08 ADD ( nhi+8 )
-    RTN
+    JMP2r

@bitcount32 ( x** -> n^ )
    SWP2 ( xlo* xhi* )
    ;bitcount16 JSR2 ( xlo* nhi )
    DUP #00 NEQ ( xlo* nhi nhi!=0 )
    ,&hi-set JCN ( xlo* nhi )
-    TOR ;bitcount16 JSR2 ADD RTN ( nhi+nlo )
+    TOR ;bitcount16 JSR2 ADD JMP2r ( nhi+nlo )
    &hi-set
    TOR POP2 #10 ADD ( nhi+16 )    
-    RTN
+    JMP2r

 ( equality )

 ( x == y )
@eq32 ( xhi* xlo* yhi* ylo* -> bool^ )
    ROT2 EQU2 STH
-    EQU2 STHr AND RTN
+    EQU2 STHr AND JMP2r

 ( x != y )
@ne32 ( xhi* xlo* yhi* ylo* -> bool^ )
    ROT2 NEQ2 STH
-    NEQ2 STHr ORA RTN
+    NEQ2 STHr ORA JMP2r

 ( x == 0 )
@is-zero32 ( x** -> bool^ )
-    ORA2 #0000 EQU2 RTN
+    ORA2 #0000 EQU2 JMP2r

 ( x != 0 )
@non-zero32 ( x** -> bool^ )
-    ORA2 #0000 NEQ2 RTN
+    ORA2 #0000 NEQ2 JMP2r

 ( comparisons )

@ -122,51 +121,51 @@
@lt32 ( x** y** -> bool^ )
    ROT2 SWP2 ( xhi yhi xlo ylo )
    LTH2 ,&lt-lo JCN ( xhi yhi )
-    LTH2 RTN
+    LTH2 JMP2r
    &lt-lo
-    GTH2 #00 EQU RTN
+    GTH2 #00 EQU JMP2r

 ( x <= y )
@lteq32 ( x** y** -> bool^ )
    ROT2 SWP2 ( xhi yhi xlo ylo )
    GTH2 ,&gt-lo JCN ( xhi yhi )
-    GTH2 #00 EQU RTN
+    GTH2 #00 EQU JMP2r
    &gt-lo
-    LTH2 RTN
+    LTH2 JMP2r

 ( x > y )
@gt32 ( x** y** -> bool^ )
    ROT2 SWP2 ( xhi yhi xlo ylo )
    GTH2 ,&gt-lo JCN ( xhi yhi )
-    GTH2 RTN
+    GTH2 JMP2r
    &gt-lo
-    LTH2 #00 EQU RTN
+    LTH2 #00 EQU JMP2r

 ( x > y )
@gteq32 ( x** y** -> bool^ )
    ROT2 SWP2 ( xhi yhi xlo ylo )
    LTH2 ,&lt-lo JCN ( xhi yhi )
-    LTH2 #00 EQU RTN
+    LTH2 #00 EQU JMP2r
    &lt-lo
-    GTH2 RTN
+    GTH2 JMP2r

 ( bitwise operations )

 ( x & y )
@and32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
-    ROT2 AND2 STH2 AND2 STH2r RTN
+    ROT2 AND2 STH2 AND2 STH2r JMP2r

 ( x | y )
@or32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
-    ROT2 ORA2 STH2 ORA2 STH2r RTN
+    ROT2 ORA2 STH2 ORA2 STH2r JMP2r

 ( x ^ y )
@xor32 ( xhi* xlo* yhi* ylo* -> xhi|yhi* xlo|ylo* )
-    ROT2 EOR2 STH2 EOR2 STH2r RTN
+    ROT2 EOR2 STH2 EOR2 STH2r JMP2r

 ( ~x )
@complement32 ( x** -> ~x** )
-    COMPLEMENT32 RTN
+    COMPLEMENT32 JMP2r

 ( temporary registers )
 ( shared by most operations, except mul32 and div32 )
@ -183,7 +182,6 @@
    DUP #10 LTH ;rshift32-1 JCN2 ( x n )
    DUP #18 LTH ;rshift32-2 JCN2 ( x n )
    ;rshift32-3 JMP2 ( x n )
-    RTN

 ( shift right by 0-7 bits )
@rshift32-0 ( x** n^ -> x<<n )
@ -192,7 +190,7 @@
    #00 STHkr SFT2 #00 ;m32/z2 LDA ORA2 ;m32/z1 STA2 ( write z1,z2 )
    #00 STHr  SFT2 #00 ;m32/z1 LDA ORA2             ( compute z0,z1 )
    ;m32/z2 LDA2
-    RTN
+    JMP2r

 ( shift right by 8-15 bits )
@rshift32-1 ( x** n^ -> x<<n )
@ -201,7 +199,7 @@
    #00 STHkr SFT2 #00 ;m32/z3 LDA ORA2 ;m32/z2 STA2 ( write z2,z3 )
    #00 STHr  SFT2 #00 ;m32/z2 LDA ORA2             ( compute z1,z2 )
    #00 TOR ;m32/z3 LDA
-    RTN
+    JMP2r

 ( shift right by 16-23 bits )
@rshift32-2 ( x** n^ -> x<<n )
@ -209,14 +207,14 @@
        STHkr SFT                      ;m32/z3 STA ( write z3 )
    #00 STHr  SFT2 #00 ;m32/z3 LDA ORA2            ( compute z2,z3 )
    #0000 SWP2
-    RTN
+    JMP2r

 ( shift right by 16-23 bits )
@rshift32-3 ( x** n^ -> x<<n )
    #18 SUB STH POP2 POP ( x0 )
    #00 SWP #0000 SWP2 ( 00 00 00 x0 )
    STHr SFT
-    RTN
+    JMP2r

 ( x << n )
@lshift32 ( x** n^ -> x<<n )
@ -224,7 +222,6 @@
    DUP #10 LTH ;lshift32-1 JCN2 ( x n )
    DUP #18 LTH ;lshift32-2 JCN2 ( x n )
    ;lshift32-3 JMP2 ( x n )
-    RTN

 ( shift left by 0-7 bits )
@lshift32-0 ( x** n^ -> x<<n )
@ -234,7 +231,7 @@
    #00 SWP STHkr SFT2 #00 ;m32/z1 LDA ORA2 ;m32/z0 STA2 ( store z0,z1 )
            STHr  SFT      ;m32/z0 LDA ORA              ( calculate z0 )
    ;m32/z1 LDA ;m32/z2 LDA2
-    RTN
+    JMP2r

 ( shift left by 8-15 bits )
@lshift32-1 ( x** n^ -> x<<n )
@ -244,7 +241,7 @@
            STHr  SFT      ;m32/z0 LDA ORA              ( calculate z0 )
    SWP POP ( x0 unused )
    ;m32/z1 LDA2 #00
-    RTN
+    JMP2r

 ( shift left by 16-23 bits )
@lshift32-2 ( x** n^ -> x<<n )
@ -253,14 +250,14 @@
            STHr  SFT  ;m32/z0 LDA ORA             ( calculate z0 )
    STH POP2 STHr
    ;m32/z1 LDA #0000
-    RTN
+    JMP2r

 ( shift left by 24-31 bits )
@lshift32-3 ( x** n^ -> x<<n )
    #18 SUB #40 SFT ( x0 x1 x2 x3 r=[n-24]<<4 )
    SFT ( x0 x1 x2 x3<<r )
    SWP2 POP2 SWP POP #0000 #00
-    RTN
+    JMP2r

 ( arithmetic )

@ -287,7 +284,7 @@

    ( load zhi,zlo )
    ;m32/z0 LDA2 ;m32/z2 LDA2
-    RTN
+    JMP2r

 ( -x )
@negate32 ( x** -> -x** )
@ -297,11 +294,11 @@
    ,&done JCN ( xlo non-zero => don't inc hi )
    SWP2 INC2 SWP2 ( -xhi -xlo )
    &done
-    RTN
+    JMP2r

 ( x - y )
@sub32 ( x** y** -> z** )
-    ;negate32 JSR2 ;add32 JSR2 RTN
+    ;negate32 JSR2 ;add32 JMP2

 ( 16-bit multiplication )
@mul16 ( x* y* -> z** )
@ -325,8 +322,7 @@
    ( add z and a<<8 )
    #00 ;m32/z1 LDA2 ;m32/z3 LDA
    ;m32/w0 LDA2 ;m32/w2 LDA #00
-    ;add32 JSR2
-    RTN
+    ;add32 JMP2

 ( x * y )
@mul32 ( x** y** -> z** ) 
@ -339,7 +335,7 @@
    ( [x0*y0]<<32 will completely overflow )
    ADD2 ,&z0 LDR2 ADD2 ( sum += x0*y1<<16 + x1*y0<<16 )
    ,&z1 LDR2
-    RTN
+    JMP2r
 [ &x0 $2 &x1 $2
  &y0 $2 &y1 $2
  &z0 $2 &z1 $2 ]
@ -347,18 +343,18 @@
@div32 ( x** y** -> q** )
    ;_divmod32 JSR2
    ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
-    RTN
+    JMP2r

@mod32 ( x** y** -> r** )
    ;_divmod32 JSR2
    ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
-    RTN
+    JMP2r

@divmod32 ( x** y** -> q** r** )
    ;_divmod32 JSR2
    ;_divmod32/quo0 LDA2 ;_divmod32/quo1 LDA2
    ;_divmod32/rem0 LDA2 ;_divmod32/rem1 LDA2
-    RTN
+    JMP2r

 ( calculate and store x / y and x % y )
@_divmod32 ( x** y** -> )
@ -371,7 +367,7 @@
    ,&div0 LDR2 ,&div1 LDR2
    ;lt32 JSR2 ,&is-zero JCN ,&not-zero JMP
    &is-zero
-    #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 RTN
+    #0000 ,&quo0 STR2 #0000 ,&quo1 STR2 JMP2r

    ( x >= y so the answer is >= 1 )
    &not-zero
@ -411,7 +407,7 @@
    ,&div0 LDR2 ,&div1 LDR2 #01 ;rshift32 JSR2 ,&div1 STR2 ,&div0 STR2 ( div >>= 1 )
    ,&cur0 LDR2 ,&cur1 LDR2 #01 ;rshift32 JSR2 ,&cur1 STR2 ,&cur0 STR2 ( cur >>= 1 )
    ,&cur0 LDR2 ,&cur1 LDR2 ;non-zero32 JSR2 ,&loop JCN ( if cur>0, loop. else we're done )
-    RTN
+    JMP2r

 ( greatest common divisor - euclidean algorithm )
@gcd32 ( x** y** -> z** )
@ -431,4 +427,4 @@
    ,&loop JMP
    &done
    POP4 ( x )
-    RTN
+    JMP2r
--- a/test-math32.tal
+++ b/test-math32.tal
@ -32,7 +32,7 @@
    DUP #3a LTH ,&hi-digit JCN
    #57 ,&hi JMP &hi-digit #30
    &hi SUB #40 SFT ORA
-RTN
+JMP2r

@buf $24 ( buffer used by test-interact )
@pos $2 ( position in buffer used by test-interact )
@ -74,14 +74,14 @@ RTN

@read-byte ( addr* -> x^ )
    LDA2 ;parse-byte JSR2
-RTN
+JMP2r

@read-long ( addr* -> x** )
    DUP2 ,&loc STR2 LDA2 ;parse-byte JSR2
    ,&loc LDR2 #0002 ADD2 LDA2 ;parse-byte JSR2
    ,&loc LDR2 #0004 ADD2 LDA2 ;parse-byte JSR2
    ,&loc LDR2 #0006 ADD2 LDA2 ;parse-byte JSR2
-RTN
+JMP2r
 [ &loc $2 ]

 ( format: ". xxxxxxxx" -> "zzzzzzzz" )
@ -148,11 +148,11 @@ RTN
    SWP2
    SWP EMIT-BYTE EMIT-BYTE
    SWP EMIT-BYTE EMIT-BYTE
-RTN
+JMP2r

@emit-byte ( x^ -> )
    EMIT-BYTE
-RTN
+JMP2r

 ( convenience for less branching when printing hex )
@digits