major fixes to fix16 accuracy

2023-11-05 20:17:26 -05:00 · 2023-11-05 20:17:26 -05:00 · 2fe1d7d770
parent 1d0bb3da6e
commit 2fe1d7d770
2 changed files with 114 additions and 50 deletions
--- a/fix16.tal
+++ b/fix16.tal
@ -22,12 +22,17 @@
 ( #0700     1792/256      7.0000                          )
 ( #7f00    32512/256    127.0000                          )
 ( #7fff    32767/256    127.9961                          )
-( #8000   -32768/256   -128.0000                          )
+( #8000      invalid     invalid                          )
 ( #8001   -32767/256   -127.9961                          )
 ( #8100   -32767/256   -127.0000                          )
 ( #ff00     -256/256     -1.0000                          )
 ( #ffff       -1/256     -0.0039                          )
 (                                                         )
 ( due to the limited range operations saturate at the     )
 ( terminal values (i.e. #7fff and #8001). #8000 should    )
 ( never be generated, and can be considered a Nan value   )
 ( although this library does not use it as such.          )
 (                                                         )
 ( many 8.8 operations are equivalent to unsigned int16:   )
 (   * addition                                            )
 (   * subtraction                                         )
@ -60,7 +65,7 @@
 (  - tan() is mostly supported                            )
 (  - tan(#0192) and tan(#04b6) throw an error             )
 (  - a few tan() values are inaccurate due to range       )
-(    + values "next to" pi/2 and 3pi/2 are affected       ) 
+(    + values "next to" pi/2 and 3pi/2 are affected       )
 (    + tan(#0191) returns 127.996 not 227.785             )
 (    + tan(#0193) returns -127.996 not -292.189           )
 (    + etc.                                               )
@ -198,43 +203,66 @@
@x16-mul ( x* y* -- xy* )
    ;x16-mul-unsigned !x16-signed-op
-@x16-mul-unsigned ( x* y* -- xy* )
+@x16-mul8 ( x^ y^ -> xy* )
-         DUP #00 EQU ?x16-mul-unsigned-rhs-whole
+    #0000 SWP2 ROT SWP MUL2 JMP2r
    SWP2 DUP #00 EQU ?x16-mul-unsigned-rhs-whole
    ,&y0 STR2 ,&x0 STR2
    #00 ,&x0 LDR #00 ,&y0 LDR MUL2      ( acc* )
    OVR ?&overflow SWP                  ( acc* )
    #00 ,&x1 LDR #00 ,&y0 LDR MUL2 ADD2 ( acc* )
    #00 ,&x0 LDR #00 ,&y1 LDR MUL2 ADD2 ( acc* )
    #00 ,&x1 LDR #00 ,&y1 LDR MUL2 #08 SFT2 ADD2 ( acc* )
    DUP2 #7fff GTH2 ?&overflow
    JMP2r [ &x0 $1 &x1 $1 &y0 $1 &y1 $1 ]
    &overflow POP2 #7fff JMP2r
-@x16-mul-unsigned-rhs-whole ( x0_x1* y0_00* -- xy* )
+@x16-mul-unsigned ( ab* cd* -> ac+ad+bc+bd* )
-    #08 SFT2 MUL2 #7fff !unsigned-min
+    OVR2 OVR2 STH2 STH2 ROT SWPr ROTr             ( a c d b [c b a d] )
    SWP2  x16-mul8 DUP2 #007f GTH2 ?&o1 SWP       ( d b acc* [c b a d] )
    SWP2  x16-mul8 #08 SFT2 ADD2                  ( acc* [c b a d] )
    STH2r x16-mul8 ADD2 OVR #7f GTH ?&o3          ( acc* [c d] )
    STH2r x16-mul8 ADD2 OVR #7f GTH ?&o4 JMP2r    ( acc* )
    &o1 POP2 POP2r &o3 POP2r &o4 POP2 #7fff JMP2r ( ; handle overflow )
@x16-div ( x* y* -- x/y* )
    ;x16-div-unsigned !x16-signed-op
 ( The idea here is a bit complicated. )
 ( )
 ( First we look at `x y DIV2`. If that is >255 then we are )
 ( going to overflow and we can return 7fff and be done. )
 ( )
 ( If not, we multiply that result by 256 and start looking )
 ( for smaller and smaller fractions of y to star chipping )
 ( away at the remainder, if any. )
 ( )
 ( However we want to avoid rounding errors caused by needlessly )
 ( truncating y. For that reason, if the remainder is less than )
 ( 0x8000 we will double it rather than diving y. Once it cannot )
 ( be safely multiplied we will start dividing y by 2. After 16 )
 ( rounds of multiplying the remainder or dividing the quotient )
 ( we get our final answer. )
@x16-div-unsigned ( x* y* -> x/y* )
-    DIV2k STH2k       ( x y x/y [x/y] )
+    DIV2k DUP2 #007f GTH2 ?&o   ( x* y* x/y* )
-    LITr 80 SFT2r     ( x y x/y [div=(x/y)<<8] )
+    STH2k LITr 80 SFT2r         ( x* y* x/y* [div=(x/y)<<8*] )
-    OVR2 STH2         ( x y x/y [y div] )
+    OVR2 STH2                   ( x* y* x/y* [div* y*] )
-    MUL2 SUB2         ( x%y     [y div] )
+    MUL2 SUB2                   ( x%y* [div* y*] )
-    STH2r LIT2r 0100  ( x%y y   [0100 div] )
+    STH2r LIT2r 0100            ( x%y* y* [div* 0100*] )
    ( We know rem < y, so start left-shifting rem. )
    &ploop                       ( rem* y* [div* s*] )
        STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 )
        OVR2 #7fff GTH2 ?&loop   ( rem* y* [div* s*] ; rem too big, start shifting y )
        SWP2 #10 SFT2 SWP2       ( rem<<1* y* [div* s*] )
        LITr 01 SFT2r            ( rem<<1* y* [div* s>>1*] )
        LTH2k ?&ploop            ( rem<<1* y* [div* s>>1*] ; rem too small )
        SWP2 OVR2 SUB2 SWP2      ( rem<<1-y* y* [div* s>>1*] )
        DUP2r ROT2r ADD2r SWP2r  ( rem<<1-y* y* [div+s>>1* s>>1*] )
        !&ploop                  ( rem<<1-y* y* [div+s>>1* s>>1*] )
    ( We know rem < y, so start right-shifting y. )
    &loop                        ( rem* y* [div* s*] )
        STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 )
        #01 SFT2 LITr 01 SFT2r   ( rem* y>>1* [div* s>>1*] )
        LTH2k ?&loop             ( rem* y>>1* [div* s>>1**] ; rem too small )
        SWP2 OVR2 SUB2 SWP2      ( rem-y>>1* y>>1* [div* s>>1*] )
        DUP2r ROT2r ADD2r SWP2r  ( rem-y>>1* y>>1* [div+s>>1* s>>1*] )
        !&loop                   ( rem-y>>1* y>>1* [div+s>>1* s>>1*] )
    ( We know x%y < y, so start right-shifting y. )
    &loop DUP2 #0000 EQU2 ?&done
        #01 SFT2 LITr 01 SFT2r  ( rem yi    [shifti div] )
        LTH2k ,&loop JCN        ( rem yi    [shifti div] )
        SWP2 OVR2 SUB2 SWP2     ( rem-yi yi [shifti div] )
        DUP2r ROT2r ADD2r SWP2r ( rem-yi yi [shifti div+shifti] )
        !&loop                  ( rem-yi yi [shifti div+shifti] )
    &done
-
+    POP2 POP2 POP2r STH2r        ( div* )
-    POP2 POP2         ( [shiftk div] )
+    DUP2 #7fff GTH2 ?&oo JMP2r   ( div* )
-    POP2r STH2r JMP2r ( div )
+    &o POP2 POP2 &oo POP2 #7fff JMP2r ( 7fff ; saturate on overflow )
@x16-signed-op ( x* y* f* -> f(x,y)* )
    STH2 LIT2r 0001
@ -245,7 +273,11 @@
    JMP2r
@x16-quotient ( x* y* -> x//y* )
-    DIV2 #80 SFT2 JMP2r
+    ;x16-quot-unsigned !x16-signed-op
@x16-quot-unsigned ( x* y* -> x//y* )
    DIV2 DUP2 #007f GTH2 ?{ #80 SFT2 JMP2r }
    POP2 #7fff JMP2r
@x16-remainder ( x* y* -> x%y* )
    DIV2k MUL2 SUB2 JMP2r
@ -311,16 +343,20 @@
        POP2r POP2r NIP2 JMP2r  ( s1* )
@x16-cos ( x* -> cos[x]* )
-    x16-pi/2 ADD2 ( fall-thru )
+    x16-pi/2 ,&x16-sin-cos-helper/offset STR2 !sin-cos-helper
@x16-sin ( x* -> sin[x]* )
    #0000 ,&x16-sin-cos-helper/offset STR2 ( fall-through )
@x16-sin-cos-helper
    DUP2 #8000 LTH2 ?&non-negative
    x16-negate x16-sin/non-negative !x16-negate
    &non-negative
    x16-pi*2 STH2            ( x       [2pi] )
    DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
-    STH2r x16-mul SUB2       ( x' ; 0 <= x' < 2pi )
+    DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] )
-
+    STH2r x16-mul STH2r ADD2 SUB2       ( x' ; 0 <= x' < 2pi )
    LIT2 [ &offset $2 ] ADD2
        DUP2 x16-3pi/2 LTH2 ?&c1
    ( -sin(2pi - x) ) x16-pi*2 SWP2 SUB2 x16-sin-q !x16-negate
    &c1 DUP2 x16-pi LTH2 ?&c2
@ -365,7 +401,8 @@
@x16-tan ( x* -> tan[x]* )
    x16-pi*2 STH2            ( x       [2pi] )
    DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
-    STH2r x16-mul SUB2       ( x' ; 0 <= x' < 2pi )
+    DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] )
    STH2r x16-mul STH2r ADD2 SUB2       ( x' ; 0 <= x' < 2pi )
    ( tan(pi/2) = tan(3pi/2) = error )
    DUP2 x16-3pi/2 EQU2 ?error
--- a/test-fix16.py
+++ b/test-fix16.py
@ -4,15 +4,16 @@ from math import ceil, copysign, cos, floor, log, sin, sqrt, tan
 from os import environ
 from random import randint
 from subprocess import Popen, PIPE, run
 from sys import argv
 def tosigned(x):
    return x if x < 32768 else x - 65536
 u8  = {'sz': 1 << 8,  'fmt': b'%02x'}
 u16 = {'sz': 1 << 16, 'fmt': b'%04x'}
-z16 = {'sz': 1 << 16, 'fmt': b'%04x'}
+z16 = {'sz': 1 << 16, 'fmt': b'%04x'} # non-zero
-p16 = {'sz': 1 << 16, 'fmt': b'%04x'}
+p16 = {'sz': 1 << 16, 'fmt': b'%04x'} # positive
-t16 = {'sz': 1 << 16, 'fmt': b'%04x'}
+t16 = {'sz': 1 << 16, 'fmt': b'%04x'} # tangent, must not be pi/2
 def eq(got, expected):
    return got == expected
@ -25,7 +26,7 @@ def releq(got0, expected0):
    else:
        error = abs(got - expected) / (abs(expected) + 0.001)
        return error < 0.01
-def abseq(got0, expected0):
+def sineq(got0, expected0):
    got, expected = tosigned(got0), tosigned(expected0)
    if (expected - 10) <= got and got <= (expected + 10):
        return True
@ -79,6 +80,24 @@ def test(p, trials, sym, args, out, f, eq=eq):
    else:
        print('%s failed %d/%d trials (%r)' % (name, fails, trials, cases))
 def fromfix(n):
    assert 0 <= n and n <= 65535
    if n >= 32768:
        res = (n - 65536) / 256
    else:
        res = n / 256
    return res
 bound = 32767 / 256
 def tofix(x):
    y = min(max(x, -bound), bound)
    if y < 0:
        res = int(ceil(65536 + y * 256))
    else:
        res = int(y * 256)
    return res % 65536
 def pipe():
    return Popen(['uxncli', 'run.rom'], stdin=PIPE, stdout=PIPE)
@ -87,11 +106,17 @@ def x16_add(x, y):
 def x16_sub(x, y):
    return (x - y) % 65536
 def x16_mul(x, y):
-    return ((x * y) // 256) % 65536
+    return tofix(fromfix(x) * fromfix(y))
 def x16_div(x, y):
-    return ((x * 256) // y) % 65536
+    return tofix(fromfix(x) / fromfix(y))
 def x16_quot(x, y):
-    return x16_div(x, y) & 0xff00
+    n = x16_div(x, y)
    if n < 0x7fff:
        return n & 0xff00
    elif n > 0x8001:
        return (n + 255) & 0xff00
    else:
        return n
 def x16_rem(x, y):
    return x % y
 def x16_is_whole(x):
@ -147,13 +172,13 @@ def x16_trunc8(x):
        return floor(x / 256)
 def main():
-    trials = 1000
+    trials = int(argv[1]) if argv[1:] else 100
    run(['uxnasm', 'test-fix16.tal', 'run.rom'])
    p = pipe()
    test(p, trials, b'+', [('x', u16), ('y', u16)], u16, x16_add)
    test(p, trials, b'-', [('x', u16), ('y', u16)], u16, x16_sub)
    test(p, trials, b'*', [('x', u16), ('y', u16)], u16, x16_mul)
-    test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div, eq=releq)
+    test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div)
    test(p, trials, b'\\', [('x', u16), ('y', z16)], u16, x16_quot)
    test(p, trials, b'%', [('x', u16), ('y', z16)], u16, x16_rem)
    test(p, trials, b'w', [('x', u16)], u8, x16_is_whole, eq=booleq)
@ -164,16 +189,18 @@ def main():
    test(p, trials, b'>', [('x', u16), ('y', u16)], u8, x16_gt)
    test(p, trials, b'{', [('x', u16), ('y', u16)], u8, x16_lteq)
    test(p, trials, b'}', [('x', u16), ('y', u16)], u8, x16_gteq)
    test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq)
    test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=abseq)
    test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=abseq)
    test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq)
    test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq)
    test(p, trials, b'F', [('x', u16)], u16, x16_floor)
    test(p, trials, b'C', [('x', u16)], u16, x16_ceil)
    test(p, trials, b'R', [('x', u16)], u16, x16_round)
    test(p, trials, b'8', [('x', u16)], u16, x16_trunc8)
    test(p, trials, b'T', [('x', u16)], u16, x16_trunc16)
    # the next five are known to be somewhat inaccurate and use
    # a "relaxed" equality predicate for testing purposes.
    test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq)
    test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=sineq)
    test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=sineq)
    test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq)
    test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq)
    p.stdin.write(b'\n\n')
    p.stdin.flush()
    p.stdin.close()