From 2fe1d7d7706c70568f2518289e117c21e411e838 Mon Sep 17 00:00:00 2001 From: d_m Date: Sun, 5 Nov 2023 20:17:26 -0500 Subject: [PATCH] major fixes to fix16 accuracy --- fix16.tal | 109 +++++++++++++++++++++++++++++++++----------------- test-fix16.py | 55 ++++++++++++++++++------- 2 files changed, 114 insertions(+), 50 deletions(-) diff --git a/fix16.tal b/fix16.tal index efc10bd..e8bf879 100644 --- a/fix16.tal +++ b/fix16.tal @@ -22,12 +22,17 @@ ( #0700 1792/256 7.0000 ) ( #7f00 32512/256 127.0000 ) ( #7fff 32767/256 127.9961 ) -( #8000 -32768/256 -128.0000 ) +( #8000 invalid invalid ) ( #8001 -32767/256 -127.9961 ) ( #8100 -32767/256 -127.0000 ) ( #ff00 -256/256 -1.0000 ) ( #ffff -1/256 -0.0039 ) ( ) +( due to the limited range operations saturate at the ) +( terminal values (i.e. #7fff and #8001). #8000 should ) +( never be generated, and can be considered a Nan value ) +( although this library does not use it as such. ) +( ) ( many 8.8 operations are equivalent to unsigned int16: ) ( * addition ) ( * subtraction ) @@ -60,7 +65,7 @@ ( - tan() is mostly supported ) ( - tan(#0192) and tan(#04b6) throw an error ) ( - a few tan() values are inaccurate due to range ) -( + values "next to" pi/2 and 3pi/2 are affected ) +( + values "next to" pi/2 and 3pi/2 are affected ) ( + tan(#0191) returns 127.996 not 227.785 ) ( + tan(#0193) returns -127.996 not -292.189 ) ( + etc. ) @@ -198,43 +203,66 @@ @x16-mul ( x* y* -- xy* ) ;x16-mul-unsigned !x16-signed-op -@x16-mul-unsigned ( x* y* -- xy* ) - DUP #00 EQU ?x16-mul-unsigned-rhs-whole - SWP2 DUP #00 EQU ?x16-mul-unsigned-rhs-whole - ,&y0 STR2 ,&x0 STR2 - #00 ,&x0 LDR #00 ,&y0 LDR MUL2 ( acc* ) - OVR ?&overflow SWP ( acc* ) - #00 ,&x1 LDR #00 ,&y0 LDR MUL2 ADD2 ( acc* ) - #00 ,&x0 LDR #00 ,&y1 LDR MUL2 ADD2 ( acc* ) - #00 ,&x1 LDR #00 ,&y1 LDR MUL2 #08 SFT2 ADD2 ( acc* ) - DUP2 #7fff GTH2 ?&overflow - JMP2r [ &x0 $1 &x1 $1 &y0 $1 &y1 $1 ] - &overflow POP2 #7fff JMP2r +@x16-mul8 ( x^ y^ -> xy* ) + #0000 SWP2 ROT SWP MUL2 JMP2r -@x16-mul-unsigned-rhs-whole ( x0_x1* y0_00* -- xy* ) - #08 SFT2 MUL2 #7fff !unsigned-min +@x16-mul-unsigned ( ab* cd* -> ac+ad+bc+bd* ) + OVR2 OVR2 STH2 STH2 ROT SWPr ROTr ( a c d b [c b a d] ) + SWP2 x16-mul8 DUP2 #007f GTH2 ?&o1 SWP ( d b acc* [c b a d] ) + SWP2 x16-mul8 #08 SFT2 ADD2 ( acc* [c b a d] ) + STH2r x16-mul8 ADD2 OVR #7f GTH ?&o3 ( acc* [c d] ) + STH2r x16-mul8 ADD2 OVR #7f GTH ?&o4 JMP2r ( acc* ) + &o1 POP2 POP2r &o3 POP2r &o4 POP2 #7fff JMP2r ( ; handle overflow ) @x16-div ( x* y* -- x/y* ) ;x16-div-unsigned !x16-signed-op +( The idea here is a bit complicated. ) +( ) +( First we look at `x y DIV2`. If that is >255 then we are ) +( going to overflow and we can return 7fff and be done. ) +( ) +( If not, we multiply that result by 256 and start looking ) +( for smaller and smaller fractions of y to star chipping ) +( away at the remainder, if any. ) +( ) +( However we want to avoid rounding errors caused by needlessly ) +( truncating y. For that reason, if the remainder is less than ) +( 0x8000 we will double it rather than diving y. Once it cannot ) +( be safely multiplied we will start dividing y by 2. After 16 ) +( rounds of multiplying the remainder or dividing the quotient ) +( we get our final answer. ) @x16-div-unsigned ( x* y* -> x/y* ) - DIV2k STH2k ( x y x/y [x/y] ) - LITr 80 SFT2r ( x y x/y [div=(x/y)<<8] ) - OVR2 STH2 ( x y x/y [y div] ) - MUL2 SUB2 ( x%y [y div] ) - STH2r LIT2r 0100 ( x%y y [0100 div] ) + DIV2k DUP2 #007f GTH2 ?&o ( x* y* x/y* ) + STH2k LITr 80 SFT2r ( x* y* x/y* [div=(x/y)<<8*] ) + OVR2 STH2 ( x* y* x/y* [div* y*] ) + MUL2 SUB2 ( x%y* [div* y*] ) + STH2r LIT2r 0100 ( x%y* y* [div* 0100*] ) + + ( We know rem < y, so start left-shifting rem. ) + &ploop ( rem* y* [div* s*] ) + STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 ) + OVR2 #7fff GTH2 ?&loop ( rem* y* [div* s*] ; rem too big, start shifting y ) + SWP2 #10 SFT2 SWP2 ( rem<<1* y* [div* s*] ) + LITr 01 SFT2r ( rem<<1* y* [div* s>>1*] ) + LTH2k ?&ploop ( rem<<1* y* [div* s>>1*] ; rem too small ) + SWP2 OVR2 SUB2 SWP2 ( rem<<1-y* y* [div* s>>1*] ) + DUP2r ROT2r ADD2r SWP2r ( rem<<1-y* y* [div+s>>1* s>>1*] ) + !&ploop ( rem<<1-y* y* [div+s>>1* s>>1*] ) + + ( We know rem < y, so start right-shifting y. ) + &loop ( rem* y* [div* s*] ) + STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 ) + #01 SFT2 LITr 01 SFT2r ( rem* y>>1* [div* s>>1*] ) + LTH2k ?&loop ( rem* y>>1* [div* s>>1**] ; rem too small ) + SWP2 OVR2 SUB2 SWP2 ( rem-y>>1* y>>1* [div* s>>1*] ) + DUP2r ROT2r ADD2r SWP2r ( rem-y>>1* y>>1* [div+s>>1* s>>1*] ) + !&loop ( rem-y>>1* y>>1* [div+s>>1* s>>1*] ) - ( We know x%y < y, so start right-shifting y. ) - &loop DUP2 #0000 EQU2 ?&done - #01 SFT2 LITr 01 SFT2r ( rem yi [shifti div] ) - LTH2k ,&loop JCN ( rem yi [shifti div] ) - SWP2 OVR2 SUB2 SWP2 ( rem-yi yi [shifti div] ) - DUP2r ROT2r ADD2r SWP2r ( rem-yi yi [shifti div+shifti] ) - !&loop ( rem-yi yi [shifti div+shifti] ) &done - - POP2 POP2 ( [shiftk div] ) - POP2r STH2r JMP2r ( div ) + POP2 POP2 POP2r STH2r ( div* ) + DUP2 #7fff GTH2 ?&oo JMP2r ( div* ) + &o POP2 POP2 &oo POP2 #7fff JMP2r ( 7fff ; saturate on overflow ) @x16-signed-op ( x* y* f* -> f(x,y)* ) STH2 LIT2r 0001 @@ -245,7 +273,11 @@ JMP2r @x16-quotient ( x* y* -> x//y* ) - DIV2 #80 SFT2 JMP2r + ;x16-quot-unsigned !x16-signed-op + +@x16-quot-unsigned ( x* y* -> x//y* ) + DIV2 DUP2 #007f GTH2 ?{ #80 SFT2 JMP2r } + POP2 #7fff JMP2r @x16-remainder ( x* y* -> x%y* ) DIV2k MUL2 SUB2 JMP2r @@ -311,16 +343,20 @@ POP2r POP2r NIP2 JMP2r ( s1* ) @x16-cos ( x* -> cos[x]* ) - x16-pi/2 ADD2 ( fall-thru ) + x16-pi/2 ,&x16-sin-cos-helper/offset STR2 !sin-cos-helper @x16-sin ( x* -> sin[x]* ) + #0000 ,&x16-sin-cos-helper/offset STR2 ( fall-through ) + +@x16-sin-cos-helper DUP2 #8000 LTH2 ?&non-negative x16-negate x16-sin/non-negative !x16-negate &non-negative x16-pi*2 STH2 ( x [2pi] ) DUP2 STH2kr x16-quotient ( x x/2pi [2pi] ) - STH2r x16-mul SUB2 ( x' ; 0 <= x' < 2pi ) - + DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] ) + STH2r x16-mul STH2r ADD2 SUB2 ( x' ; 0 <= x' < 2pi ) + LIT2 [ &offset $2 ] ADD2 DUP2 x16-3pi/2 LTH2 ?&c1 ( -sin(2pi - x) ) x16-pi*2 SWP2 SUB2 x16-sin-q !x16-negate &c1 DUP2 x16-pi LTH2 ?&c2 @@ -365,7 +401,8 @@ @x16-tan ( x* -> tan[x]* ) x16-pi*2 STH2 ( x [2pi] ) DUP2 STH2kr x16-quotient ( x x/2pi [2pi] ) - STH2r x16-mul SUB2 ( x' ; 0 <= x' < 2pi ) + DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] ) + STH2r x16-mul STH2r ADD2 SUB2 ( x' ; 0 <= x' < 2pi ) ( tan(pi/2) = tan(3pi/2) = error ) DUP2 x16-3pi/2 EQU2 ?error diff --git a/test-fix16.py b/test-fix16.py index 18be196..b168f06 100644 --- a/test-fix16.py +++ b/test-fix16.py @@ -4,15 +4,16 @@ from math import ceil, copysign, cos, floor, log, sin, sqrt, tan from os import environ from random import randint from subprocess import Popen, PIPE, run +from sys import argv def tosigned(x): return x if x < 32768 else x - 65536 u8 = {'sz': 1 << 8, 'fmt': b'%02x'} u16 = {'sz': 1 << 16, 'fmt': b'%04x'} -z16 = {'sz': 1 << 16, 'fmt': b'%04x'} -p16 = {'sz': 1 << 16, 'fmt': b'%04x'} -t16 = {'sz': 1 << 16, 'fmt': b'%04x'} +z16 = {'sz': 1 << 16, 'fmt': b'%04x'} # non-zero +p16 = {'sz': 1 << 16, 'fmt': b'%04x'} # positive +t16 = {'sz': 1 << 16, 'fmt': b'%04x'} # tangent, must not be pi/2 def eq(got, expected): return got == expected @@ -25,7 +26,7 @@ def releq(got0, expected0): else: error = abs(got - expected) / (abs(expected) + 0.001) return error < 0.01 -def abseq(got0, expected0): +def sineq(got0, expected0): got, expected = tosigned(got0), tosigned(expected0) if (expected - 10) <= got and got <= (expected + 10): return True @@ -79,6 +80,24 @@ def test(p, trials, sym, args, out, f, eq=eq): else: print('%s failed %d/%d trials (%r)' % (name, fails, trials, cases)) +def fromfix(n): + assert 0 <= n and n <= 65535 + if n >= 32768: + res = (n - 65536) / 256 + else: + res = n / 256 + return res + +bound = 32767 / 256 + +def tofix(x): + y = min(max(x, -bound), bound) + if y < 0: + res = int(ceil(65536 + y * 256)) + else: + res = int(y * 256) + return res % 65536 + def pipe(): return Popen(['uxncli', 'run.rom'], stdin=PIPE, stdout=PIPE) @@ -87,11 +106,17 @@ def x16_add(x, y): def x16_sub(x, y): return (x - y) % 65536 def x16_mul(x, y): - return ((x * y) // 256) % 65536 + return tofix(fromfix(x) * fromfix(y)) def x16_div(x, y): - return ((x * 256) // y) % 65536 + return tofix(fromfix(x) / fromfix(y)) def x16_quot(x, y): - return x16_div(x, y) & 0xff00 + n = x16_div(x, y) + if n < 0x7fff: + return n & 0xff00 + elif n > 0x8001: + return (n + 255) & 0xff00 + else: + return n def x16_rem(x, y): return x % y def x16_is_whole(x): @@ -147,13 +172,13 @@ def x16_trunc8(x): return floor(x / 256) def main(): - trials = 1000 + trials = int(argv[1]) if argv[1:] else 100 run(['uxnasm', 'test-fix16.tal', 'run.rom']) p = pipe() test(p, trials, b'+', [('x', u16), ('y', u16)], u16, x16_add) test(p, trials, b'-', [('x', u16), ('y', u16)], u16, x16_sub) test(p, trials, b'*', [('x', u16), ('y', u16)], u16, x16_mul) - test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div, eq=releq) + test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div) test(p, trials, b'\\', [('x', u16), ('y', z16)], u16, x16_quot) test(p, trials, b'%', [('x', u16), ('y', z16)], u16, x16_rem) test(p, trials, b'w', [('x', u16)], u8, x16_is_whole, eq=booleq) @@ -164,16 +189,18 @@ def main(): test(p, trials, b'>', [('x', u16), ('y', u16)], u8, x16_gt) test(p, trials, b'{', [('x', u16), ('y', u16)], u8, x16_lteq) test(p, trials, b'}', [('x', u16), ('y', u16)], u8, x16_gteq) - test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq) - test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=abseq) - test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=abseq) - test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq) - test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq) test(p, trials, b'F', [('x', u16)], u16, x16_floor) test(p, trials, b'C', [('x', u16)], u16, x16_ceil) test(p, trials, b'R', [('x', u16)], u16, x16_round) test(p, trials, b'8', [('x', u16)], u16, x16_trunc8) test(p, trials, b'T', [('x', u16)], u16, x16_trunc16) + # the next five are known to be somewhat inaccurate and use + # a "relaxed" equality predicate for testing purposes. + test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq) + test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=sineq) + test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=sineq) + test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq) + test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq) p.stdin.write(b'\n\n') p.stdin.flush() p.stdin.close()