major fixes to fix16 accuracy

This commit is contained in:
~d6 2023-11-05 20:17:26 -05:00
parent 1d0bb3da6e
commit 2fe1d7d770
2 changed files with 114 additions and 50 deletions

107
fix16.tal
View File

@ -22,12 +22,17 @@
( #0700 1792/256 7.0000 ) ( #0700 1792/256 7.0000 )
( #7f00 32512/256 127.0000 ) ( #7f00 32512/256 127.0000 )
( #7fff 32767/256 127.9961 ) ( #7fff 32767/256 127.9961 )
( #8000 -32768/256 -128.0000 ) ( #8000 invalid invalid )
( #8001 -32767/256 -127.9961 ) ( #8001 -32767/256 -127.9961 )
( #8100 -32767/256 -127.0000 ) ( #8100 -32767/256 -127.0000 )
( #ff00 -256/256 -1.0000 ) ( #ff00 -256/256 -1.0000 )
( #ffff -1/256 -0.0039 ) ( #ffff -1/256 -0.0039 )
( ) ( )
( due to the limited range operations saturate at the )
( terminal values (i.e. #7fff and #8001). #8000 should )
( never be generated, and can be considered a Nan value )
( although this library does not use it as such. )
( )
( many 8.8 operations are equivalent to unsigned int16: ) ( many 8.8 operations are equivalent to unsigned int16: )
( * addition ) ( * addition )
( * subtraction ) ( * subtraction )
@ -198,43 +203,66 @@
@x16-mul ( x* y* -- xy* ) @x16-mul ( x* y* -- xy* )
;x16-mul-unsigned !x16-signed-op ;x16-mul-unsigned !x16-signed-op
@x16-mul-unsigned ( x* y* -- xy* ) @x16-mul8 ( x^ y^ -> xy* )
DUP #00 EQU ?x16-mul-unsigned-rhs-whole #0000 SWP2 ROT SWP MUL2 JMP2r
SWP2 DUP #00 EQU ?x16-mul-unsigned-rhs-whole
,&y0 STR2 ,&x0 STR2
#00 ,&x0 LDR #00 ,&y0 LDR MUL2 ( acc* )
OVR ?&overflow SWP ( acc* )
#00 ,&x1 LDR #00 ,&y0 LDR MUL2 ADD2 ( acc* )
#00 ,&x0 LDR #00 ,&y1 LDR MUL2 ADD2 ( acc* )
#00 ,&x1 LDR #00 ,&y1 LDR MUL2 #08 SFT2 ADD2 ( acc* )
DUP2 #7fff GTH2 ?&overflow
JMP2r [ &x0 $1 &x1 $1 &y0 $1 &y1 $1 ]
&overflow POP2 #7fff JMP2r
@x16-mul-unsigned-rhs-whole ( x0_x1* y0_00* -- xy* ) @x16-mul-unsigned ( ab* cd* -> ac+ad+bc+bd* )
#08 SFT2 MUL2 #7fff !unsigned-min OVR2 OVR2 STH2 STH2 ROT SWPr ROTr ( a c d b [c b a d] )
SWP2 x16-mul8 DUP2 #007f GTH2 ?&o1 SWP ( d b acc* [c b a d] )
SWP2 x16-mul8 #08 SFT2 ADD2 ( acc* [c b a d] )
STH2r x16-mul8 ADD2 OVR #7f GTH ?&o3 ( acc* [c d] )
STH2r x16-mul8 ADD2 OVR #7f GTH ?&o4 JMP2r ( acc* )
&o1 POP2 POP2r &o3 POP2r &o4 POP2 #7fff JMP2r ( ; handle overflow )
@x16-div ( x* y* -- x/y* ) @x16-div ( x* y* -- x/y* )
;x16-div-unsigned !x16-signed-op ;x16-div-unsigned !x16-signed-op
( The idea here is a bit complicated. )
( )
( First we look at `x y DIV2`. If that is >255 then we are )
( going to overflow and we can return 7fff and be done. )
( )
( If not, we multiply that result by 256 and start looking )
( for smaller and smaller fractions of y to star chipping )
( away at the remainder, if any. )
( )
( However we want to avoid rounding errors caused by needlessly )
( truncating y. For that reason, if the remainder is less than )
( 0x8000 we will double it rather than diving y. Once it cannot )
( be safely multiplied we will start dividing y by 2. After 16 )
( rounds of multiplying the remainder or dividing the quotient )
( we get our final answer. )
@x16-div-unsigned ( x* y* -> x/y* ) @x16-div-unsigned ( x* y* -> x/y* )
DIV2k STH2k ( x y x/y [x/y] ) DIV2k DUP2 #007f GTH2 ?&o ( x* y* x/y* )
LITr 80 SFT2r ( x y x/y [div=(x/y)<<8] ) STH2k LITr 80 SFT2r ( x* y* x/y* [div=(x/y)<<8*] )
OVR2 STH2 ( x y x/y [y div] ) OVR2 STH2 ( x* y* x/y* [div* y*] )
MUL2 SUB2 ( x%y [y div] ) MUL2 SUB2 ( x%y* [div* y*] )
STH2r LIT2r 0100 ( x%y y [0100 div] ) STH2r LIT2r 0100 ( x%y* y* [div* 0100*] )
( We know rem < y, so start left-shifting rem. )
&ploop ( rem* y* [div* s*] )
STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 )
OVR2 #7fff GTH2 ?&loop ( rem* y* [div* s*] ; rem too big, start shifting y )
SWP2 #10 SFT2 SWP2 ( rem<<1* y* [div* s*] )
LITr 01 SFT2r ( rem<<1* y* [div* s>>1*] )
LTH2k ?&ploop ( rem<<1* y* [div* s>>1*] ; rem too small )
SWP2 OVR2 SUB2 SWP2 ( rem<<1-y* y* [div* s>>1*] )
DUP2r ROT2r ADD2r SWP2r ( rem<<1-y* y* [div+s>>1* s>>1*] )
!&ploop ( rem<<1-y* y* [div+s>>1* s>>1*] )
( We know rem < y, so start right-shifting y. )
&loop ( rem* y* [div* s*] )
STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 )
#01 SFT2 LITr 01 SFT2r ( rem* y>>1* [div* s>>1*] )
LTH2k ?&loop ( rem* y>>1* [div* s>>1**] ; rem too small )
SWP2 OVR2 SUB2 SWP2 ( rem-y>>1* y>>1* [div* s>>1*] )
DUP2r ROT2r ADD2r SWP2r ( rem-y>>1* y>>1* [div+s>>1* s>>1*] )
!&loop ( rem-y>>1* y>>1* [div+s>>1* s>>1*] )
( We know x%y < y, so start right-shifting y. )
&loop DUP2 #0000 EQU2 ?&done
#01 SFT2 LITr 01 SFT2r ( rem yi [shifti div] )
LTH2k ,&loop JCN ( rem yi [shifti div] )
SWP2 OVR2 SUB2 SWP2 ( rem-yi yi [shifti div] )
DUP2r ROT2r ADD2r SWP2r ( rem-yi yi [shifti div+shifti] )
!&loop ( rem-yi yi [shifti div+shifti] )
&done &done
POP2 POP2 POP2r STH2r ( div* )
POP2 POP2 ( [shiftk div] ) DUP2 #7fff GTH2 ?&oo JMP2r ( div* )
POP2r STH2r JMP2r ( div ) &o POP2 POP2 &oo POP2 #7fff JMP2r ( 7fff ; saturate on overflow )
@x16-signed-op ( x* y* f* -> f(x,y)* ) @x16-signed-op ( x* y* f* -> f(x,y)* )
STH2 LIT2r 0001 STH2 LIT2r 0001
@ -245,7 +273,11 @@
JMP2r JMP2r
@x16-quotient ( x* y* -> x//y* ) @x16-quotient ( x* y* -> x//y* )
DIV2 #80 SFT2 JMP2r ;x16-quot-unsigned !x16-signed-op
@x16-quot-unsigned ( x* y* -> x//y* )
DIV2 DUP2 #007f GTH2 ?{ #80 SFT2 JMP2r }
POP2 #7fff JMP2r
@x16-remainder ( x* y* -> x%y* ) @x16-remainder ( x* y* -> x%y* )
DIV2k MUL2 SUB2 JMP2r DIV2k MUL2 SUB2 JMP2r
@ -311,16 +343,20 @@
POP2r POP2r NIP2 JMP2r ( s1* ) POP2r POP2r NIP2 JMP2r ( s1* )
@x16-cos ( x* -> cos[x]* ) @x16-cos ( x* -> cos[x]* )
x16-pi/2 ADD2 ( fall-thru ) x16-pi/2 ,&x16-sin-cos-helper/offset STR2 !sin-cos-helper
@x16-sin ( x* -> sin[x]* ) @x16-sin ( x* -> sin[x]* )
#0000 ,&x16-sin-cos-helper/offset STR2 ( fall-through )
@x16-sin-cos-helper
DUP2 #8000 LTH2 ?&non-negative DUP2 #8000 LTH2 ?&non-negative
x16-negate x16-sin/non-negative !x16-negate x16-negate x16-sin/non-negative !x16-negate
&non-negative &non-negative
x16-pi*2 STH2 ( x [2pi] ) x16-pi*2 STH2 ( x [2pi] )
DUP2 STH2kr x16-quotient ( x x/2pi [2pi] ) DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
STH2r x16-mul SUB2 ( x' ; 0 <= x' < 2pi ) DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] )
STH2r x16-mul STH2r ADD2 SUB2 ( x' ; 0 <= x' < 2pi )
LIT2 [ &offset $2 ] ADD2
DUP2 x16-3pi/2 LTH2 ?&c1 DUP2 x16-3pi/2 LTH2 ?&c1
( -sin(2pi - x) ) x16-pi*2 SWP2 SUB2 x16-sin-q !x16-negate ( -sin(2pi - x) ) x16-pi*2 SWP2 SUB2 x16-sin-q !x16-negate
&c1 DUP2 x16-pi LTH2 ?&c2 &c1 DUP2 x16-pi LTH2 ?&c2
@ -365,7 +401,8 @@
@x16-tan ( x* -> tan[x]* ) @x16-tan ( x* -> tan[x]* )
x16-pi*2 STH2 ( x [2pi] ) x16-pi*2 STH2 ( x [2pi] )
DUP2 STH2kr x16-quotient ( x x/2pi [2pi] ) DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
STH2r x16-mul SUB2 ( x' ; 0 <= x' < 2pi ) DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] )
STH2r x16-mul STH2r ADD2 SUB2 ( x' ; 0 <= x' < 2pi )
( tan(pi/2) = tan(3pi/2) = error ) ( tan(pi/2) = tan(3pi/2) = error )
DUP2 x16-3pi/2 EQU2 ?error DUP2 x16-3pi/2 EQU2 ?error

View File

@ -4,15 +4,16 @@ from math import ceil, copysign, cos, floor, log, sin, sqrt, tan
from os import environ from os import environ
from random import randint from random import randint
from subprocess import Popen, PIPE, run from subprocess import Popen, PIPE, run
from sys import argv
def tosigned(x): def tosigned(x):
return x if x < 32768 else x - 65536 return x if x < 32768 else x - 65536
u8 = {'sz': 1 << 8, 'fmt': b'%02x'} u8 = {'sz': 1 << 8, 'fmt': b'%02x'}
u16 = {'sz': 1 << 16, 'fmt': b'%04x'} u16 = {'sz': 1 << 16, 'fmt': b'%04x'}
z16 = {'sz': 1 << 16, 'fmt': b'%04x'} z16 = {'sz': 1 << 16, 'fmt': b'%04x'} # non-zero
p16 = {'sz': 1 << 16, 'fmt': b'%04x'} p16 = {'sz': 1 << 16, 'fmt': b'%04x'} # positive
t16 = {'sz': 1 << 16, 'fmt': b'%04x'} t16 = {'sz': 1 << 16, 'fmt': b'%04x'} # tangent, must not be pi/2
def eq(got, expected): def eq(got, expected):
return got == expected return got == expected
@ -25,7 +26,7 @@ def releq(got0, expected0):
else: else:
error = abs(got - expected) / (abs(expected) + 0.001) error = abs(got - expected) / (abs(expected) + 0.001)
return error < 0.01 return error < 0.01
def abseq(got0, expected0): def sineq(got0, expected0):
got, expected = tosigned(got0), tosigned(expected0) got, expected = tosigned(got0), tosigned(expected0)
if (expected - 10) <= got and got <= (expected + 10): if (expected - 10) <= got and got <= (expected + 10):
return True return True
@ -79,6 +80,24 @@ def test(p, trials, sym, args, out, f, eq=eq):
else: else:
print('%s failed %d/%d trials (%r)' % (name, fails, trials, cases)) print('%s failed %d/%d trials (%r)' % (name, fails, trials, cases))
def fromfix(n):
assert 0 <= n and n <= 65535
if n >= 32768:
res = (n - 65536) / 256
else:
res = n / 256
return res
bound = 32767 / 256
def tofix(x):
y = min(max(x, -bound), bound)
if y < 0:
res = int(ceil(65536 + y * 256))
else:
res = int(y * 256)
return res % 65536
def pipe(): def pipe():
return Popen(['uxncli', 'run.rom'], stdin=PIPE, stdout=PIPE) return Popen(['uxncli', 'run.rom'], stdin=PIPE, stdout=PIPE)
@ -87,11 +106,17 @@ def x16_add(x, y):
def x16_sub(x, y): def x16_sub(x, y):
return (x - y) % 65536 return (x - y) % 65536
def x16_mul(x, y): def x16_mul(x, y):
return ((x * y) // 256) % 65536 return tofix(fromfix(x) * fromfix(y))
def x16_div(x, y): def x16_div(x, y):
return ((x * 256) // y) % 65536 return tofix(fromfix(x) / fromfix(y))
def x16_quot(x, y): def x16_quot(x, y):
return x16_div(x, y) & 0xff00 n = x16_div(x, y)
if n < 0x7fff:
return n & 0xff00
elif n > 0x8001:
return (n + 255) & 0xff00
else:
return n
def x16_rem(x, y): def x16_rem(x, y):
return x % y return x % y
def x16_is_whole(x): def x16_is_whole(x):
@ -147,13 +172,13 @@ def x16_trunc8(x):
return floor(x / 256) return floor(x / 256)
def main(): def main():
trials = 1000 trials = int(argv[1]) if argv[1:] else 100
run(['uxnasm', 'test-fix16.tal', 'run.rom']) run(['uxnasm', 'test-fix16.tal', 'run.rom'])
p = pipe() p = pipe()
test(p, trials, b'+', [('x', u16), ('y', u16)], u16, x16_add) test(p, trials, b'+', [('x', u16), ('y', u16)], u16, x16_add)
test(p, trials, b'-', [('x', u16), ('y', u16)], u16, x16_sub) test(p, trials, b'-', [('x', u16), ('y', u16)], u16, x16_sub)
test(p, trials, b'*', [('x', u16), ('y', u16)], u16, x16_mul) test(p, trials, b'*', [('x', u16), ('y', u16)], u16, x16_mul)
test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div, eq=releq) test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div)
test(p, trials, b'\\', [('x', u16), ('y', z16)], u16, x16_quot) test(p, trials, b'\\', [('x', u16), ('y', z16)], u16, x16_quot)
test(p, trials, b'%', [('x', u16), ('y', z16)], u16, x16_rem) test(p, trials, b'%', [('x', u16), ('y', z16)], u16, x16_rem)
test(p, trials, b'w', [('x', u16)], u8, x16_is_whole, eq=booleq) test(p, trials, b'w', [('x', u16)], u8, x16_is_whole, eq=booleq)
@ -164,16 +189,18 @@ def main():
test(p, trials, b'>', [('x', u16), ('y', u16)], u8, x16_gt) test(p, trials, b'>', [('x', u16), ('y', u16)], u8, x16_gt)
test(p, trials, b'{', [('x', u16), ('y', u16)], u8, x16_lteq) test(p, trials, b'{', [('x', u16), ('y', u16)], u8, x16_lteq)
test(p, trials, b'}', [('x', u16), ('y', u16)], u8, x16_gteq) test(p, trials, b'}', [('x', u16), ('y', u16)], u8, x16_gteq)
test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq)
test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=abseq)
test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=abseq)
test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq)
test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq)
test(p, trials, b'F', [('x', u16)], u16, x16_floor) test(p, trials, b'F', [('x', u16)], u16, x16_floor)
test(p, trials, b'C', [('x', u16)], u16, x16_ceil) test(p, trials, b'C', [('x', u16)], u16, x16_ceil)
test(p, trials, b'R', [('x', u16)], u16, x16_round) test(p, trials, b'R', [('x', u16)], u16, x16_round)
test(p, trials, b'8', [('x', u16)], u16, x16_trunc8) test(p, trials, b'8', [('x', u16)], u16, x16_trunc8)
test(p, trials, b'T', [('x', u16)], u16, x16_trunc16) test(p, trials, b'T', [('x', u16)], u16, x16_trunc16)
# the next five are known to be somewhat inaccurate and use
# a "relaxed" equality predicate for testing purposes.
test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq)
test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=sineq)
test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=sineq)
test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq)
test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq)
p.stdin.write(b'\n\n') p.stdin.write(b'\n\n')
p.stdin.flush() p.stdin.flush()
p.stdin.close() p.stdin.close()