major fixes to fix16 accuracy
This commit is contained in:
parent
1d0bb3da6e
commit
2fe1d7d770
109
fix16.tal
109
fix16.tal
|
@ -22,12 +22,17 @@
|
||||||
( #0700 1792/256 7.0000 )
|
( #0700 1792/256 7.0000 )
|
||||||
( #7f00 32512/256 127.0000 )
|
( #7f00 32512/256 127.0000 )
|
||||||
( #7fff 32767/256 127.9961 )
|
( #7fff 32767/256 127.9961 )
|
||||||
( #8000 -32768/256 -128.0000 )
|
( #8000 invalid invalid )
|
||||||
( #8001 -32767/256 -127.9961 )
|
( #8001 -32767/256 -127.9961 )
|
||||||
( #8100 -32767/256 -127.0000 )
|
( #8100 -32767/256 -127.0000 )
|
||||||
( #ff00 -256/256 -1.0000 )
|
( #ff00 -256/256 -1.0000 )
|
||||||
( #ffff -1/256 -0.0039 )
|
( #ffff -1/256 -0.0039 )
|
||||||
( )
|
( )
|
||||||
|
( due to the limited range operations saturate at the )
|
||||||
|
( terminal values (i.e. #7fff and #8001). #8000 should )
|
||||||
|
( never be generated, and can be considered a Nan value )
|
||||||
|
( although this library does not use it as such. )
|
||||||
|
( )
|
||||||
( many 8.8 operations are equivalent to unsigned int16: )
|
( many 8.8 operations are equivalent to unsigned int16: )
|
||||||
( * addition )
|
( * addition )
|
||||||
( * subtraction )
|
( * subtraction )
|
||||||
|
@ -60,7 +65,7 @@
|
||||||
( - tan() is mostly supported )
|
( - tan() is mostly supported )
|
||||||
( - tan(#0192) and tan(#04b6) throw an error )
|
( - tan(#0192) and tan(#04b6) throw an error )
|
||||||
( - a few tan() values are inaccurate due to range )
|
( - a few tan() values are inaccurate due to range )
|
||||||
( + values "next to" pi/2 and 3pi/2 are affected )
|
( + values "next to" pi/2 and 3pi/2 are affected )
|
||||||
( + tan(#0191) returns 127.996 not 227.785 )
|
( + tan(#0191) returns 127.996 not 227.785 )
|
||||||
( + tan(#0193) returns -127.996 not -292.189 )
|
( + tan(#0193) returns -127.996 not -292.189 )
|
||||||
( + etc. )
|
( + etc. )
|
||||||
|
@ -198,43 +203,66 @@
|
||||||
@x16-mul ( x* y* -- xy* )
|
@x16-mul ( x* y* -- xy* )
|
||||||
;x16-mul-unsigned !x16-signed-op
|
;x16-mul-unsigned !x16-signed-op
|
||||||
|
|
||||||
@x16-mul-unsigned ( x* y* -- xy* )
|
@x16-mul8 ( x^ y^ -> xy* )
|
||||||
DUP #00 EQU ?x16-mul-unsigned-rhs-whole
|
#0000 SWP2 ROT SWP MUL2 JMP2r
|
||||||
SWP2 DUP #00 EQU ?x16-mul-unsigned-rhs-whole
|
|
||||||
,&y0 STR2 ,&x0 STR2
|
|
||||||
#00 ,&x0 LDR #00 ,&y0 LDR MUL2 ( acc* )
|
|
||||||
OVR ?&overflow SWP ( acc* )
|
|
||||||
#00 ,&x1 LDR #00 ,&y0 LDR MUL2 ADD2 ( acc* )
|
|
||||||
#00 ,&x0 LDR #00 ,&y1 LDR MUL2 ADD2 ( acc* )
|
|
||||||
#00 ,&x1 LDR #00 ,&y1 LDR MUL2 #08 SFT2 ADD2 ( acc* )
|
|
||||||
DUP2 #7fff GTH2 ?&overflow
|
|
||||||
JMP2r [ &x0 $1 &x1 $1 &y0 $1 &y1 $1 ]
|
|
||||||
&overflow POP2 #7fff JMP2r
|
|
||||||
|
|
||||||
@x16-mul-unsigned-rhs-whole ( x0_x1* y0_00* -- xy* )
|
@x16-mul-unsigned ( ab* cd* -> ac+ad+bc+bd* )
|
||||||
#08 SFT2 MUL2 #7fff !unsigned-min
|
OVR2 OVR2 STH2 STH2 ROT SWPr ROTr ( a c d b [c b a d] )
|
||||||
|
SWP2 x16-mul8 DUP2 #007f GTH2 ?&o1 SWP ( d b acc* [c b a d] )
|
||||||
|
SWP2 x16-mul8 #08 SFT2 ADD2 ( acc* [c b a d] )
|
||||||
|
STH2r x16-mul8 ADD2 OVR #7f GTH ?&o3 ( acc* [c d] )
|
||||||
|
STH2r x16-mul8 ADD2 OVR #7f GTH ?&o4 JMP2r ( acc* )
|
||||||
|
&o1 POP2 POP2r &o3 POP2r &o4 POP2 #7fff JMP2r ( ; handle overflow )
|
||||||
|
|
||||||
@x16-div ( x* y* -- x/y* )
|
@x16-div ( x* y* -- x/y* )
|
||||||
;x16-div-unsigned !x16-signed-op
|
;x16-div-unsigned !x16-signed-op
|
||||||
|
|
||||||
|
( The idea here is a bit complicated. )
|
||||||
|
( )
|
||||||
|
( First we look at `x y DIV2`. If that is >255 then we are )
|
||||||
|
( going to overflow and we can return 7fff and be done. )
|
||||||
|
( )
|
||||||
|
( If not, we multiply that result by 256 and start looking )
|
||||||
|
( for smaller and smaller fractions of y to star chipping )
|
||||||
|
( away at the remainder, if any. )
|
||||||
|
( )
|
||||||
|
( However we want to avoid rounding errors caused by needlessly )
|
||||||
|
( truncating y. For that reason, if the remainder is less than )
|
||||||
|
( 0x8000 we will double it rather than diving y. Once it cannot )
|
||||||
|
( be safely multiplied we will start dividing y by 2. After 16 )
|
||||||
|
( rounds of multiplying the remainder or dividing the quotient )
|
||||||
|
( we get our final answer. )
|
||||||
@x16-div-unsigned ( x* y* -> x/y* )
|
@x16-div-unsigned ( x* y* -> x/y* )
|
||||||
DIV2k STH2k ( x y x/y [x/y] )
|
DIV2k DUP2 #007f GTH2 ?&o ( x* y* x/y* )
|
||||||
LITr 80 SFT2r ( x y x/y [div=(x/y)<<8] )
|
STH2k LITr 80 SFT2r ( x* y* x/y* [div=(x/y)<<8*] )
|
||||||
OVR2 STH2 ( x y x/y [y div] )
|
OVR2 STH2 ( x* y* x/y* [div* y*] )
|
||||||
MUL2 SUB2 ( x%y [y div] )
|
MUL2 SUB2 ( x%y* [div* y*] )
|
||||||
STH2r LIT2r 0100 ( x%y y [0100 div] )
|
STH2r LIT2r 0100 ( x%y* y* [div* 0100*] )
|
||||||
|
|
||||||
|
( We know rem < y, so start left-shifting rem. )
|
||||||
|
&ploop ( rem* y* [div* s*] )
|
||||||
|
STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 )
|
||||||
|
OVR2 #7fff GTH2 ?&loop ( rem* y* [div* s*] ; rem too big, start shifting y )
|
||||||
|
SWP2 #10 SFT2 SWP2 ( rem<<1* y* [div* s*] )
|
||||||
|
LITr 01 SFT2r ( rem<<1* y* [div* s>>1*] )
|
||||||
|
LTH2k ?&ploop ( rem<<1* y* [div* s>>1*] ; rem too small )
|
||||||
|
SWP2 OVR2 SUB2 SWP2 ( rem<<1-y* y* [div* s>>1*] )
|
||||||
|
DUP2r ROT2r ADD2r SWP2r ( rem<<1-y* y* [div+s>>1* s>>1*] )
|
||||||
|
!&ploop ( rem<<1-y* y* [div+s>>1* s>>1*] )
|
||||||
|
|
||||||
|
( We know rem < y, so start right-shifting y. )
|
||||||
|
&loop ( rem* y* [div* s*] )
|
||||||
|
STH2kr #0000 EQU2 ?&done ( rem* y* [div* s*] ; done when s=0 )
|
||||||
|
#01 SFT2 LITr 01 SFT2r ( rem* y>>1* [div* s>>1*] )
|
||||||
|
LTH2k ?&loop ( rem* y>>1* [div* s>>1**] ; rem too small )
|
||||||
|
SWP2 OVR2 SUB2 SWP2 ( rem-y>>1* y>>1* [div* s>>1*] )
|
||||||
|
DUP2r ROT2r ADD2r SWP2r ( rem-y>>1* y>>1* [div+s>>1* s>>1*] )
|
||||||
|
!&loop ( rem-y>>1* y>>1* [div+s>>1* s>>1*] )
|
||||||
|
|
||||||
( We know x%y < y, so start right-shifting y. )
|
|
||||||
&loop DUP2 #0000 EQU2 ?&done
|
|
||||||
#01 SFT2 LITr 01 SFT2r ( rem yi [shifti div] )
|
|
||||||
LTH2k ,&loop JCN ( rem yi [shifti div] )
|
|
||||||
SWP2 OVR2 SUB2 SWP2 ( rem-yi yi [shifti div] )
|
|
||||||
DUP2r ROT2r ADD2r SWP2r ( rem-yi yi [shifti div+shifti] )
|
|
||||||
!&loop ( rem-yi yi [shifti div+shifti] )
|
|
||||||
&done
|
&done
|
||||||
|
POP2 POP2 POP2r STH2r ( div* )
|
||||||
POP2 POP2 ( [shiftk div] )
|
DUP2 #7fff GTH2 ?&oo JMP2r ( div* )
|
||||||
POP2r STH2r JMP2r ( div )
|
&o POP2 POP2 &oo POP2 #7fff JMP2r ( 7fff ; saturate on overflow )
|
||||||
|
|
||||||
@x16-signed-op ( x* y* f* -> f(x,y)* )
|
@x16-signed-op ( x* y* f* -> f(x,y)* )
|
||||||
STH2 LIT2r 0001
|
STH2 LIT2r 0001
|
||||||
|
@ -245,7 +273,11 @@
|
||||||
JMP2r
|
JMP2r
|
||||||
|
|
||||||
@x16-quotient ( x* y* -> x//y* )
|
@x16-quotient ( x* y* -> x//y* )
|
||||||
DIV2 #80 SFT2 JMP2r
|
;x16-quot-unsigned !x16-signed-op
|
||||||
|
|
||||||
|
@x16-quot-unsigned ( x* y* -> x//y* )
|
||||||
|
DIV2 DUP2 #007f GTH2 ?{ #80 SFT2 JMP2r }
|
||||||
|
POP2 #7fff JMP2r
|
||||||
|
|
||||||
@x16-remainder ( x* y* -> x%y* )
|
@x16-remainder ( x* y* -> x%y* )
|
||||||
DIV2k MUL2 SUB2 JMP2r
|
DIV2k MUL2 SUB2 JMP2r
|
||||||
|
@ -311,16 +343,20 @@
|
||||||
POP2r POP2r NIP2 JMP2r ( s1* )
|
POP2r POP2r NIP2 JMP2r ( s1* )
|
||||||
|
|
||||||
@x16-cos ( x* -> cos[x]* )
|
@x16-cos ( x* -> cos[x]* )
|
||||||
x16-pi/2 ADD2 ( fall-thru )
|
x16-pi/2 ,&x16-sin-cos-helper/offset STR2 !sin-cos-helper
|
||||||
|
|
||||||
@x16-sin ( x* -> sin[x]* )
|
@x16-sin ( x* -> sin[x]* )
|
||||||
|
#0000 ,&x16-sin-cos-helper/offset STR2 ( fall-through )
|
||||||
|
|
||||||
|
@x16-sin-cos-helper
|
||||||
DUP2 #8000 LTH2 ?&non-negative
|
DUP2 #8000 LTH2 ?&non-negative
|
||||||
x16-negate x16-sin/non-negative !x16-negate
|
x16-negate x16-sin/non-negative !x16-negate
|
||||||
&non-negative
|
&non-negative
|
||||||
x16-pi*2 STH2 ( x [2pi] )
|
x16-pi*2 STH2 ( x [2pi] )
|
||||||
DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
|
DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
|
||||||
STH2r x16-mul SUB2 ( x' ; 0 <= x' < 2pi )
|
DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] )
|
||||||
|
STH2r x16-mul STH2r ADD2 SUB2 ( x' ; 0 <= x' < 2pi )
|
||||||
|
LIT2 [ &offset $2 ] ADD2
|
||||||
DUP2 x16-3pi/2 LTH2 ?&c1
|
DUP2 x16-3pi/2 LTH2 ?&c1
|
||||||
( -sin(2pi - x) ) x16-pi*2 SWP2 SUB2 x16-sin-q !x16-negate
|
( -sin(2pi - x) ) x16-pi*2 SWP2 SUB2 x16-sin-q !x16-negate
|
||||||
&c1 DUP2 x16-pi LTH2 ?&c2
|
&c1 DUP2 x16-pi LTH2 ?&c2
|
||||||
|
@ -365,7 +401,8 @@
|
||||||
@x16-tan ( x* -> tan[x]* )
|
@x16-tan ( x* -> tan[x]* )
|
||||||
x16-pi*2 STH2 ( x [2pi] )
|
x16-pi*2 STH2 ( x [2pi] )
|
||||||
DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
|
DUP2 STH2kr x16-quotient ( x x/2pi [2pi] )
|
||||||
STH2r x16-mul SUB2 ( x' ; 0 <= x' < 2pi )
|
DUP2 #1400 DIV2 STH2 SWP2r ( x x/2pi [adj* 2pi*] )
|
||||||
|
STH2r x16-mul STH2r ADD2 SUB2 ( x' ; 0 <= x' < 2pi )
|
||||||
|
|
||||||
( tan(pi/2) = tan(3pi/2) = error )
|
( tan(pi/2) = tan(3pi/2) = error )
|
||||||
DUP2 x16-3pi/2 EQU2 ?error
|
DUP2 x16-3pi/2 EQU2 ?error
|
||||||
|
|
|
@ -4,15 +4,16 @@ from math import ceil, copysign, cos, floor, log, sin, sqrt, tan
|
||||||
from os import environ
|
from os import environ
|
||||||
from random import randint
|
from random import randint
|
||||||
from subprocess import Popen, PIPE, run
|
from subprocess import Popen, PIPE, run
|
||||||
|
from sys import argv
|
||||||
|
|
||||||
def tosigned(x):
|
def tosigned(x):
|
||||||
return x if x < 32768 else x - 65536
|
return x if x < 32768 else x - 65536
|
||||||
|
|
||||||
u8 = {'sz': 1 << 8, 'fmt': b'%02x'}
|
u8 = {'sz': 1 << 8, 'fmt': b'%02x'}
|
||||||
u16 = {'sz': 1 << 16, 'fmt': b'%04x'}
|
u16 = {'sz': 1 << 16, 'fmt': b'%04x'}
|
||||||
z16 = {'sz': 1 << 16, 'fmt': b'%04x'}
|
z16 = {'sz': 1 << 16, 'fmt': b'%04x'} # non-zero
|
||||||
p16 = {'sz': 1 << 16, 'fmt': b'%04x'}
|
p16 = {'sz': 1 << 16, 'fmt': b'%04x'} # positive
|
||||||
t16 = {'sz': 1 << 16, 'fmt': b'%04x'}
|
t16 = {'sz': 1 << 16, 'fmt': b'%04x'} # tangent, must not be pi/2
|
||||||
|
|
||||||
def eq(got, expected):
|
def eq(got, expected):
|
||||||
return got == expected
|
return got == expected
|
||||||
|
@ -25,7 +26,7 @@ def releq(got0, expected0):
|
||||||
else:
|
else:
|
||||||
error = abs(got - expected) / (abs(expected) + 0.001)
|
error = abs(got - expected) / (abs(expected) + 0.001)
|
||||||
return error < 0.01
|
return error < 0.01
|
||||||
def abseq(got0, expected0):
|
def sineq(got0, expected0):
|
||||||
got, expected = tosigned(got0), tosigned(expected0)
|
got, expected = tosigned(got0), tosigned(expected0)
|
||||||
if (expected - 10) <= got and got <= (expected + 10):
|
if (expected - 10) <= got and got <= (expected + 10):
|
||||||
return True
|
return True
|
||||||
|
@ -79,6 +80,24 @@ def test(p, trials, sym, args, out, f, eq=eq):
|
||||||
else:
|
else:
|
||||||
print('%s failed %d/%d trials (%r)' % (name, fails, trials, cases))
|
print('%s failed %d/%d trials (%r)' % (name, fails, trials, cases))
|
||||||
|
|
||||||
|
def fromfix(n):
|
||||||
|
assert 0 <= n and n <= 65535
|
||||||
|
if n >= 32768:
|
||||||
|
res = (n - 65536) / 256
|
||||||
|
else:
|
||||||
|
res = n / 256
|
||||||
|
return res
|
||||||
|
|
||||||
|
bound = 32767 / 256
|
||||||
|
|
||||||
|
def tofix(x):
|
||||||
|
y = min(max(x, -bound), bound)
|
||||||
|
if y < 0:
|
||||||
|
res = int(ceil(65536 + y * 256))
|
||||||
|
else:
|
||||||
|
res = int(y * 256)
|
||||||
|
return res % 65536
|
||||||
|
|
||||||
def pipe():
|
def pipe():
|
||||||
return Popen(['uxncli', 'run.rom'], stdin=PIPE, stdout=PIPE)
|
return Popen(['uxncli', 'run.rom'], stdin=PIPE, stdout=PIPE)
|
||||||
|
|
||||||
|
@ -87,11 +106,17 @@ def x16_add(x, y):
|
||||||
def x16_sub(x, y):
|
def x16_sub(x, y):
|
||||||
return (x - y) % 65536
|
return (x - y) % 65536
|
||||||
def x16_mul(x, y):
|
def x16_mul(x, y):
|
||||||
return ((x * y) // 256) % 65536
|
return tofix(fromfix(x) * fromfix(y))
|
||||||
def x16_div(x, y):
|
def x16_div(x, y):
|
||||||
return ((x * 256) // y) % 65536
|
return tofix(fromfix(x) / fromfix(y))
|
||||||
def x16_quot(x, y):
|
def x16_quot(x, y):
|
||||||
return x16_div(x, y) & 0xff00
|
n = x16_div(x, y)
|
||||||
|
if n < 0x7fff:
|
||||||
|
return n & 0xff00
|
||||||
|
elif n > 0x8001:
|
||||||
|
return (n + 255) & 0xff00
|
||||||
|
else:
|
||||||
|
return n
|
||||||
def x16_rem(x, y):
|
def x16_rem(x, y):
|
||||||
return x % y
|
return x % y
|
||||||
def x16_is_whole(x):
|
def x16_is_whole(x):
|
||||||
|
@ -147,13 +172,13 @@ def x16_trunc8(x):
|
||||||
return floor(x / 256)
|
return floor(x / 256)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
trials = 1000
|
trials = int(argv[1]) if argv[1:] else 100
|
||||||
run(['uxnasm', 'test-fix16.tal', 'run.rom'])
|
run(['uxnasm', 'test-fix16.tal', 'run.rom'])
|
||||||
p = pipe()
|
p = pipe()
|
||||||
test(p, trials, b'+', [('x', u16), ('y', u16)], u16, x16_add)
|
test(p, trials, b'+', [('x', u16), ('y', u16)], u16, x16_add)
|
||||||
test(p, trials, b'-', [('x', u16), ('y', u16)], u16, x16_sub)
|
test(p, trials, b'-', [('x', u16), ('y', u16)], u16, x16_sub)
|
||||||
test(p, trials, b'*', [('x', u16), ('y', u16)], u16, x16_mul)
|
test(p, trials, b'*', [('x', u16), ('y', u16)], u16, x16_mul)
|
||||||
test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div, eq=releq)
|
test(p, trials, b'/', [('x', u16), ('y', z16)], u16, x16_div)
|
||||||
test(p, trials, b'\\', [('x', u16), ('y', z16)], u16, x16_quot)
|
test(p, trials, b'\\', [('x', u16), ('y', z16)], u16, x16_quot)
|
||||||
test(p, trials, b'%', [('x', u16), ('y', z16)], u16, x16_rem)
|
test(p, trials, b'%', [('x', u16), ('y', z16)], u16, x16_rem)
|
||||||
test(p, trials, b'w', [('x', u16)], u8, x16_is_whole, eq=booleq)
|
test(p, trials, b'w', [('x', u16)], u8, x16_is_whole, eq=booleq)
|
||||||
|
@ -164,16 +189,18 @@ def main():
|
||||||
test(p, trials, b'>', [('x', u16), ('y', u16)], u8, x16_gt)
|
test(p, trials, b'>', [('x', u16), ('y', u16)], u8, x16_gt)
|
||||||
test(p, trials, b'{', [('x', u16), ('y', u16)], u8, x16_lteq)
|
test(p, trials, b'{', [('x', u16), ('y', u16)], u8, x16_lteq)
|
||||||
test(p, trials, b'}', [('x', u16), ('y', u16)], u8, x16_gteq)
|
test(p, trials, b'}', [('x', u16), ('y', u16)], u8, x16_gteq)
|
||||||
test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq)
|
|
||||||
test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=abseq)
|
|
||||||
test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=abseq)
|
|
||||||
test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq)
|
|
||||||
test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq)
|
|
||||||
test(p, trials, b'F', [('x', u16)], u16, x16_floor)
|
test(p, trials, b'F', [('x', u16)], u16, x16_floor)
|
||||||
test(p, trials, b'C', [('x', u16)], u16, x16_ceil)
|
test(p, trials, b'C', [('x', u16)], u16, x16_ceil)
|
||||||
test(p, trials, b'R', [('x', u16)], u16, x16_round)
|
test(p, trials, b'R', [('x', u16)], u16, x16_round)
|
||||||
test(p, trials, b'8', [('x', u16)], u16, x16_trunc8)
|
test(p, trials, b'8', [('x', u16)], u16, x16_trunc8)
|
||||||
test(p, trials, b'T', [('x', u16)], u16, x16_trunc16)
|
test(p, trials, b'T', [('x', u16)], u16, x16_trunc16)
|
||||||
|
# the next five are known to be somewhat inaccurate and use
|
||||||
|
# a "relaxed" equality predicate for testing purposes.
|
||||||
|
test(p, trials, b'r', [('x', p16)], u16, x16_sqrt, eq=releq)
|
||||||
|
test(p, trials, b's', [('x', p16)], u16, x16_sin, eq=sineq)
|
||||||
|
test(p, trials, b'c', [('x', p16)], u16, x16_cos, eq=sineq)
|
||||||
|
test(p, trials, b't', [('x', t16)], u16, x16_tan, eq=taneq)
|
||||||
|
test(p, trials, b'l', [('x', p16)], u16, x16_log, eq=releq)
|
||||||
p.stdin.write(b'\n\n')
|
p.stdin.write(b'\n\n')
|
||||||
p.stdin.flush()
|
p.stdin.flush()
|
||||||
p.stdin.close()
|
p.stdin.close()
|
||||||
|
|
Loading…
Reference in New Issue