From e3905178a4ca64c6d6cb46c51e7c9e9382f35446 Mon Sep 17 00:00:00 2001 From: d6 Date: Sun, 9 Jan 2022 19:04:48 -0500 Subject: [PATCH] wip --- bfloat16.tal | 305 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 bfloat16.tal diff --git a/bfloat16.tal b/bfloat16.tal new file mode 100644 index 0000000..43bb380 --- /dev/null +++ b/bfloat16.tal @@ -0,0 +1,305 @@ +( bfloat16.tal ) +( ) +( This file implements the bfloat16 format. ) +( ) +( This differs from IEEE float-16 by providing more exponent bits ) +( in exchange for fewer mantissa bits. In other words it trades ) +( coarser precision for larger numerical range. ) +( ) +( The bfloat16 value uses 16-bits divided as follows: ) +( - sign (1 bit, 0-1) ) +( - exponent (8 bits, 0-255) ) +( - mantissa (7 bits, 0-127) ) +( ) +( Kinds of values: ) +( - zeros (exponent==0 mantissa==0) ) +( - subnormal (exponent==0 mantissa!=0) ) +( - infinities (exponent==255 mantissa==0) ) +( - nans (exponent==255 mantissa!=0) ) +( - normal (everything else) ) +( ) +( Equations: ) +( - normal = -1^sign * 2^(exponent - 127) * (1 + mantissa/128) ) +( - subnormal = -1^sign * 2^-126 * mantissa/128 ) +( (exponent ranges from 1 to 254 since 0 and 255 are special) ) +( ) +( VALUE SIGN EXPONENT MANTISSA NOTES ) +( 0 0 00000000 0000000 ) +( -0 1 00000000 0000000 mostly equivalent to zero ) +( 1 0 01111111 0000000 ) +( 2 0 10000000 0000000 ) +( 3 0 10000000 1000000 ) +( -1 1 01111111 0000000 ) +( 17 0 10000011 0001000 ) +( ~9.184e-41 0 00000000 0000001 smallest positive value ) +( ~3.390e38 0 11111110 1111111 largest finite value ) +( +inf 0 11111111 0000000 positive infinity ) +( -inf 1 11111111 0000000 negative infinity ) +( nan * 11111111 ******* lots of nans; * is wild ) +( ) +( This code doesn't distinguish between quiet and signaling NaNs.
)
+(                                                                  )
+( Bfloat16 values are emitted in a hexadecimal format:             )
+(                                                                  )
+( The text 0xI.FFp+EE stands for [I + FF/256] * 2^EE, where I is   )
+( the leading bit, FF is the 7-bit mantissa shifted left by one    )
+( bit, and EE is the unbiased exponent written in hexadecimal.     )
+(                                                                  )
+( HEXADECIMAL  SIGN  EXPONENT  MANTISSA  DECIMAL                    )
+( 0x1.00p+00   0     01111111  0000000   1.0                        )
+( 0x0.02p-7e   0     00000000  0000001   ~9.184e-41                 )
+( -0x1.80p+02  1     10000001  1000000   -6.0                       )
+( 0x1.c0p+02   0     10000001  1100000   7.0                        )
+(                                                                  )
+( Eventually I'd like to display integral part of the number       )
+( in a more natural way but the 1.xx format is OK for now.         )
+(                                                                  )
+( For consistency zeros are emitted as 0x0.00p+00 and -0x0.00p+00. )
+( Infinities are "inf" and "-inf" and NaN is "nan".                )
+
+( console and system helpers )
+( EMIT writes one byte to device port 18. )
+( DEBUG writes ff to device port 0e. )
+%EMIT { #18 DEO }
+%SPACE { #20 EMIT }
+%NEWLINE { #0a EMIT }
+%DEBUG { #ff #0e DEO }
+
+( reset vector: every test call below is currently commented out )
+|0100
+( #01 ;byte-to-bf16 JSR2 ;test JSR2
+  #02 ;byte-to-bf16 JSR2 ;test JSR2 )
+  ( #437d -> 0 10000110 1111101 )
+  ( #437c -> 0 10000110 1111100 )
+( #00 #86 #7f ;bf16-join JSR2 ;emit-bf16 JSR2 NEWLINE )
+( #ff ;byte-to-bf16 JSR2 ;test JSR2 )
+( #ff ;byte-to-bf16 JSR2 #01 ;round-shift JSR2 ;test JSR2
+  #03 ;byte-to-bf16 JSR2 ;test JSR2
+  #7f80 ;test JSR2
+  #ff80 ;test JSR2
+  #ff81 ;test JSR2
+  #0000 ;test JSR2
+  #8000 ;test JSR2
+  #0001 ;test JSR2
+  #8001 ;test JSR2
+  #3f80 ;test JSR2
+  #bf80 ;test JSR2
+  #4000 ;test JSR2
+  #4080 ;test JSR2 )
+    #00 #00 DIV ( exit )
+    BRK
+
+( print the raw 16-bit pattern in hex, then " -> ", then the )
+( decoded bfloat16 text, then a newline )
+@test ( x* -> )
+    DUP2 ;emit-u16 JSR2
+    SPACE LIT '- EMIT LIT '> EMIT SPACE
+    ;emit-bf16 JSR2 NEWLINE
+    JMP2r
+
+( print one hex digit, 0-9 then a-f )
+( digits of ten or more get an extra 27 so that 0a lands on 'a' )
+( the sub-label is named decimal on purpose: the former name began )
+( with the letters l t and was swallowed by HTML entity decoding )
+@emit-digit ( d^ -> )
+    DUP #0a LTH
+    ,&decimal JCN #27 ADD
+    &decimal #30 ADD EMIT
+    JMP2r
+
+( print a byte as two hex digits, high nibble first )
+@emit-u8 ( n^ -> )
+    DUP #04 SFT ;emit-digit JSR2
+    #0f AND ;emit-digit JSR2
+    JMP2r
+
+( print a short as four hex digits, high byte first )
+@emit-u16 ( x* -> )
+    SWP ;emit-u8 JSR2
+    ;emit-u8 JSR2
+    JMP2r
+
+( print a two's complement byte as a sign and two hex digits )
+( for negative input the magnitude is 80 minus the low seven bits )
+@emit-s8 ( x^ -> )
+    DUP #07 SFT ,&is-negative JCN LIT '+ EMIT ;emit-u8 JSR2 JMP2r
+    &is-negative LIT '- EMIT #7f AND #80 SWP SUB ;emit-u8 JSR2 JMP2r
+
+( print a two's complement short as a sign and four hex digits )
+( for negative input the magnitude is 8000 minus the low 15 bits )
+@emit-s16 ( x* -> )
+    DUP2 #0f SFT2 SWP POP ,&is-negative JCN LIT '+ EMIT ;emit-u16 JSR2 JMP2r
+    &is-negative LIT '- EMIT #7fff AND2 #8000 SWP2 SUB2 ;emit-u16 JSR2 JMP2r
+
+( print a bfloat16 value as text: inf, -inf, nan, or hex float )
+@emit-bf16 ( x* -> )
+    ;bf16-split JSR2 ( sgn exp mnt )
+
+    ( sentinel or value )
+    OVR #ff NEQ
,&non-sentinel JCN
+    ( exponent is ff here and the stack holds sgn exp mnt )
+    ,&is-nan JCN POP #00 EQU ,&pos-inf JCN LIT '- EMIT
+    &pos-inf LIT 'i EMIT LIT 'n EMIT LIT 'f EMIT JMP2r
+    ( the nan path still holds sgn exp, so drop both before returning )
+    &is-nan POP2 LIT 'n EMIT LIT 'a EMIT LIT 'n EMIT JMP2r
+
+    ( zero or non-zero )
+    &non-sentinel DUP2 ORA ,&non-zero JCN
+    POP2 ,&is-negative-zero JCN ,&zero-suffix JMP
+    &is-negative-zero LIT '- EMIT
+    &zero-suffix LIT '0 EMIT LIT 'x EMIT LIT '0 EMIT LIT '. EMIT
+    #00 ;emit-u8 JSR2 LIT 'p EMIT #00 ;emit-s8 JSR2 JMP2r
+
+    ( normal or subnormal )
+    &non-zero ROT ,&is-negative JCN ,&post-sgn JMP
+    &is-negative LIT '- EMIT
+    &post-sgn LIT '0 EMIT LIT 'x EMIT ( exp mnt )
+    OVR ,&is-normal JCN
+    ( subnormals scale by 2^-126, the same as exponent 01, )
+    ( so bump the stored exponent from 00 to 01 before unbiasing )
+    SWP INC SWP LIT '0 ,&suffix JMP
+    &is-normal LIT '1
+    &suffix EMIT LIT '. EMIT ( exp mnt )
+    ( the mantissa counts 128ths but two hex digits count 256ths, )
+    ( so shift it left one bit: 1000000 prints as 80 )
+    #10 SFT ;emit-u8 JSR2
+    LIT 'p EMIT #7f SUB ;emit-s8 JSR2
+    JMP2r
+
+( pack sign, exponent and mantissa into a 16-bit pattern )
+( the sign lands in bit 15, the exponent in bits 14-7 and the )
+( mantissa in bits 6-0; inputs are not range checked )
+@bf16-join ( sgn^ exp^ mta^ -> x* )
+    STH #00 #01 SFT2 ( sgn^ exp* [mta^] )
+    ROT #00 #01 SFT2 NIP #00 ORA2 ( sgn|exp* [mta^] )
+    #00 STHr ORA2 ( sgn|exp|mta* )
+    JMP2r
+
+( sgn: 0-1, exp: 0-255, mta: 0-127 )
+@bf16-split ( x* -> sgn^ exp^ mta^ )
+    OVR #07 SFT STH ( xhi xlo [sgn] )
+    #10 SFT2 SWP STHr ( mnt<1 exp sgn )
+    SWP ( mnt<1 sgn exp )
+    ROT ( sgn exp mnt<1 )
+    #01 SFT ( sgn exp mnt )
+    JMP2r
+
+( field extractors, each consuming a short and leaving one byte )
+%SIGN { POP #07 SFT }
+%EXPONENT { #10 SFT2 POP }
+%MANTISSA { NIP #7f AND }
+
+( returns sign: #00 or #01 )
+@sign ( x* -> sgn^ )
+    SIGN JMP2r
+
+( returns exp: #00 to #ff )
+@exponent ( x* -> exp^ )
+    EXPONENT JMP2r
+
+( returns mta: #00 to #7f )
+@mantissa ( x* -> mta^ )
+    MANTISSA JMP2r
+
+( returns full mta: #00 to #ff )
+( normal numbers will be >= #80 )
+( subnormal numbers will be < #80 )
+@full-mantissa ( x* -> fmta^ )
+    DUP2 MANTISSA STH
+    EXPONENT ,&is-normal JCN STHr JMP2r
+    &is-normal #80 STHr ORA JMP2r
+
+( flip the sign bit and leave every other bit alone )
+@negate-bf16 ( x* -> z* )
+    #8000 EOR2 JMP2r
+
+( true for +0 and -0: the sign bit is masked off before the test )
+@is-zero ( x* -> bool^ )
+    #7fff AND2 #0000 EQU2 JMP2r
+
+( true for everything except +0 and -0 )
+@non-zero ( x* -> bool^ )
+    #7fff AND2 #0000 NEQ2 JMP2r
+
+( Shift mantissa m right by n bits, with rounding )
+( )
+( We round differently depending on the value to be lost: )
+( )
+(   1. If the bits to be removed are > 0.5 we round up )
+(   2.
If the bits to be removed are < 0.5 we round down )
+(   3. If the bits to be removed are = 0.5 we: )
+(      a. Round up if doing so produces an even mantissa )
+(      b. Round down if doing so produces an even mantissa )
+( )
+( This method is useful when adding two values that have )
+( different exponents. We will want to truncate the value )
+( with the smaller exponent to try to shift the mantissa )
+( into the range of the larger value. )
+( )
+( It's important to remember to include the mantissa's )
+( leading one value (if any) before calling this method )
+( )
+( NOTE review: the signature promises a short but every exit )
+( path appears to leave a single byte. The mask is built from )
+( 7f and the limit from a left shift by 8-n; both look )
+( unfinished. Confirm the intended rounding before relying on it. )
+@round-shift ( mta^ n^ -> z* )
+    STH2k ( mta n [mta n] )
+    #08 SWP SUB ( mta 8-n [mta n] )
+    STHk ( mta 8-n [8-n mta n] )
+    #7f SWP SFT ( mta mask=7f>>(8-n) )
+    AND ( mta&mask )
+    STHr #01 SWP #10 MUL SFT ( mta&mask lim=1<<(8-n) [mta n] )
+    DUP2 LTH ,&rnd-down JCN ( masked limit [mta n] )
+    GTH ,&rnd-up JCN ( [mta n] )
+    ( round-to-even )
+    STH2r #01 SUB SFT ( mta>>(n-1) )
+    INC #01 SFT ( (mta>>(n-1)+1)>>1 )
+    JMP2r
+    &rnd-down ( masked limit [mta n] )
+    POP2 STH2r SFT JMP2r
+    &rnd-up ( [mta n] )
+    STH2r SFT INC JMP2r
+
+( true when exponent is 255 and mantissa is non-zero )
+( the low seven bits of xhi hold exponent bits 7-1; bit 7 of xlo )
+( holds exponent bit 0 and the rest of xlo is the mantissa, so )
+( the pattern is a nan exactly when xhi&7f is 7f and xlo > 80 )
+( infinities have xlo equal to 80 and are not nans )
+@is-nan ( x* -> bool^ )
+    SWP ( xlo xhi )
+    #7f ANDk ( xlo xhi 7f xhi&7f )
+    EQU ( xlo xhi xhi&7f=7f )
+    NIP SWP #80 GTH ( xhi&7f=7f xlo>80 )
+    AND JMP2r
+
+( logical complement of is-nan: xhi&7f is not 7f, or xlo <= 80 )
+@non-nan ( x* -> bool^ )
+    #81 LTH STH ( xhi [xlo<=80] )
+    #7f ANDk NEQ ( xhi 7f!=xhi&7f [xlo<=80] )
+    NIP STHr ORA ( 7f!=xhi&7f|xlo<=80 )
+    JMP2r
+
+( NOTE review: unfinished stub. It pushes an address without )
+( calling it and has no return, so execution falls through into )
+( byte-to-bf16. Do not call it yet. )
+@add-bf16 ( x* y* -> z* )
+    DUP2 ;non-nan
+
+( convert an unsigned byte, 0-255, to the bfloat16 with that value )
+( the byte is shifted left until its top bit is set while the )
+( exponent counts down from 86, which is the bias 7f plus 7 )
+( zero has no top bit to find, so it is answered directly with )
+( positive zero instead of entering the loop )
+@byte-to-bf16 ( n^ -> x* )
+    DUP ,&has-bits JCN
+    #00 JMP2r ( n is 00, so the stack now holds 0000 )
+    &has-bits
+    #86 SWP ( exp n )
+    &loop
+        DUP #7f GTH ,&ready JCN
+        #10 SFT SWP #01 SUB SWP
+        ,&loop JMP
+    &ready
+    #7f AND STH ( exp [n&7f] )
+    #00 #01 SFT2 #00 STHr ORA2
+    JMP2r
+
+( rules: )
+( 1. nan = nan is false )
+( 2. x = x is true )
+( 3. (x = y) is (y = x) )
+( 4. -0 = +0 is true )
+( )
+( the sub-labels are spelled no-nan and nonzero on purpose: the )
+( former names began with the letters n o t and were swallowed )
+( by HTML entity decoding )
+@eq-bf16 ( x* y* -> bool^ )
+    DUP2 ;non-nan JSR2 STH SWP2 ( is y not nan? )
+    DUP2 ;non-nan JSR2 STH SWP2 ( is x not nan? )
+    STH2r ORA ,&no-nan JCN ( is either x or y not nan? )
+    POP2 POP2 #00 JMP2r ( else return false )
+    &no-nan
+    DUP2 ;non-zero JSR2 ,&nonzero JCN ( is y non-zero? )
+    POP2 ;is-zero JSR2 JMP2r ( if y is zero, return x-is-zero )
+    &nonzero
+    EQU2 JMP2r ( if not nan or zero, standard comparison )
+
+( logical complement of eq-bf16, so nan != nan is true )
+@ne-bf16 ( x* y* -> bool^ )
+    ;eq-bf16 JSR2 #00 EQU JMP2r
+
+( helper for lt-bf16: map a non-nan pattern to an unsigned key )
+( whose order matches numeric order )
+(   both zeros map to 8000 so neither is below the other )
+(   positive values get the top bit set, landing above 8000 )
+(   negative values have every bit flipped, so a larger magnitude )
+(   gives a smaller key, landing below 8000 )
+@bf16-order-key ( x* -> key* )
+    DUP2 #7fff AND2 ORA ,&has-magnitude JCN
+    POP2 #8000 JMP2r
+    &has-magnitude
+    OVR #80 LTH ,&positive JCN
+    #ffff EOR2 JMP2r
+    &positive
+    #8000 EOR2 JMP2r
+
+( rules for sentinels (in order): )
+( 1. x < x is false )
+( 2a. nan < x is false )
+( 2b. x < nan is false )
+( 3a. x < +inf is true )
+( 3b. +inf < x is false )
+( 4a. -inf < x is true )
+( 4b. x < -inf is false )
+( 5. -0 < +0 is false )
+( 6. -x < +x (or 0) )
+( 7. 0 < +x )
+( 8. x*2^p < y*2^q if p < q )
+( 9. x*2^p < y*2^p if x < y )
+( )
+( any nan operand gives false; otherwise both operands are turned )
+( into sort keys and compared as unsigned shorts )
+@lt-bf16 ( x* y* -> bool^ )
+    DUP2 ;non-nan JSR2 STH SWP2 ( is y not nan? )
+    DUP2 ;non-nan JSR2 STH SWP2 ( is x not nan? )
+    STH2r AND ,&no-nan JCN ( are both x and y not nan? )
+    POP2 POP2 #00 JMP2r ( else return false )
+    &no-nan
+    ;bf16-order-key JSR2 SWP2 ( ky x )
+    ;bf16-order-key JSR2 SWP2 ( kx ky )
+    LTH2 JMP2r
+
\ No newline at end of file