tuklib_integer: Improve unaligned memory access.
Now memcpy() or GNU C packed structs for unaligned access instead of type punning. See the comment in this commit for details. Avoiding type punning with unaligned access is needed to silence gcc -fsanitize=undefined. New functions: unaliged_readXXne and unaligned_writeXXne where XX is 16, 32, or 64.
This commit is contained in:
parent
596ed3de44
commit
29afef0348
|
@ -42,6 +42,7 @@
|
||||||
#define TUKLIB_INTEGER_H
|
#define TUKLIB_INTEGER_H
|
||||||
|
|
||||||
#include "tuklib_common.h"
|
#include "tuklib_common.h"
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
|
@ -274,61 +275,211 @@ write64ne(uint8_t *buf, uint64_t num)
|
||||||
// Unaligned reads and writes //
|
// Unaligned reads and writes //
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
|
|
||||||
|
// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
|
||||||
|
// is bad (at least) because compilers can emit vector instructions that
|
||||||
|
// require aligned pointers even if non-vector instructions work with
|
||||||
|
// unaligned pointers.
|
||||||
|
//
|
||||||
|
// Using memcpy() is the standard compliant way to do unaligned access.
|
||||||
|
// Many modern compilers inline it so there is no function call overhead.
|
||||||
|
//
|
||||||
|
// However, it seems that some compilers generate better code with a cast
|
||||||
|
// to a packed struct than with memcpy():
|
||||||
|
// - Old GCC versions (early 4.x and older) on x86
|
||||||
|
// - GCC <= 8.2 (and possibly newer) on ARMv5 (but ARMv5 is old and maybe
|
||||||
|
// doesn't matter so much)
|
||||||
|
// - GCC <= 5.x on ARMv7 (on 4.x neither is great but packed is less bad)
|
||||||
|
// - Intel C Compiler <= 19 (and possibly newer)
|
||||||
|
//
|
||||||
|
// GCC on ARMv6 is weird:
|
||||||
|
// - GCC >= 6.x is better with memcpy() than with a packed struct.
|
||||||
|
// - On GCC < 6 neither method is good, but packed seems less bad.
|
||||||
|
//
|
||||||
|
// https://gcc.godbolt.org/ was useful for seeing what kind of code is
|
||||||
|
// generated by different compilers on different archs. Note that one
|
||||||
|
// may need to try a little less trivial code than than these functions
|
||||||
|
// alone to spot differences. For example this is better with packed method
|
||||||
|
// on Intel C Compiler 19:
|
||||||
|
//
|
||||||
|
// int foo(const uint8_t *a, const uint8_t *b)
|
||||||
|
// {
|
||||||
|
// return unaligned_read16ne(a) == unaligned_read16ne(b);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// Based on the above information, prefer the memcpy() method in
|
||||||
|
// general (including all Clang versions), but use the packed struct
|
||||||
|
// with GCC 5.x and older and with the Intel C Compiler. This isn't
|
||||||
|
// optimal but at least it covers some known special cases.
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__) \
|
||||||
|
&& (__GNUC__ < 6 || defined(__INTEL_COMPILER))
|
||||||
|
# define TUKLIB_UNALIGNED_WITH_PACKED 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static inline uint16_t
|
||||||
|
unaligned_read16ne(const uint8_t *buf)
|
||||||
|
{
|
||||||
|
#ifdef TUKLIB_UNALIGNED_WITH_PACKED
|
||||||
|
struct __attribute__((__packed__)) s { uint16_t v; };
|
||||||
|
const struct s *p = (const struct s *)buf;
|
||||||
|
return p->v;
|
||||||
|
#else
|
||||||
|
uint16_t num;
|
||||||
|
memcpy(&num, buf, sizeof(num));
|
||||||
|
return num;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline uint32_t
|
||||||
|
unaligned_read32ne(const uint8_t *buf)
|
||||||
|
{
|
||||||
|
#ifdef TUKLIB_UNALIGNED_WITH_PACKED
|
||||||
|
struct __attribute__((__packed__)) s { uint32_t v; };
|
||||||
|
const struct s *p = (const struct s *)buf;
|
||||||
|
return p->v;
|
||||||
|
#else
|
||||||
|
uint32_t num;
|
||||||
|
memcpy(&num, buf, sizeof(num));
|
||||||
|
return num;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline uint64_t
|
||||||
|
unaligned_read64ne(const uint8_t *buf)
|
||||||
|
{
|
||||||
|
#ifdef TUKLIB_UNALIGNED_WITH_PACKED
|
||||||
|
struct __attribute__((__packed__)) s { uint64_t v; };
|
||||||
|
const struct s *p = (const struct s *)buf;
|
||||||
|
return p->v;
|
||||||
|
#else
|
||||||
|
uint64_t num;
|
||||||
|
memcpy(&num, buf, sizeof(num));
|
||||||
|
return num;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
unaligned_write16ne(uint8_t *buf, uint16_t num)
|
||||||
|
{
|
||||||
|
#ifdef TUKLIB_UNALIGNED_WITH_PACKED
|
||||||
|
struct __attribute__((__packed__)) s { uint16_t v; };
|
||||||
|
struct s *p = (struct s *)buf;
|
||||||
|
p->v = num;
|
||||||
|
#else
|
||||||
|
memcpy(buf, &num, sizeof(num));
|
||||||
|
#endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
unaligned_write32ne(uint8_t *buf, uint32_t num)
|
||||||
|
{
|
||||||
|
#ifdef TUKLIB_UNALIGNED_WITH_PACKED
|
||||||
|
struct __attribute__((__packed__)) s { uint32_t v; };
|
||||||
|
struct s *p = (struct s *)buf;
|
||||||
|
p->v = num;
|
||||||
|
#else
|
||||||
|
memcpy(buf, &num, sizeof(num));
|
||||||
|
#endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
unaligned_write64ne(uint8_t *buf, uint64_t num)
|
||||||
|
{
|
||||||
|
#ifdef TUKLIB_UNALIGNED_WITH_PACKED
|
||||||
|
struct __attribute__((__packed__)) s { uint64_t v; };
|
||||||
|
struct s *p = (struct s *)buf;
|
||||||
|
p->v = num;
|
||||||
|
#else
|
||||||
|
memcpy(buf, &num, sizeof(num));
|
||||||
|
#endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
|
// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
|
||||||
// 32-bit unaligned integer loads and stores. It's possible that 64-bit
|
// 32-bit unaligned integer loads and stores. It's possible that 64-bit
|
||||||
// unaligned access doesn't work or is slower than byte-by-byte access.
|
// unaligned access doesn't work or is slower than byte-by-byte access.
|
||||||
// Since unaligned 64-bit is probably not needed as often as 16-bit or
|
// Since unaligned 64-bit is probably not needed as often as 16-bit or
|
||||||
// 32-bit, we simply don't support 64-bit unaligned access for now.
|
// 32-bit, we simply don't support 64-bit unaligned access for now.
|
||||||
#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
|
|
||||||
# define unaligned_read16be read16be
|
|
||||||
# define unaligned_read16le read16le
|
|
||||||
# define unaligned_read32be read32be
|
|
||||||
# define unaligned_read32le read32le
|
|
||||||
# define unaligned_write16be write16be
|
|
||||||
# define unaligned_write16le write16le
|
|
||||||
# define unaligned_write32be write32be
|
|
||||||
# define unaligned_write32le write32le
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static inline uint16_t
|
static inline uint16_t
|
||||||
unaligned_read16be(const uint8_t *buf)
|
unaligned_read16be(const uint8_t *buf)
|
||||||
{
|
{
|
||||||
|
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
|
return conv16be(unaligned_read16ne(buf));
|
||||||
|
#else
|
||||||
uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
|
uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
|
||||||
return num;
|
return num;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline uint16_t
|
static inline uint16_t
|
||||||
unaligned_read16le(const uint8_t *buf)
|
unaligned_read16le(const uint8_t *buf)
|
||||||
{
|
{
|
||||||
|
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
|
return conv16le(unaligned_read16ne(buf));
|
||||||
|
#else
|
||||||
uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
|
uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
|
||||||
return num;
|
return num;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
unaligned_read32be(const uint8_t *buf)
|
unaligned_read32be(const uint8_t *buf)
|
||||||
{
|
{
|
||||||
|
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
|
return conv32be(unaligned_read32ne(buf));
|
||||||
|
#else
|
||||||
uint32_t num = (uint32_t)buf[0] << 24;
|
uint32_t num = (uint32_t)buf[0] << 24;
|
||||||
num |= (uint32_t)buf[1] << 16;
|
num |= (uint32_t)buf[1] << 16;
|
||||||
num |= (uint32_t)buf[2] << 8;
|
num |= (uint32_t)buf[2] << 8;
|
||||||
num |= (uint32_t)buf[3];
|
num |= (uint32_t)buf[3];
|
||||||
return num;
|
return num;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
unaligned_read32le(const uint8_t *buf)
|
unaligned_read32le(const uint8_t *buf)
|
||||||
{
|
{
|
||||||
|
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
|
return conv32le(unaligned_read32ne(buf));
|
||||||
|
#else
|
||||||
uint32_t num = (uint32_t)buf[0];
|
uint32_t num = (uint32_t)buf[0];
|
||||||
num |= (uint32_t)buf[1] << 8;
|
num |= (uint32_t)buf[1] << 8;
|
||||||
num |= (uint32_t)buf[2] << 16;
|
num |= (uint32_t)buf[2] << 16;
|
||||||
num |= (uint32_t)buf[3] << 24;
|
num |= (uint32_t)buf[3] << 24;
|
||||||
return num;
|
return num;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
|
// Like in the aligned case, these need to be macros.
|
||||||
|
# define unaligned_write16be(buf, num) \
|
||||||
|
unaligned_write16ne(buf, conv16be(num))
|
||||||
|
# define unaligned_write32be(buf, num) \
|
||||||
|
unaligned_write32ne(buf, conv32be(num))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
|
# define unaligned_write16le(buf, num) \
|
||||||
|
unaligned_write16ne(buf, conv16le(num))
|
||||||
|
# define unaligned_write32le(buf, num) \
|
||||||
|
unaligned_write32ne(buf, conv32le(num))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef unaligned_write16be
|
||||||
static inline void
|
static inline void
|
||||||
unaligned_write16be(uint8_t *buf, uint16_t num)
|
unaligned_write16be(uint8_t *buf, uint16_t num)
|
||||||
{
|
{
|
||||||
|
@ -336,8 +487,10 @@ unaligned_write16be(uint8_t *buf, uint16_t num)
|
||||||
buf[1] = (uint8_t)num;
|
buf[1] = (uint8_t)num;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef unaligned_write16le
|
||||||
static inline void
|
static inline void
|
||||||
unaligned_write16le(uint8_t *buf, uint16_t num)
|
unaligned_write16le(uint8_t *buf, uint16_t num)
|
||||||
{
|
{
|
||||||
|
@ -345,8 +498,10 @@ unaligned_write16le(uint8_t *buf, uint16_t num)
|
||||||
buf[1] = (uint8_t)(num >> 8);
|
buf[1] = (uint8_t)(num >> 8);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef unaligned_write32be
|
||||||
static inline void
|
static inline void
|
||||||
unaligned_write32be(uint8_t *buf, uint32_t num)
|
unaligned_write32be(uint8_t *buf, uint32_t num)
|
||||||
{
|
{
|
||||||
|
@ -356,8 +511,10 @@ unaligned_write32be(uint8_t *buf, uint32_t num)
|
||||||
buf[3] = (uint8_t)num;
|
buf[3] = (uint8_t)num;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef unaligned_write32le
|
||||||
static inline void
|
static inline void
|
||||||
unaligned_write32le(uint8_t *buf, uint32_t num)
|
unaligned_write32le(uint8_t *buf, uint32_t num)
|
||||||
{
|
{
|
||||||
|
@ -367,7 +524,6 @@ unaligned_write32le(uint8_t *buf, uint32_t num)
|
||||||
buf[3] = (uint8_t)(num >> 24);
|
buf[3] = (uint8_t)(num >> 24);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue