tuklib_integer: Add 64-bit endianness-converting reads and writes.
Also update the comment in liblzma's memcmplen.h. Thanks to Michał Górny for the original patch for the reads.
This commit is contained in:
parent
508a44372c
commit
fae37ad2af
|
@ -64,8 +64,8 @@ main(void)
|
||||||
AC_MSG_CHECKING([if unaligned memory access should be used])
|
AC_MSG_CHECKING([if unaligned memory access should be used])
|
||||||
AC_ARG_ENABLE([unaligned-access], AS_HELP_STRING([--enable-unaligned-access],
|
AC_ARG_ENABLE([unaligned-access], AS_HELP_STRING([--enable-unaligned-access],
|
||||||
[Enable if the system supports *fast* unaligned memory access
|
[Enable if the system supports *fast* unaligned memory access
|
||||||
with 16-bit and 32-bit integers. By default, this is enabled
|
with 16-bit, 32-bit, and 64-bit integers. By default,
|
||||||
only on x86, x86_64, big endian PowerPC,
|
this is enabled only on x86, x86_64, big endian PowerPC,
|
||||||
and some ARM systems.]),
|
and some ARM systems.]),
|
||||||
[], [enable_unaligned_access=auto])
|
[], [enable_unaligned_access=auto])
|
||||||
if test "x$enable_unaligned_access" = xauto ; then
|
if test "x$enable_unaligned_access" = xauto ; then
|
||||||
|
@ -93,8 +93,8 @@ int main(void) { return 0; }
|
||||||
fi
|
fi
|
||||||
if test "x$enable_unaligned_access" = xyes ; then
|
if test "x$enable_unaligned_access" = xyes ; then
|
||||||
AC_DEFINE([TUKLIB_FAST_UNALIGNED_ACCESS], [1], [Define to 1 if
|
AC_DEFINE([TUKLIB_FAST_UNALIGNED_ACCESS], [1], [Define to 1 if
|
||||||
the system supports fast unaligned access to 16-bit and
|
the system supports fast unaligned access to 16-bit,
|
||||||
32-bit integers.])
|
32-bit, and 64-bit integers.])
|
||||||
AC_MSG_RESULT([yes])
|
AC_MSG_RESULT([yes])
|
||||||
else
|
else
|
||||||
AC_MSG_RESULT([no])
|
AC_MSG_RESULT([no])
|
||||||
|
|
|
@ -17,8 +17,8 @@
|
||||||
/// - Byte swapping: bswapXX(num)
|
/// - Byte swapping: bswapXX(num)
|
||||||
/// - Byte order conversions to/from native (byteswaps if Y isn't
|
/// - Byte order conversions to/from native (byteswaps if Y isn't
|
||||||
/// the native endianness): convXXYe(num)
|
/// the native endianness): convXXYe(num)
|
||||||
/// - Unaligned reads (16/32-bit only): readXXYe(ptr)
|
/// - Unaligned reads: readXXYe(ptr)
|
||||||
/// - Unaligned writes (16/32-bit only): writeXXYe(ptr, num)
|
/// - Unaligned writes: writeXXYe(ptr, num)
|
||||||
/// - Aligned reads: aligned_readXXYe(ptr)
|
/// - Aligned reads: aligned_readXXYe(ptr)
|
||||||
/// - Aligned writes: aligned_writeXXYe(ptr, num)
|
/// - Aligned writes: aligned_writeXXYe(ptr, num)
|
||||||
///
|
///
|
||||||
|
@ -343,6 +343,46 @@ read32le(const uint8_t *buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Unaligned read of a 64-bit big endian integer: buf[0] holds the most
/// significant byte and buf[7] the least significant one.
///
/// \param      buf     Pointer to at least eight readable bytes
///
/// \return     The value in native byte order
static inline uint64_t
read64be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Load in native byte order and then convert from big endian.
	// The loaded value is kept in a local so that conv64be(), which
	// may be a macro, is handed a plain variable.
	const uint64_t raw = read64ne(buf);
	return conv64be(raw);
#else
	// Portable path: push the accumulator up by one byte per step so
	// that the first byte in the buffer ends up in the top bits.
	uint64_t value = 0;
	for (unsigned int i = 0; i < 8; ++i) {
		value = (value << 8) | buf[i];
	}

	return value;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Unaligned read of a 64-bit little endian integer: buf[0] holds the
/// least significant byte and buf[7] the most significant one.
///
/// \param      buf     Pointer to at least eight readable bytes
///
/// \return     The value in native byte order
static inline uint64_t
read64le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Load in native byte order and then convert from little endian.
	// The loaded value is kept in a local so that conv64le(), which
	// may be a macro, is handed a plain variable.
	const uint64_t raw = read64ne(buf);
	return conv64le(raw);
#else
	// Portable path: byte i of the buffer contributes bits
	// 8*i .. 8*i+7 of the result.
	uint64_t value = 0;
	for (unsigned int i = 0; i < 8; ++i) {
		value |= (uint64_t)buf[i] << (8 * i);
	}

	return value;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
// NOTE: Possible byte swapping must be done in a macro to allow the compiler
|
// NOTE: Possible byte swapping must be done in a macro to allow the compiler
|
||||||
// to optimize byte swapping of constants when using glibc's or *BSD's
|
// to optimize byte swapping of constants when using glibc's or *BSD's
|
||||||
// byte swapping macros. The actual write is done in an inline function
|
// byte swapping macros. The actual write is done in an inline function
|
||||||
|
@ -350,11 +390,13 @@ read32le(const uint8_t *buf)
|
||||||
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
# define write16be(buf, num) write16ne(buf, conv16be(num))
|
# define write16be(buf, num) write16ne(buf, conv16be(num))
|
||||||
# define write32be(buf, num) write32ne(buf, conv32be(num))
|
# define write32be(buf, num) write32ne(buf, conv32be(num))
|
||||||
|
# define write64be(buf, num) write64ne(buf, conv64be(num))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
|
||||||
# define write16le(buf, num) write16ne(buf, conv16le(num))
|
# define write16le(buf, num) write16ne(buf, conv16le(num))
|
||||||
# define write32le(buf, num) write32ne(buf, conv32le(num))
|
# define write32le(buf, num) write32ne(buf, conv32le(num))
|
||||||
|
# define write64le(buf, num) write64ne(buf, conv64le(num))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -51,10 +51,6 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
|
||||||
|| (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
|
|| (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
|
||||||
|| (defined(__INTEL_COMPILER) && defined(_M_X64)) \
|
|| (defined(__INTEL_COMPILER) && defined(_M_X64)) \
|
||||||
|| (defined(_MSC_VER) && defined(_M_X64)))
|
|| (defined(_MSC_VER) && defined(_M_X64)))
|
||||||
// NOTE: This will use 64-bit unaligned access which
|
|
||||||
// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but
|
|
||||||
// it's convenient here at least as long as it's x86-64 only.
|
|
||||||
//
|
|
||||||
// I keep this x86-64 only for now since that's where I know this
|
// I keep this x86-64 only for now since that's where I know this
|
||||||
// to be a good method. This may be fine on other 64-bit CPUs too.
|
// to be a good method. This may be fine on other 64-bit CPUs too.
|
||||||
// On big endian one should use xor instead of subtraction and switch
|
// On big endian one should use xor instead of subtraction and switch
|
||||||
|
@ -84,8 +80,9 @@ lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
|
||||||
|| (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
|
|| (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
|
||||||
|| (defined(_MSC_VER) && defined(_M_IX86_FP) \
|
|| (defined(_MSC_VER) && defined(_M_IX86_FP) \
|
||||||
&& _M_IX86_FP >= 2))
|
&& _M_IX86_FP >= 2))
|
||||||
// NOTE: Like above, this will use 128-bit unaligned access which
|
// NOTE: This will use 128-bit unaligned access which
|
||||||
// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit.
|
// TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit,
|
||||||
|
// but it's convenient here since this is x86-only.
|
||||||
//
|
//
|
||||||
// SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
|
// SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
|
||||||
// version is sometimes significantly faster and sometimes
|
// version is sometimes significantly faster and sometimes
|
||||||
|
|
Loading…
Reference in New Issue