liblzma: SHA-256: Unroll a little more.
This way a branch isn't needed for each operation to choose between blk0 and blk2, and still the code doesn't grow as much as it would with full unrolling.
This commit is contained in:
parent
bc7650d87b
commit
9a096f8e57
|
@ -47,11 +47,12 @@
|
|||
#define g(i) T[(6 - i) & 7]
|
||||
#define h(i) T[(7 - i) & 7]
|
||||
|
||||
#define R(i) \
|
||||
h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] \
|
||||
+ (j ? blk2(i) : blk0(i)); \
|
||||
#define R(i, j, blk) \
|
||||
h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \
|
||||
d(i) += h(i); \
|
||||
h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
|
||||
#define R0(i) R(i, 0, blk0(i))
|
||||
#define R2(i) R(i, j, blk2(i))
|
||||
|
||||
#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22))
|
||||
#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25))
|
||||
|
@ -88,12 +89,18 @@ transform(uint32_t state[8], const uint32_t data[16])
|
|||
// Copy state[] to working vars.
|
||||
memcpy(T, state, sizeof(T));
|
||||
|
||||
// 64 operations, partially loop unrolled
|
||||
for (unsigned int j = 0; j < 64; j += 16) {
|
||||
R( 0); R( 1); R( 2); R( 3);
|
||||
R( 4); R( 5); R( 6); R( 7);
|
||||
R( 8); R( 9); R(10); R(11);
|
||||
R(12); R(13); R(14); R(15);
|
||||
// The first 16 operations unrolled
|
||||
R0( 0); R0( 1); R0( 2); R0( 3);
|
||||
R0( 4); R0( 5); R0( 6); R0( 7);
|
||||
R0( 8); R0( 9); R0(10); R0(11);
|
||||
R0(12); R0(13); R0(14); R0(15);
|
||||
|
||||
// The remaining 48 operations partially unrolled
|
||||
for (unsigned int j = 16; j < 64; j += 16) {
|
||||
R2( 0); R2( 1); R2( 2); R2( 3);
|
||||
R2( 4); R2( 5); R2( 6); R2( 7);
|
||||
R2( 8); R2( 9); R2(10); R2(11);
|
||||
R2(12); R2(13); R2(14); R2(15);
|
||||
}
|
||||
|
||||
// Add the working vars back into state[].
|
||||
|
|
Loading…
Reference in New Issue