mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-24 03:59:43 +00:00
avutil/md5: fix misaligned reads
This makes ubsan happy and also considerably increases performance on big endian systems.

Tested on an IBM POWER7 3.55 GHz

Before:
2.24user 0.14system 0:02.39elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
2.26user 0.11system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k
2.23user 0.15system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
2.25user 0.12system 0:02.38elapsed 100%CPU (0avgtext+0avgdata 2624maxresident)k
2.20user 0.15system 0:02.36elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k

After:
1.86user 0.13system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.89user 0.11system 0:02.01elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.85user 0.14system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.84user 0.15system 0:01.99elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.89user 0.13system 0:02.02elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k

Tested-by: Nicolas George <george@nsup.org>
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
d8094a303b
commit
e2b7ae4b19
@ -86,14 +86,14 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
|
||||
\
|
||||
if (i < 32) { \
|
||||
if (i < 16) \
|
||||
a += (d ^ (b & (c ^ d))) + X[ i & 15]; \
|
||||
a += (d ^ (b & (c ^ d))) + AV_RL32(X+( i & 15));\
|
||||
else \
|
||||
a += ((d & b) | (~d & c)) + X[(1 + 5*i) & 15]; \
|
||||
a += ((d & b) | (~d & c)) + AV_RL32(X+((1 + 5*i) & 15));\
|
||||
} else { \
|
||||
if (i < 48) \
|
||||
a += (b ^ c ^ d) + X[(5 + 3*i) & 15]; \
|
||||
a += (b ^ c ^ d) + AV_RL32(X+((5 + 3*i) & 15));\
|
||||
else \
|
||||
a += (c ^ (b | ~d)) + X[( 7*i) & 15]; \
|
||||
a += (c ^ (b | ~d)) + AV_RL32(X+(( 7*i) & 15));\
|
||||
} \
|
||||
a = b + (a << t | a >> (32 - t)); \
|
||||
} while (0)
|
||||
@ -112,11 +112,6 @@ static void body(uint32_t ABCD[4], uint32_t *src, int nblocks)
|
||||
|
||||
X = src + n * 16;
|
||||
|
||||
#if HAVE_BIGENDIAN
|
||||
for (i = 0; i < 16; i++)
|
||||
X[i] = av_bswap32(X[i]);
|
||||
#endif
|
||||
|
||||
#if CONFIG_SMALL
|
||||
for (i = 0; i < 64; i++) {
|
||||
CORE(i, a, b, c, d);
|
||||
@ -173,7 +168,7 @@ void av_md5_update(AVMD5 *ctx, const uint8_t *src, int len)
|
||||
}
|
||||
|
||||
end = src + (len & ~63);
|
||||
if (HAVE_BIGENDIAN || (!HAVE_FAST_UNALIGNED && ((intptr_t)src & 3))) {
|
||||
if (!HAVE_FAST_UNALIGNED && ((intptr_t)src & 3)) {
|
||||
while (src < end) {
|
||||
memcpy(ctx->block, src, 64);
|
||||
body(ctx->ABCD, (uint32_t *) ctx->block, 1);
|
||||
|
Loading…
Reference in New Issue
Block a user