mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-26 13:00:33 +00:00
For rounding in chroma MC SSSE3, use 16-byte pw_3/4 instead of reading 8 bytes
and then using movlhps to duplicate it into the higher half of the register.

Originally committed as revision 26086 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
95ee581a95
commit
8d147f1f60
@ -41,7 +41,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
|
||||
DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
|
||||
{0x8000000080000000ULL, 0x8000000080000000ULL};
|
||||
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
|
||||
/* Eight packed 16-bit words of value 3, stored as a full 16-byte constant.
 * Must be 16-byte aligned like ff_pw_4/5/8: the SSSE3 chroma MC code now
 * loads its rounding constant with movdqa, which requires an aligned address. */
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3 ) = {0x0003000300030003ULL, 0x0003000300030003ULL};
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
|
||||
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
|
||||
|
@ -32,7 +32,7 @@ extern const uint64_t ff_wtwo;
|
||||
|
||||
extern const uint64_t ff_pdw_80000000[2];
|
||||
|
||||
extern const uint64_t ff_pw_3;
|
||||
extern const xmm_reg ff_pw_3;
|
||||
extern const xmm_reg ff_pw_4;
|
||||
extern const xmm_reg ff_pw_5;
|
||||
extern const xmm_reg ff_pw_8;
|
||||
|
@ -530,9 +530,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
|
||||
add r4, 8
|
||||
sub r4, r5 ; 255*x+8 = x<<8 | (8-x)
|
||||
movd m7, r4d
|
||||
movq m6, [rnd_1d_%2]
|
||||
movdqa m6, [rnd_1d_%2]
|
||||
pshuflw m7, m7, 0
|
||||
movlhps m6, m6
|
||||
movlhps m7, m7
|
||||
|
||||
.next2xrows
|
||||
@ -568,9 +567,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
|
||||
add r5, 8
|
||||
sub r5, r4 ; 255*y+8 = y<<8 | (8-y)
|
||||
movd m7, r5d
|
||||
movq m6, [rnd_1d_%2]
|
||||
movdqa m6, [rnd_1d_%2]
|
||||
pshuflw m7, m7, 0
|
||||
movlhps m6, m6
|
||||
movlhps m7, m7
|
||||
|
||||
.next2yrows
|
||||
|
Loading…
Reference in New Issue
Block a user