Revert "arm/h264: fix overreads in h264_chroma_mc8-and-h264_chroma_mc4"

This reverts commit d25f87f5174df5923be7ce0bb89fad08999286ed.

The reverted commit breaks decoding of some H.264 files.
I tested the original patch with FATE, but by mistake I forgot to
specify the FATE samples, so testing was limited to the internal
regression tests.
This commit is contained in:
Michael Niedermayer 2012-09-26 17:31:25 +02:00
parent 55a6f705ac
commit 3b92075e6c

View File

@ -58,15 +58,14 @@ T cmp r7, #0
vdup.8 d1, r12
vld1.8 {d4, d5}, [r1], r4
vdup.8 d2, r6
vld1.8 {d6, d7}, [r5], r4
vdup.8 d3, r7
vext.8 d5, d4, d5, #1
1:
vld1.64 {d6, d7}, [r5], r4
pld [r5]
vmull.u8 q8, d4, d0
vext.8 d7, d6, d7, #1
1: pld [r5]
vmull.u8 q8, d4, d0
vmlal.u8 q8, d5, d1
vld1.8 {d4, d5}, [r1], r4
vmlal.u8 q8, d6, d2
@ -77,6 +76,7 @@ T cmp r7, #0
vmlal.u8 q9, d7, d1
vmlal.u8 q9, d4, d2
vmlal.u8 q9, d5, d3
vld1.8 {d6, d7}, [r5], r4
pld [r1]
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
@ -92,6 +92,7 @@ T cmp r7, #0
vld1.8 {d21}, [lr,:64], r2
vrhadd.u8 q8, q8, q10
.endif
vext.8 d7, d6, d7, #1
vst1.8 {d16}, [r0,:64], r2
vst1.8 {d17}, [r0,:64], r2
bgt 1b
@ -107,16 +108,16 @@ T cmp r7, #0
add r5, r1, r2
lsl r4, r2, #1
3:
vld1.8 {d4}, [r1], r4
vld1.8 {d6}, [r5], r4
pld [r5]
3: pld [r5]
vmull.u8 q8, d4, d0
vmlal.u8 q8, d6, d1
vld1.8 {d4}, [r1], r4
vmull.u8 q9, d6, d0
vmlal.u8 q9, d4, d1
vld1.8 {d6}, [r5], r4
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
@ -144,13 +145,15 @@ T cmp r7, #0
vext.8 d5, d4, d5, #1
vext.8 d7, d6, d7, #1
pld [r1]
5: pld [r1]
subs r3, r3, #2
vmull.u8 q8, d4, d0
vmlal.u8 q8, d5, d1
vld1.8 {d4, d5}, [r1], r2
vmull.u8 q9, d6, d0
vmlal.u8 q9, d7, d1
pld [r1]
vext.8 d5, d4, d5, #1
.ifc \codec,h264
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
@ -165,9 +168,11 @@ T cmp r7, #0
vld1.8 {d21}, [lr,:64], r2
vrhadd.u8 q8, q8, q10
.endif
vld1.8 {d6, d7}, [r1], r2
vext.8 d7, d6, d7, #1
vst1.8 {d16}, [r0,:64], r2
vst1.8 {d17}, [r0,:64], r2
bgt 4b
bgt 5b
pop {r4-r7, pc}
endfunc
@ -177,7 +182,7 @@ endfunc
.macro h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
push {r4-r7, lr}
ldrd r4, [sp, #20]
ldrd r4, r5, [sp, #20]
.ifc \type,avg
mov lr, r0
.endif
@ -211,27 +216,26 @@ T cmp r7, #0
vdup.8 d1, r12
vld1.8 {d4}, [r1], r4
vdup.8 d2, r6
vld1.8 {d6}, [r5], r4
vdup.8 d3, r7
vext.8 d5, d4, d5, #1
vtrn.32 d0, d1
vtrn.32 d2, d3
vtrn.32 d4, d5
1:
vld1.8 {d6}, [r5], r4
pld [r5]
vext.8 d7, d6, d7, #1
vmull.u8 q8, d4, d0
vtrn.32 d4, d5
vtrn.32 d6, d7
vld1.8 {d4}, [r1], r4
vmlal.u8 q8, d6, d2
vext.8 d5, d4, d5, #1
vmull.u8 q9, d6, d0
vtrn.32 d4, d5
vmlal.u8 q9, d4, d2
vtrn.32 d0, d1
vtrn.32 d2, d3
1: pld [r5]
vmull.u8 q8, d4, d0
vmlal.u8 q8, d6, d2
vld1.8 {d4}, [r1], r4
vext.8 d5, d4, d5, #1
vtrn.32 d4, d5
vmull.u8 q9, d6, d0
vmlal.u8 q9, d4, d2
vld1.8 {d6}, [r5], r4
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
.ifc \codec,h264
@ -247,6 +251,8 @@ T cmp r7, #0
vld1.32 {d20[1]}, [lr,:32], r2
vrhadd.u8 d16, d16, d20
.endif
vext.8 d7, d6, d7, #1
vtrn.32 d6, d7
vst1.32 {d16[0]}, [r0,:32], r2
vst1.32 {d16[1]}, [r0,:32], r2
bgt 1b
@ -265,14 +271,13 @@ T cmp r7, #0
add r5, r1, r2
lsl r4, r2, #1
vld1.32 {d4[0]}, [r1], r4
3:
vld1.32 {d4[1]}, [r5], r4
pld [r5]
3: pld [r5]
vmull.u8 q8, d4, d0
vld1.32 {d4[0]}, [r1], r4
vmull.u8 q9, d4, d1
vld1.32 {d4[1]}, [r5], r4
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
.ifc \codec,h264
@ -301,10 +306,12 @@ T cmp r7, #0
vtrn.32 d4, d5
vtrn.32 d6, d7
vmull.u8 q8, d4, d0
5: vmull.u8 q8, d4, d0
vmull.u8 q9, d6, d0
subs r3, r3, #2
vld1.8 {d4}, [r1], r2
vext.8 d5, d4, d5, #1
vtrn.32 d4, d5
vadd.i16 d16, d16, d17
vadd.i16 d17, d18, d19
pld [r1]
@ -319,10 +326,13 @@ T cmp r7, #0
vld1.32 {d20[1]}, [lr,:32], r2
vrhadd.u8 d16, d16, d20
.endif
vld1.8 {d6}, [r1], r2
vext.8 d7, d6, d7, #1
vtrn.32 d6, d7
pld [r1]
vst1.32 {d16[0]}, [r0,:32], r2
vst1.32 {d16[1]}, [r0,:32], r2
bgt 4b
bgt 5b
pop {r4-r7, pc}
endfunc