Merge commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873'

* commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873':
  aarch64: Make transpose_4x4H do a regular transpose

Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
This commit is contained in:
Derek Buitenhuis 2016-04-24 12:51:34 +01:00
commit 87b8e95008
2 changed files with 18 additions and 18 deletions

View File

@@ -33,25 +33,25 @@ function ff_h264_idct_add_neon, export=1
sshr v17.4H, v3.4H, #1
st1 {v30.8H}, [x1], #16
sub v5.4H, v0.4H, v2.4H
add v6.4H, v1.4H, v17.4H
sub v7.4H, v16.4H, v3.4H
add v0.4H, v4.4H, v6.4H
add v1.4H, v5.4H, v7.4H
sub v3.4H, v4.4H, v6.4H
sub v2.4H, v5.4H, v7.4H
sub v6.4H, v16.4H, v3.4H
add v7.4H, v1.4H, v17.4H
add v0.4H, v4.4H, v7.4H
add v1.4H, v5.4H, v6.4H
sub v2.4H, v5.4H, v6.4H
sub v3.4H, v4.4H, v7.4H
transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7
add v4.4H, v0.4H, v3.4H
add v4.4H, v0.4H, v2.4H
ld1 {v18.S}[0], [x0], x2
sshr v16.4H, v2.4H, #1
sshr v16.4H, v3.4H, #1
sshr v17.4H, v1.4H, #1
ld1 {v19.S}[1], [x0], x2
sub v5.4H, v0.4H, v3.4H
ld1 {v18.S}[1], [x0], x2
sub v5.4H, v0.4H, v2.4H
ld1 {v19.S}[1], [x0], x2
add v6.4H, v16.4H, v1.4H
ins v4.D[1], v5.D[0]
sub v7.4H, v2.4H, v17.4H
sub v7.4H, v17.4H, v3.4H
ld1 {v19.S}[0], [x0], x2
ins v6.D[1], v7.D[0]
sub x0, x0, x2, lsl #2
@@ -68,8 +68,8 @@ function ff_h264_idct_add_neon, export=1
sqxtun v1.8B, v1.8H
st1 {v0.S}[0], [x0], x2
st1 {v1.S}[1], [x0], x2
st1 {v0.S}[1], [x0], x2
st1 {v1.S}[1], [x0], x2
st1 {v1.S}[0], [x0], x2
sub x1, x1, #32

View File

@@ -107,12 +107,12 @@
// transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
//
// Intended to transpose a 4x4 matrix of 16-bit elements.
//   r0-r3 : input rows, read as .4H (low 64 bits of each vector register);
//           they are overwritten with the result.
//   r4-r7 : scratch vector registers; their previous contents are lost.
//
// NOTE(review): this listing comes from a diff view whose +/- markers were
// lost, so the body appears to contain BOTH sides of the hunk back to back.
// Read literally, the second group would consume r2/r3 values that the first
// group has already overwritten. Confirm which group is current before
// assembling or copying this macro.
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
// Stage 1 (common to both variants): split the r0/r1 row pair into its
// even-indexed (trn1 -> r4) and odd-indexed (trn2 -> r5) 16-bit lanes.
trn1 \r4\().4H, \r0\().4H, \r1\().4H
trn2 \r5\().4H, \r0\().4H, \r1\().4H
// NOTE(review): presumably the removed (pre-change) lines. The even/odd
// lanes of r2/r3 go to r7/r6, and the 32-bit stage writes r3 before r2, so
// the last two output rows come out swapped relative to a plain transpose.
trn1 \r7\().4H, \r2\().4H, \r3\().4H
trn2 \r6\().4H, \r2\().4H, \r3\().4H
trn1 \r0\().2S, \r4\().2S, \r7\().2S
trn2 \r3\().2S, \r4\().2S, \r7\().2S
trn1 \r1\().2S, \r5\().2S, \r6\().2S
trn2 \r2\().2S, \r5\().2S, \r6\().2S
// NOTE(review): presumably the added (post-change) lines, matching the
// commit title about making this a regular transpose. The even/odd lanes of
// r2/r3 go to r6/r7, and the 32-bit stage then pairs them with r4/r5 so that
// r0, r1, r2, r3 receive output rows 0, 1, 2, 3 in order.
trn1 \r6\().4H, \r2\().4H, \r3\().4H
trn2 \r7\().4H, \r2\().4H, \r3\().4H
trn1 \r0\().2S, \r4\().2S, \r6\().2S
trn2 \r2\().2S, \r4\().2S, \r6\().2S
trn1 \r1\().2S, \r5\().2S, \r7\().2S
trn2 \r3\().2S, \r5\().2S, \r7\().2S
.endm
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9