From cab9661dba472fc913fb43077c7b8dad13410e5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?=
Date: Fri, 8 Apr 2016 13:39:22 +0200
Subject: [PATCH] sws/aarch64/yuv2rgb: honor iOS calling convention
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

y_offset and y_coeff being successive 32-bit integers, they are packed
into 8 bytes instead of 2x8 bytes.

See https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html

> iOS diverges from Procedure Call Standard for the ARM 64-bit
> Architecture in several ways
[...]
> In the generic procedure call standard, all function arguments passed
> on the stack consume slots in multiples of 8 bytes. In iOS, this
> requirement is dropped, and values consume only the space required.
[...]
> Padding is still inserted on the stack to satisfy arguments’ alignment
> requirements.
---
 libswscale/aarch64/yuv2rgb_neon.S | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/libswscale/aarch64/yuv2rgb_neon.S b/libswscale/aarch64/yuv2rgb_neon.S
index 8cefe2291b..b7446aa105 100644
--- a/libswscale/aarch64/yuv2rgb_neon.S
+++ b/libswscale/aarch64/yuv2rgb_neon.S
@@ -21,10 +21,18 @@
 
 #include "libavutil/aarch64/asm.S"
 
+.macro load_yoff_ycoeff yoff ycoeff
+#if defined(__APPLE__)
+    ldp                 w9, w10, [sp, #\yoff]
+#else
+    ldr                 w9,  [sp, #\yoff]
+    ldr                 w10, [sp, #\ycoeff]
+#endif
+.endm
+
 .macro load_args_nv12
     ldr                 x8,  [sp]                                       // table
-    ldr                 w9,  [sp, #8]                                   // y_offset
-    ldr                 w10, [sp, #16]                                  // y_coeff
+    load_yoff_ycoeff    8, 16                                           // y_offset, y_coeff
     ld1                 {v1.1D}, [x8]
     dup                 v0.8H, w10
     dup                 v3.8H, w9
@@ -42,8 +50,7 @@
     ldr                 x13, [sp]                                       // srcV
     ldr                 w14, [sp, #8]                                   // linesizeV
     ldr                 x8,  [sp, #16]                                  // table
-    ldr                 w9,  [sp, #24]                                  // y_offset
-    ldr                 w10, [sp, #32]                                  // y_coeff
+    load_yoff_ycoeff    24, 32                                          // y_offset, y_coeff
     ld1                 {v1.1D}, [x8]
     dup                 v0.8H, w10
     dup                 v3.8H, w9
@@ -59,8 +66,7 @@
     ldr                 x13, [sp]                                       // srcV
     ldr                 w14, [sp, #8]                                   // linesizeV
     ldr                 x8,  [sp, #16]                                  // table
-    ldr                 w9,  [sp, #24]                                  // y_offset
-    ldr                 w10, [sp, #32]                                  // y_coeff
+    load_yoff_ycoeff    24, 32                                          // y_offset, y_coeff
     ld1                 {v1.1D}, [x8]
     dup                 v0.8H, w10
     dup                 v3.8H, w9