From 699a9dd7c3d217f16c7dd0e6ea53738f7754ba6a Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Tue, 1 Sep 2015 21:56:00 +0000 Subject: [PATCH] [ARM] Don't abort on variable-idx extractelt in ReconstructShuffle. The code introduced in r244314 assumed that EXTRACT_VECTOR_ELT only takes constant indices, but it does accept variables. Bail out for those: we can't use them, as the shuffles we want to reconstruct do require constant masks. llvm-svn: 246594 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 ++++ llvm/test/CodeGen/ARM/vdup.ll | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 17c48287f059..f2915a4ab152 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5554,6 +5554,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); + } else if (!isa<ConstantSDNode>(V.getOperand(1))) { + // Furthermore, shuffles require a constant mask, whereas extractelts + // accept variable indices. + return SDValue(); } // Add this element source to the list if it's not already there. diff --git a/llvm/test/CodeGen/ARM/vdup.ll b/llvm/test/CodeGen/ARM/vdup.ll index 36eebbfc4650..25c4807d9862 100644 --- a/llvm/test/CodeGen/ARM/vdup.ll +++ b/llvm/test/CodeGen/ARM/vdup.ll @@ -364,3 +364,19 @@ define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) { %sub = fsub <4 x float> %splat.splat, %p ret <4 x float> %sub } + +; Also make sure we don't barf on variable-index extractelts, where we almost +; could have generated a vdup. 
+ +define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) { +; CHECK-LABEL: check_i8_varidx: +; CHECK: mov r[[FP:[0-9]+]], sp +; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4] +; CHECK: mov r[[SPCOPY:[0-9]+]], sp +; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]] +; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]] + %x = extractelement <16 x i8> %v, i32 %idx + %1 = insertelement <8 x i8> undef, i8 %x, i32 0 + %2 = insertelement <8 x i8> %1, i8 %x, i32 1 + ret <8 x i8> %2 +}