From fd0096a42c6d21e922e99669b1752a03987ebc84 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 21 May 2014 22:00:39 +0000 Subject: [PATCH] [X86] Fix a bug in the lowering of BLENDI introduced in r209043. ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the second argument. On the other hand, BLENDI uses 0 to identify the first argument and 1 to identify the second argument. Fix the generation of the blend mask to account for this difference. The bug did not show up with r209043, because we were not checking for the actual arguments of the blend instruction! This commit also fixes the test cases. Note: The same mask works for the BLENDr variant because the arguments are swapped during instruction selection (see the BLENDXXrr patterns). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209324 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 +++++--- test/CodeGen/X86/avx-blend.ll | 38 +++++++++++++++++++++++------- test/CodeGen/X86/blend-msb.ll | 12 +++++++++- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 72743a97abd..61828759fc2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7980,7 +7980,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// This function assumes its argument is a BUILD_VECTOR of constand or +// This function assumes its argument is a BUILD_VECTOR of constants or // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is // true. static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, @@ -8004,9 +8004,13 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, Lane2Cond = !isZero(SndLaneEltCond); if (Lane1Cond == Lane2Cond || Lane2Cond < 0) - MaskValue |= !!Lane1Cond << i; + // Lane1Cond != 0, means we want the first argument. 
+ // Lane1Cond == 0, means we want the second argument. + // The encoding of this argument is 0 for the first argument, 1 + // for the second. Therefore, invert the condition. + MaskValue |= !Lane1Cond << i; else if (Lane1Cond < 0) - MaskValue |= !!Lane2Cond << i; + MaskValue |= !Lane2Cond << i; else return false; } diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll index 8577a616c3c..4d4f6c1a03a 100644 --- a/test/CodeGen/X86/avx-blend.ll +++ b/test/CodeGen/X86/avx-blend.ll @@ -3,7 +3,16 @@ ; AVX128 tests: ;CHECK-LABEL: vsel_float: -;CHECK: vblendps $5 +; select mask is <i1 true, i1 false, i1 true, i1 false>. +; Big endian representation is 0101 = 5. +; '1' means takes the first argument, '0' means takes the second argument. +; This is the opposite of the intel syntax, thus we expect +; the inverted mask: 1010 = 10. +; According to the ABI: +; v1 is in xmm0 => first argument is xmm0. +; v2 is in xmm1 => second argument is xmm1. +; result is in xmm0 => destination argument. +;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0 ;CHECK: ret define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2 @@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { ;CHECK-LABEL: vsel_i32: -;CHECK: vblendps $5 +;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0 ;CHECK: ret define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2 @@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) { ;CHECK-LABEL: vsel_float8: ;CHECK-NOT: vinsertf128 -;CHECK: vblendps $17 +; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false> +; which translates into the boolean mask (big endian representation): +; 00010001 = 17. +; '1' means takes the first argument, '0' means takes the second argument. +; This is the opposite of the intel syntax, thus we expect +; the inverted mask: 11101110 = 238.
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0 ;CHECK: ret define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2 @@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { ;CHECK-LABEL: vsel_i328: ;CHECK-NOT: vinsertf128 -;CHECK: vblendps $17 +;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0 ;CHECK-NEXT: ret define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2 @@ -69,8 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { } ;CHECK-LABEL: vsel_double8: -;CHECK: vblendpd $1 -;CHECK: vblendpd $1 +; select mask is 2x: 0001 => intel mask: ~0001 = 14 +; ABI: +; v1 is in ymm0 and ymm1. +; v2 is in ymm2 and ymm3. +; result is in ymm0 and ymm1. +; Compute the low part: res.low = blend v1.low, v2.low, blendmask +;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0 +; Compute the high part. +;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1 ;CHECK: ret define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2 @@ -78,8 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { } ;CHECK-LABEL: vsel_i648: -;CHECK: vblendpd $1 -;CHECK: vblendpd $1 +;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0 +;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1 ;CHECK: ret define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2 diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll index 4e17a714bf5..34aaf2c31ac 100644 --- a/test/CodeGen/X86/blend-msb.ll +++ b/test/CodeGen/X86/blend-msb.ll @@ -22,7 +22,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { } ;CHECK-LABEL: vsel_8xi16: -;CHECK: pblendw $17 +; The select mask is +; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false> +; which translates into the boolean mask (big endian representation): +; 00010001 = 17.
+; '1' means takes the first argument, '0' means takes the second argument. +; This is the opposite of the intel syntax, thus we expect +; the inverted mask: 11101110 = 238. +; According to the ABI: +; v1 is in xmm0 => first argument is xmm0. +; v2 is in xmm1 => second argument is xmm1. +;CHECK: pblendw $238, %xmm1, %xmm0 ;CHECK: ret define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) { %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2