mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-31 17:42:40 +00:00
Fix a bug in the lowering of BUILD_VECTOR for AVX: SCALAR_TO_VECTOR does not zero the untouched elements (it leaves them undefined), so use INSERT_VECTOR_ELT into a zero vector instead.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147948 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1876abe63e
commit
394a1f53b9
@ -5161,11 +5161,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
|
||||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
|
||||
if (VT.getSizeInBits() == 256) {
|
||||
EVT VT128 = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems / 2);
|
||||
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Item);
|
||||
SDValue ZeroVec = getZeroVector(VT, true, DAG, dl);
|
||||
return Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
|
||||
DAG, dl);
|
||||
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
|
||||
Item, DAG.getIntPtrConstant(0));
|
||||
}
|
||||
assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
|
||||
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
|
||||
|
@ -1,13 +1,26 @@
|
||||
; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32
|
||||
; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
|
||||
target triple = "i686-pc-win32"
|
||||
|
||||
;CHECK: bad_cast
|
||||
define void @bad_cast() {
|
||||
entry:
|
||||
%vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
|
||||
%vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
|
||||
store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
|
||||
unreachable
|
||||
;CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;CHECK: bad_insert
|
||||
define void @bad_insert(i32 %t) {
|
||||
entry:
|
||||
;CHECK: vpinsrd
|
||||
%v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
|
||||
store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
|
||||
;CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
|
||||
|
||||
;;
|
||||
;; The two tests below check that we must fold load + scalar_to_vector
|
||||
;; + ins_subvec+ zext into only a single vmovss or vmovsd
|
||||
;; + ins_subvec+ zext into only a single vmovss or vmovsd or vinsertps from memory
|
||||
|
||||
; CHECK: vmovss (%
|
||||
; CHECK: mov00
|
||||
define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
|
||||
%val = load float* %ptr
|
||||
; CHECK: vinsertps
|
||||
; CHECK: vinsertf128
|
||||
%i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
|
||||
ret <8 x float> %i0
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; CHECK: vmovsd (%
|
||||
; CHECK: mov01
|
||||
define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
|
||||
%val = load double* %ptr
|
||||
; CHECK: vmovlpd
|
||||
; CHECK: vinsertf128
|
||||
%i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
|
||||
ret <4 x double> %i0
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; CHECK: vmovaps %ymm
|
||||
|
Loading…
x
Reference in New Issue
Block a user