mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-04 19:07:26 +00:00
[ARM] Prevent PerformVCVTCombine from combining a vmul/vcvt with 8 lanes
This would result in a crash since the vcvt used does not support v8i32 types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224332 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4519623e9f
commit
a9d9f7eae8
@ -9355,16 +9355,18 @@ static SDValue PerformVCVTCombine(SDNode *N,
|
||||
|
||||
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
|
||||
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
|
||||
if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
|
||||
unsigned NumLanes = Op.getValueType().getVectorNumElements();
|
||||
if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
|
||||
NumLanes > 4) {
|
||||
// These instructions only exist converting from f32 to i32. We can handle
|
||||
// smaller integers by generating an extra truncate, but larger ones would
|
||||
// be lossy.
|
||||
// be lossy. We also can't handle more than 4 lanes, since these instructions
|
||||
// only support v2i32/v4i32 types.
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
|
||||
Intrinsic::arm_neon_vcvtfp2fxu;
|
||||
unsigned NumLanes = Op.getValueType().getVectorNumElements();
|
||||
SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
|
||||
NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
|
||||
DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
|
||||
|
26
test/CodeGen/ARM/isel-v8i32-crash.ll
Normal file
26
test/CodeGen/ARM/isel-v8i32-crash.ll
Normal file
@ -0,0 +1,26 @@
|
||||
; RUN: llc < %s -mtriple=armv7-linux-gnu | FileCheck %s
|
||||
|
||||
; Check we don't crash when trying to combine:
|
||||
; (d1 = <float 8.000000e+00, float 8.000000e+00, ...>) (power of 2)
|
||||
; vmul.f32 d0, d1, d0
|
||||
; vcvt.s32.f32 d0, d0
|
||||
; into:
|
||||
; vcvt.s32.f32 d0, d0, #3
|
||||
; when we have a vector length of 8, due to use of v8i32 types.
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
|
||||
; CHECK: func:
|
||||
; CHECK: vcvt.s32.f32 q[[R:[0-9]]], q[[R]], #3
|
||||
; Regression input: loads an <8 x float> vector through %pf, multiplies every
; lane by 8.0, converts the product to <8 x i16> with fptosi, and stores the
; result through %pb. The 8-lane fmul + fptosi pair is the shape described in
; the header comment of this test as the one that must not crash.
define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 {
|
||||
entry:
|
||||
%0 = bitcast float* %pf to <8 x float>*
|
||||
%1 = load <8 x float>* %0, align 4
|
||||
%2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
|
||||
%3 = fptosi <8 x float> %2 to <8 x i16>
|
||||
|
||||
%4 = bitcast i16* %pb to <8 x i16>*
|
||||
|
||||
store <8 x i16> %3, <8 x i16>* %4, align 2
|
||||
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
Loading…
x
Reference in New Issue
Block a user