mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-20 11:08:27 +00:00
[ARM64] Fix "Cannot select" for vector ctpop
The commit of r205855 ("Author: Arnold Schwaighofer <aschwaighofer@apple.com> Date: Wed Apr 9 14:20:47 2014 +0000 SLPVectorizer: Only vectorize intrinsics whose operands are widened equally. The vectorizer only knows how to vectorize intrinsics by widening all operands by the same factor. Patch by Tyler Nowicki!") exposed a backend bug causing a regression (Cannot select ctpop). The commit msg is a bit confusing because the patch actually changes the behavior for the loop-vectorizer as well. As things got refactored into a helper, ctpop got snuck into the trivially-vectorizable helper which is now used by both vectorizers. In other words, we started seeing vector-ctpops in the backend. This change makes ctpop LegalizeAction::Expand for the types not supported by the byte-only CNT instruction. We may be able to custom-lower these later to a single CNT but this is to fix the compiler crash first. Fixes <rdar://problem/16578951> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206433 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d6312bbbbd
commit
e1a38f7041
@ -504,6 +504,10 @@ void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
|
||||
setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
|
||||
|
||||
// CNT supports only B element sizes.
|
||||
if (VT != MVT::v8i8 && VT != MVT::v16i8)
|
||||
setOperationAction(ISD::CTPOP, VT.getSimpleVT(), Expand);
|
||||
|
||||
setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
|
||||
setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
|
||||
setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
|
||||
|
68
test/CodeGen/ARM64/vpopcnt.ll
Normal file
68
test/CodeGen/ARM64/vpopcnt.ll
Normal file
@ -0,0 +1,68 @@
|
||||
; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
|
||||
target triple = "arm64-apple-ios"
|
||||
|
||||
; The non-byte ones used to fail with "Cannot select"
|
||||
|
||||
; CHECK-LABEL: ctpopv8i8
|
||||
; CHECK: cnt.8b
|
||||
define <8 x i8> @ctpopv8i8(<8 x i8> %x) nounwind readnone {
|
||||
%cnt = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %x)
|
||||
ret <8 x i8> %cnt
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: ctpopv4i16
|
||||
; CHECK: cnt.8b
|
||||
define <4 x i16> @ctpopv4i16(<4 x i16> %x) nounwind readnone {
|
||||
%cnt = tail call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %x)
|
||||
ret <4 x i16> %cnt
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: ctpopv2i32
|
||||
; CHECK: cnt.8b
|
||||
define <2 x i32> @ctpopv2i32(<2 x i32> %x) nounwind readnone {
|
||||
%cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
|
||||
ret <2 x i32> %cnt
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
|
||||
|
||||
|
||||
; CHECK-LABEL: ctpopv16i8
|
||||
; CHECK: cnt.16b
|
||||
define <16 x i8> @ctpopv16i8(<16 x i8> %x) nounwind readnone {
|
||||
%cnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)
|
||||
ret <16 x i8> %cnt
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: ctpopv8i16
|
||||
; CHECK: cnt.8b
|
||||
define <8 x i16> @ctpopv8i16(<8 x i16> %x) nounwind readnone {
|
||||
%cnt = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %x)
|
||||
ret <8 x i16> %cnt
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: ctpopv4i32
|
||||
; CHECK: cnt.8b
|
||||
define <4 x i32> @ctpopv4i32(<4 x i32> %x) nounwind readnone {
|
||||
%cnt = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
|
||||
ret <4 x i32> %cnt
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: ctpopv2i64
|
||||
; CHECK: cnt.8b
|
||||
define <2 x i64> @ctpopv2i64(<2 x i64> %x) nounwind readnone {
|
||||
%cnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
|
||||
ret <2 x i64> %cnt
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
|
Loading…
Reference in New Issue
Block a user