[ARM64] Fix "Cannot select" for vector ctpop

The commit of r205855:

Author: Arnold Schwaighofer <aschwaighofer@apple.com>
Date:   Wed Apr 9 14:20:47 2014 +0000

    SLPVectorizer: Only vectorize intrinsics whose operands are widened equally

    The vectorizer only knows how to vectorize intrinsics by widening all operands by
    the same factor.

    Patch by Tyler Nowicki!

exposed a backend bug causing a regression (Cannot select ctpop).

The commit message is a bit confusing because the patch actually changes the
behavior of the loop vectorizer as well.  When things were refactored into a
helper, ctpop was snuck into the trivially-vectorizable helper, which is now
used by both vectorizers.  In other words, we started seeing vector ctpops in
the backend.

This change sets the legalize action for ctpop to Expand for the vector types
not supported by the byte-only CNT instruction.  We may be able to custom-lower
these to a single CNT later, but the priority is to fix the compiler crash first.

Fixes <rdar://problem/16578951>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206433 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2014-04-17 01:01:37 +00:00
parent d6312bbbbd
commit e1a38f7041
2 changed files with 72 additions and 0 deletions

View File

@ -504,6 +504,10 @@ void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// CNT supports only B element sizes.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
setOperationAction(ISD::CTPOP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);

View File

@ -0,0 +1,68 @@
; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
target triple = "arm64-apple-ios"
; Regression test for vector ctpop on ARM64. The byte-element cases (v8i8,
; v16i8) always selected; the cases with non-byte element types (i16, i32, i64)
; used to fail with "Cannot select".
; 64-bit vector of bytes: one of the two types for which CTPOP is not marked
; Expand, so a vector CNT over 8 byte lanes is expected.
; CHECK-LABEL: ctpopv8i8
; CHECK: cnt.8b
define <8 x i8> @ctpopv8i8(<8 x i8> %x) nounwind readnone {
%cnt = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %x)
ret <8 x i8> %cnt
}
declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
; 64-bit vector of i16: a non-byte element type, so CTPOP is marked Expand.
; The check only requires that a byte-wise cnt.8b appears somewhere in this
; function's output; it does not pin the full expanded sequence.
; CHECK-LABEL: ctpopv4i16
; CHECK: cnt.8b
define <4 x i16> @ctpopv4i16(<4 x i16> %x) nounwind readnone {
%cnt = tail call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %x)
ret <4 x i16> %cnt
}
declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
; 64-bit vector of i32: a non-byte element type, so CTPOP is marked Expand.
; The check only requires that a byte-wise cnt.8b appears somewhere in this
; function's output; it does not pin the full expanded sequence.
; CHECK-LABEL: ctpopv2i32
; CHECK: cnt.8b
define <2 x i32> @ctpopv2i32(<2 x i32> %x) nounwind readnone {
%cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
ret <2 x i32> %cnt
}
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
; 128-bit vector of bytes: the other type for which CTPOP is not marked
; Expand, so a vector CNT over 16 byte lanes is expected. This is the only
; case in this file that expects the 16b form.
; CHECK-LABEL: ctpopv16i8
; CHECK: cnt.16b
define <16 x i8> @ctpopv16i8(<16 x i8> %x) nounwind readnone {
%cnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)
ret <16 x i8> %cnt
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
; 128-bit vector of i16: a non-byte element type, so CTPOP is marked Expand.
; Note the expected form is cnt.8b, not cnt.16b, even though the input is a
; 128-bit vector. NOTE(review): presumably the expansion works on one element
; at a time; confirm against llc output.
; CHECK-LABEL: ctpopv8i16
; CHECK: cnt.8b
define <8 x i16> @ctpopv8i16(<8 x i16> %x) nounwind readnone {
%cnt = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %x)
ret <8 x i16> %cnt
}
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
; 128-bit vector of i32: a non-byte element type, so CTPOP is marked Expand.
; Note the expected form is cnt.8b, not cnt.16b, even though the input is a
; 128-bit vector. NOTE(review): presumably the expansion works on one element
; at a time; confirm against llc output.
; CHECK-LABEL: ctpopv4i32
; CHECK: cnt.8b
define <4 x i32> @ctpopv4i32(<4 x i32> %x) nounwind readnone {
%cnt = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
ret <4 x i32> %cnt
}
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
; 128-bit vector of i64: a non-byte element type, so CTPOP is marked Expand.
; Note the expected form is cnt.8b, not cnt.16b, even though the input is a
; 128-bit vector. NOTE(review): presumably the expansion works on one element
; at a time; confirm against llc output.
; CHECK-LABEL: ctpopv2i64
; CHECK: cnt.8b
define <2 x i64> @ctpopv2i64(<2 x i64> %x) nounwind readnone {
%cnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
ret <2 x i64> %cnt
}
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone