From 7c82e6a32a84e238c9e4e57dd43eaba540a79ce1 Mon Sep 17 00:00:00 2001 From: Joel Jones Date: Wed, 18 Jul 2012 00:02:16 +0000 Subject: [PATCH] More replacing of target-dependent intrinsics with target-independent intrinsics. The second instruction(s) to be handled are the vector versions of count set bits (ctpop). The changes here are to clang so that it generates a target independent vector ctpop when it sees an ARM dependent vector bits set count. The changes in llvm are to match the target independent vector ctpop and in VMCore/AutoUpgrade.cpp to update any existing bc files containing ARM dependent vector pop counts with target-independent ctpops. There are also changes to an existing test case in llvm for ARM vector count instructions and to a test for the bitcode upgrade. There is deliberately no test for the change to clang, as so far as I know, no consensus has been reached regarding how to test neon instructions in clang; q.v. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160410 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 4 ++-- lib/VMCore/AutoUpgrade.cpp | 12 +++++++++++- test/Bitcode/arm32_neon_vcnt_upgrade.ll | 11 ++++++++++- test/CodeGen/ARM/vcnt.ll | 12 ++++++------ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c540d0ddd70..d4afa3380af 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4829,10 +4829,10 @@ defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8", - v8i8, v8i8, int_arm_neon_vcnt>; + v8i8, v8i8, ctpop>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", - v16i8, v16i8, int_arm_neon_vcnt>; + v16i8, v16i8, ctpop>; // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, diff --git a/lib/VMCore/AutoUpgrade.cpp 
b/lib/VMCore/AutoUpgrade.cpp index 8febce18c03..f0de5c2fff1 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -66,6 +66,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { "llvm.ctlz." + Name.substr(14), F->getParent()); return true; } + if (Name.startswith("arm.neon.vcnt")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, + F->arg_begin()->getType()); + return true; + } break; } case 'c': { @@ -314,11 +319,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::arm_neon_vclz: { // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.* CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0), - Builder.getFalse(), + Builder.getFalse(), "llvm.ctlz." + Name.substr(14))); CI->eraseFromParent(); return; } + case Intrinsic::ctpop: { + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0))); + CI->eraseFromParent(); + return; + } case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: diff --git a/test/Bitcode/arm32_neon_vcnt_upgrade.ll b/test/Bitcode/arm32_neon_vcnt_upgrade.ll index b3f2f03d108..10b9284e981 100644 --- a/test/Bitcode/arm32_neon_vcnt_upgrade.ll +++ b/test/Bitcode/arm32_neon_vcnt_upgrade.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s -; NB: currently tests only vclz, should also test vcnt and vcls +; Tests vclz and vcnt define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { ;CHECK: @vclz16 @@ -9,4 +9,13 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { ret <4 x i16> %tmp2 } +define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { +;CHECK: @vcnt8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1) +;CHECK: call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> + ret <8 x i8> %tmp2 +} + declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone +declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone diff --git a/test/CodeGen/ARM/vcnt.ll 
b/test/CodeGen/ARM/vcnt.ll index 7e54524db78..9f55c24b402 100644 --- a/test/CodeGen/ARM/vcnt.ll +++ b/test/CodeGen/ARM/vcnt.ll @@ -3,22 +3,22 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { ;CHECK: vcnt8: -;CHECK: vcnt.8 +;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} %tmp1 = load <8 x i8>* %A - %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1) + %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { ;CHECK: vcntQ8: -;CHECK: vcnt.8 +;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} %tmp1 = load <16 x i8>* %A - %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1) + %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } -declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone +declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { ;CHECK: vclz8: