[AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions

Differential Revision: https://reviews.llvm.org/D24125



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282624 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Konstantin Zhuravlyov 2016-09-28 20:05:39 +00:00
parent 22cd98fa68
commit f9bcd7b189
7 changed files with 1339 additions and 150 deletions

View File

@ -39,6 +39,61 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
Module *Mod;
bool HasUnsafeFPMath;
/// \brief Copies exact/nsw/nuw flags (if any) from binary operator \p I to
/// binary operator \p V. nsw/nuw are transferred for overflowing operators,
/// exact is transferred for possibly-exact operators.
///
/// \returns Binary operator \p V.
Value *copyFlags(const BinaryOperator &I, Value *V) const;
/// \returns Equivalent 16 bit integer type for given 32 bit integer type
/// \p T. For a vector type the result is a vector of 16 bit integers with
/// the same number of elements.
Type *getI16Ty(IRBuilder<> &B, const Type *T) const;
/// \returns Equivalent 32 bit integer type for given 16 bit integer type
/// \p T. For a vector type the result is a vector of 32 bit integers with
/// the same number of elements.
Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
/// \returns True if the base element of type \p T is 16 bit integer, false
/// otherwise. Both scalar and vector types are accepted.
bool isI16Ty(const Type *T) const;
/// \returns True if the base element of type \p T is 32 bit integer, false
/// otherwise. Both scalar and vector types are accepted.
bool isI32Ty(const Type *T) const;
/// \returns True if binary operation \p I is a signed binary operation, false
/// otherwise. The answer selects between sign and zero extension of the
/// operands during promotion.
bool isSigned(const BinaryOperator &I) const;
/// \returns True if the condition of 'select' operation \p I comes from a
/// signed 'icmp' operation, false otherwise (including the case where the
/// condition is not an 'icmp' at all).
bool isSigned(const SelectInst &I) const;
/// \brief Promotes uniform 16 bit binary operation \p I to equivalent 32 bit
/// binary operation by sign or zero extending operands to 32 bits, replacing
/// 16 bit operation with equivalent 32 bit operation, and truncating the
/// result of 32 bit operation back to 16 bits. 16 bit division operation is
/// not promoted. On success \p I is erased from its parent.
///
/// \returns True if 16 bit binary operation is promoted to equivalent 32 bit
/// binary operation, false otherwise.
bool promoteUniformI16OpToI32Op(BinaryOperator &I) const;
/// \brief Promotes uniform 16 bit 'icmp' operation \p I to 32 bit 'icmp'
/// operation by sign or zero extending operands to 32 bits, and replacing 16
/// bit operation with 32 bit operation. The predicate is kept and no
/// truncation of the result is emitted. \p I is erased from its parent.
///
/// \returns True.
bool promoteUniformI16OpToI32Op(ICmpInst &I) const;
/// \brief Promotes uniform 16 bit 'select' operation \p I to 32 bit 'select'
/// operation by sign or zero extending operands to 32 bits, replacing 16 bit
/// operation with 32 bit operation, and truncating the result of 32 bit
/// operation back to 16 bits. The condition operand is reused unchanged and
/// \p I is erased from its parent.
///
/// \returns True.
bool promoteUniformI16OpToI32Op(SelectInst &I) const;
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
@ -51,9 +106,10 @@ public:
bool visitFDiv(BinaryOperator &I);
// Generic handler: reports that nothing was changed.
bool visitInstruction(Instruction &I) {
return false;
}
// Generic handler: reports that nothing was changed.
bool visitInstruction(Instruction &I) { return false; }
// The three handlers below promote uniform 16 bit operations to 32 bits when
// the subtarget has 16 bit instructions; each returns true if it changed IR.
bool visitBinaryOperator(BinaryOperator &I);
bool visitICmpInst(ICmpInst &I);
bool visitSelectInst(SelectInst &I);
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@ -70,6 +126,150 @@ public:
} // End anonymous namespace
/// Transfers the poison-generating flags of \p I onto \p V.
///
/// nsw/nuw are copied when both \p I and \p V are overflowing binary
/// operators; exact is copied when both are possibly-exact operators.
///
/// \p V is usually the value returned by IRBuilder::CreateBinOp. When both
/// operands are constants the builder folds the operation and hands back a
/// Constant instead of an instruction; such a value carries no flags, so it
/// is returned untouched instead of being force-cast to BinaryOperator.
///
/// \returns \p V.
Value *AMDGPUCodeGenPrepare::copyFlags(
    const BinaryOperator &I, Value *V) const {
  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
  if (!BinOp)
    return V; // Constant folded (or otherwise not an instruction): no flags.

  // Query the flags on I only when I actually is of the matching operator
  // kind; the flag accessors assert otherwise.
  if (isa<OverflowingBinaryOperator>(BinOp) &&
      isa<OverflowingBinaryOperator>(&I)) {
    BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
    BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
  } else if (isa<PossiblyExactOperator>(BinOp) &&
             isa<PossiblyExactOperator>(&I)) {
    BinOp->setIsExact(I.isExact());
  }

  return V;
}
/// Maps a 32 bit integer type (scalar or vector) to the 16 bit integer type
/// with the same shape.
Type *AMDGPUCodeGenPrepare::getI16Ty(IRBuilder<> &B, const Type *T) const {
  assert(isI32Ty(T) && "T must be 32 bits");

  // Anything that is not a plain integer here is a vector of i32.
  if (!T->isIntegerTy()) {
    const unsigned NumElts = cast<VectorType>(T)->getNumElements();
    return VectorType::get(B.getInt16Ty(), NumElts);
  }
  return B.getInt16Ty();
}
/// Maps a 16 bit integer type (scalar or vector) to the 32 bit integer type
/// with the same shape.
Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
  assert(isI16Ty(T) && "T must be 16 bits");

  // Anything that is not a plain integer here is a vector of i16.
  if (!T->isIntegerTy()) {
    const unsigned NumElts = cast<VectorType>(T)->getNumElements();
    return VectorType::get(B.getInt32Ty(), NumElts);
  }
  return B.getInt32Ty();
}
bool AMDGPUCodeGenPrepare::isI16Ty(const Type *T) const {
if (T->isIntegerTy(16))
return true;
if (!T->isVectorTy())
return false;
return cast<VectorType>(T)->getElementType()->isIntegerTy(16);
}
bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {
if (T->isIntegerTy(32))
return true;
if (!T->isVectorTy())
return false;
return cast<VectorType>(T)->getElementType()->isIntegerTy(32);
}
/// Tells whether the operands of \p I must be sign extended (rather than zero
/// extended) for a widened operation to produce the same low 16 bits.
///
/// 'ashr' belongs here together with 'sdiv' and 'srem': an arithmetic shift
/// replicates the sign bit of its first operand, so zero extending that
/// operand would shift in zeros instead (i16 0x8000 ashr 1 is 0xC000, while
/// the zero-extended i32 computation truncates to 0x4000). A valid shift
/// amount is below 16 and therefore non-negative, so sign extending it does
/// not change its value.
bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
  switch (I.getOpcode()) {
  case Instruction::AShr:
  case Instruction::SDiv:
  case Instruction::SRem:
    return true;
  default:
    return false;
  }
}
bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
return isa<ICmpInst>(I.getOperand(0)) ?
cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}
/// Rewrites the 16 bit binary operation \p I as: extend both operands to 32
/// bits, perform the same opcode in 32 bits, truncate the result to 16 bits.
/// All uses of \p I are redirected to the truncated value and \p I is erased.
///
/// Operands are sign extended when isSigned(I) holds and zero extended
/// otherwise. 'sdiv' and 'udiv' are left alone.
///
/// \returns True if \p I was replaced, false if it was left untouched.
bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(BinaryOperator &I) const {
  assert(isI16Ty(I.getType()) && "Op must be 16 bits");

  if (I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Value *ExtOp0 = nullptr;
  Value *ExtOp1 = nullptr;
  Value *ExtRes = nullptr;
  Value *TruncRes = nullptr;

  if (isSigned(I)) {
    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
  } else {
    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
  }
  ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));

  // 'mul' operands are zero extended, so the 16 bit nsw guarantee does not
  // carry over: i16 -1 * -1 = 1 does not wrap, yet 65535 * 65535 overflows
  // the signed 32 bit range and the widened 'mul nsw' would be poison. The
  // flag is only sound when the original also had nuw, because the product
  // then fits in 16 bits.
  if (I.getOpcode() == Instruction::Mul && !I.hasNoUnsignedWrap())
    if (auto *WideMul = dyn_cast<BinaryOperator>(ExtRes))
      WideMul->setHasNoSignedWrap(false);

  TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}
/// Replaces the 16 bit comparison \p I with the same predicate applied to the
/// operands extended to 32 bits (sign extended for signed predicates, zero
/// extended otherwise). \p I is erased afterwards.
///
/// \returns True.
bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(ICmpInst &I) const {
  Value *LHS = I.getOperand(0);
  Value *RHS = I.getOperand(1);
  assert(isI16Ty(LHS->getType()) && "Op0 must be 16 bits");
  assert(isI16Ty(RHS->getType()) && "Op1 must be 16 bits");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *WideLHSTy = getI32Ty(Builder, LHS->getType());
  Type *WideRHSTy = getI32Ty(Builder, RHS->getType());

  // Extend the left operand first so the instruction order stays stable.
  const bool SignExtend = I.isSigned();
  Value *WideLHS = SignExtend ? Builder.CreateSExt(LHS, WideLHSTy)
                              : Builder.CreateZExt(LHS, WideLHSTy);
  Value *WideRHS = SignExtend ? Builder.CreateSExt(RHS, WideRHSTy)
                              : Builder.CreateZExt(RHS, WideRHSTy);

  Value *WideCmp = Builder.CreateICmp(I.getPredicate(), WideLHS, WideRHS);

  I.replaceAllUsesWith(WideCmp);
  I.eraseFromParent();
  return true;
}
/// Replaces the 16 bit 'select' \p I with a 32 bit 'select' over extended
/// true/false values followed by a truncation to 16 bits. The condition is
/// reused as is. \p I is erased afterwards.
///
/// \returns True.
bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32Op(SelectInst &I) const {
  assert(isI16Ty(I.getType()) && "Op must be 16 bits");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *WideTy = getI32Ty(Builder, I.getType());
  Value *TrueVal = I.getOperand(1);
  Value *FalseVal = I.getOperand(2);

  // Extend the true value first so the instruction order stays stable.
  const bool SignExtend = isSigned(I);
  Value *WideTrue = SignExtend ? Builder.CreateSExt(TrueVal, WideTy)
                               : Builder.CreateZExt(TrueVal, WideTy);
  Value *WideFalse = SignExtend ? Builder.CreateSExt(FalseVal, WideTy)
                                : Builder.CreateZExt(FalseVal, WideTy);

  Value *WideSel = Builder.CreateSelect(I.getOperand(0), WideTrue, WideFalse);
  Value *NarrowSel =
      Builder.CreateTrunc(WideSel, getI16Ty(Builder, WideSel->getType()));

  I.replaceAllUsesWith(NarrowSel);
  I.eraseFromParent();
  return true;
}
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
@ -154,6 +354,37 @@ static bool hasUnsafeFPMath(const Function &F) {
return Attr.getValueAsString() == "true";
}
/// Promotes \p I to 32 bits when the subtarget has 16 bit instructions, the
/// result type is (a vector of) i16 and \p I is uniform.
///
/// \returns True if the IR was changed.
bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
  // TODO: Should we promote smaller types that will be legalized to i16?
  if (!ST->has16BitInsts())
    return false;
  if (!isI16Ty(I.getType()))
    return false;
  if (!DA->isUniform(&I))
    return false;

  return promoteUniformI16OpToI32Op(I);
}
/// Promotes \p I to a 32 bit comparison when the subtarget has 16 bit
/// instructions, both operands are (vectors of) i16 and \p I is uniform.
///
/// \returns True if the IR was changed.
bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
  // TODO: Should we promote smaller types that will be legalized to i16?
  if (!ST->has16BitInsts())
    return false;
  if (!isI16Ty(I.getOperand(0)->getType()))
    return false;
  if (!isI16Ty(I.getOperand(1)->getType()))
    return false;
  if (!DA->isUniform(&I))
    return false;

  return promoteUniformI16OpToI32Op(I);
}
/// Promotes \p I to a 32 bit 'select' when the subtarget has 16 bit
/// instructions, the result type is (a vector of) i16 and \p I is uniform.
///
/// \returns True if the IR was changed.
bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
  // TODO: Should we promote smaller types that will be legalized to i16?
  if (!ST->has16BitInsts())
    return false;
  if (!isI16Ty(I.getType()))
    return false;
  if (!DA->isUniform(&I))
    return false;

  return promoteUniformI16OpToI32Op(I);
}
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
return false;

View File

@ -540,6 +540,10 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
// i16 is not desirable unless it is a load or a store.
if (VT == MVT::i16 && Op != ISD::LOAD && Op != ISD::STORE)
return false;
// SimplifySetCC uses this function to determine whether or not it should
// create setcc with i1 operands. We don't have instructions for i1 setcc.
if (VT == MVT::i1 && Op == ISD::SETCC)

View File

@ -0,0 +1,856 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
; SI-NOT: zext
; SI-NOT: sext
; SI-NOT: trunc
; Scalar i16 add: expects both operands zero-extended to i32, an i32 add, and a trunc back to i16.
; VI-LABEL: @add_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @add_i16(i16 %a, i16 %b) {
%r = add i16 %a, %b
ret i16 %r
}
; Scalar i16 add nsw: expects zero-extended operands, an i32 add that keeps nsw, and a trunc to i16.
; VI-LABEL: @add_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @add_nsw_i16(i16 %a, i16 %b) {
%r = add nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 add nuw: expects zero-extended operands, an i32 add that keeps nuw, and a trunc to i16.
; VI-LABEL: @add_nuw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @add_nuw_i16(i16 %a, i16 %b) {
%r = add nuw i16 %a, %b
ret i16 %r
}
; Scalar i16 add nuw nsw: expects zero-extended operands, an i32 add that keeps both flags, and a trunc to i16.
; VI-LABEL: @add_nuw_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) {
%r = add nuw nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 sub: expects both operands zero-extended to i32, an i32 sub, and a trunc back to i16.
; VI-LABEL: @sub_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @sub_i16(i16 %a, i16 %b) {
%r = sub i16 %a, %b
ret i16 %r
}
; Scalar i16 sub nsw: expects zero-extended operands, an i32 sub that keeps nsw, and a trunc to i16.
; VI-LABEL: @sub_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @sub_nsw_i16(i16 %a, i16 %b) {
%r = sub nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 sub nuw: expects zero-extended operands, an i32 sub that keeps nuw, and a trunc to i16.
; VI-LABEL: @sub_nuw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @sub_nuw_i16(i16 %a, i16 %b) {
%r = sub nuw i16 %a, %b
ret i16 %r
}
; Scalar i16 sub nuw nsw: expects zero-extended operands, an i32 sub that keeps both flags, and a trunc to i16.
; VI-LABEL: @sub_nuw_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) {
%r = sub nuw nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 mul: expects both operands zero-extended to i32, an i32 mul, and a trunc back to i16.
; VI-LABEL: @mul_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @mul_i16(i16 %a, i16 %b) {
%r = mul i16 %a, %b
ret i16 %r
}
; Scalar i16 mul nsw: expects zero-extended operands, an i32 mul that keeps nsw, and a trunc to i16.
; NOTE(review): nsw on the widened mul looks unsound with zero-extended
; operands. In i16, -1 * -1 = 1 does not wrap, but the widened product
; 65535 * 65535 exceeds the signed i32 range. Confirm whether the flag
; should really be expected here.
; VI-LABEL: @mul_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @mul_nsw_i16(i16 %a, i16 %b) {
%r = mul nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 mul nuw: expects zero-extended operands, an i32 mul that keeps nuw, and a trunc to i16.
; VI-LABEL: @mul_nuw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @mul_nuw_i16(i16 %a, i16 %b) {
%r = mul nuw i16 %a, %b
ret i16 %r
}
; Scalar i16 mul nuw nsw: expects zero-extended operands, an i32 mul that keeps both flags, and a trunc to i16.
; VI-LABEL: @mul_nuw_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) {
%r = mul nuw nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 urem: expects both operands zero-extended to i32, an i32 urem, and a trunc back to i16.
; VI-LABEL: @urem_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @urem_i16(i16 %a, i16 %b) {
%r = urem i16 %a, %b
ret i16 %r
}
; Scalar i16 srem: a signed operation, so both operands are expected to be sign-extended before the i32 srem and the trunc.
; VI-LABEL: @srem_i16(
; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @srem_i16(i16 %a, i16 %b) {
%r = srem i16 %a, %b
ret i16 %r
}
; Scalar i16 shl: expects both operands zero-extended to i32, an i32 shl, and a trunc back to i16.
; VI-LABEL: @shl_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @shl_i16(i16 %a, i16 %b) {
%r = shl i16 %a, %b
ret i16 %r
}
; Scalar i16 shl nsw: expects zero-extended operands, an i32 shl that keeps nsw, and a trunc to i16.
; VI-LABEL: @shl_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @shl_nsw_i16(i16 %a, i16 %b) {
%r = shl nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 shl nuw: expects zero-extended operands, an i32 shl that keeps nuw, and a trunc to i16.
; VI-LABEL: @shl_nuw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @shl_nuw_i16(i16 %a, i16 %b) {
%r = shl nuw i16 %a, %b
ret i16 %r
}
; Scalar i16 shl nuw nsw: expects zero-extended operands, an i32 shl that keeps both flags, and a trunc to i16.
; VI-LABEL: @shl_nuw_nsw_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) {
%r = shl nuw nsw i16 %a, %b
ret i16 %r
}
; Scalar i16 lshr: expects both operands zero-extended to i32, an i32 lshr, and a trunc back to i16.
; VI-LABEL: @lshr_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @lshr_i16(i16 %a, i16 %b) {
%r = lshr i16 %a, %b
ret i16 %r
}
; Scalar i16 lshr exact: expects zero-extended operands, an i32 lshr that keeps exact, and a trunc to i16.
; VI-LABEL: @lshr_exact_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @lshr_exact_i16(i16 %a, i16 %b) {
%r = lshr exact i16 %a, %b
ret i16 %r
}
; Scalar i16 ashr: expects extended operands, an i32 ashr, and a trunc back to i16.
; NOTE(review): the expectation zero-extends the operands of an arithmetic
; shift. For a negative input this loses the sign: i16 0x8000 ashr 1 is
; 0xC000, while the zero-extended i32 shift truncates to 0x4000. Sign
; extension looks necessary here; confirm against the pass.
; VI-LABEL: @ashr_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @ashr_i16(i16 %a, i16 %b) {
%r = ashr i16 %a, %b
ret i16 %r
}
; Scalar i16 ashr exact: expects extended operands, an i32 ashr that keeps exact, and a trunc to i16.
; NOTE(review): the expectation zero-extends the operands of an arithmetic
; shift. For a negative input this loses the sign: i16 0x8000 ashr 1 is
; 0xC000, while the zero-extended i32 shift truncates to 0x4000. Sign
; extension looks necessary here; confirm against the pass.
; VI-LABEL: @ashr_exact_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @ashr_exact_i16(i16 %a, i16 %b) {
%r = ashr exact i16 %a, %b
ret i16 %r
}
; Scalar i16 and: expects both operands zero-extended to i32, an i32 and, and a trunc back to i16.
; VI-LABEL: @and_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @and_i16(i16 %a, i16 %b) {
%r = and i16 %a, %b
ret i16 %r
}
; Scalar i16 or: expects both operands zero-extended to i32, an i32 or, and a trunc back to i16.
; VI-LABEL: @or_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @or_i16(i16 %a, i16 %b) {
%r = or i16 %a, %b
ret i16 %r
}
; Scalar i16 xor: expects both operands zero-extended to i32, an i32 xor, and a trunc back to i16.
; VI-LABEL: @xor_i16(
; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32
; VI: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
; VI: ret i16 %[[R_16]]
define i16 @xor_i16(i16 %a, i16 %b) {
%r = xor i16 %a, %b
ret i16 %r
}
; Scalar i16 select on icmp eq: expects the compare widened with zero-extended operands, then a
; second pair of zero extensions feeding an i32 select, and a trunc of the select result to i16.
; VI-LABEL: @select_eq_i16(
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_eq_i16(i16 %a, i16 %b) {
%cmp = icmp eq i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp ne: expects the compare widened with zero-extended operands, then a
; second pair of zero extensions feeding an i32 select, and a trunc of the select result to i16.
; VI-LABEL: @select_ne_i16(
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_ne_i16(i16 %a, i16 %b) {
%cmp = icmp ne i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp ugt: expects the compare widened with zero-extended operands, then a
; second pair of zero extensions feeding an i32 select, and a trunc of the select result to i16.
; VI-LABEL: @select_ugt_i16(
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_ugt_i16(i16 %a, i16 %b) {
%cmp = icmp ugt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp uge: expects the compare widened with zero-extended operands, then a
; second pair of zero extensions feeding an i32 select, and a trunc of the select result to i16.
; VI-LABEL: @select_uge_i16(
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_uge_i16(i16 %a, i16 %b) {
%cmp = icmp uge i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp ult: expects the compare widened with zero-extended operands, then a
; second pair of zero extensions feeding an i32 select, and a trunc of the select result to i16.
; VI-LABEL: @select_ult_i16(
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_ult_i16(i16 %a, i16 %b) {
%cmp = icmp ult i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp ule: expects the compare widened with zero-extended operands, then a
; second pair of zero extensions feeding an i32 select, and a trunc of the select result to i16.
; VI-LABEL: @select_ule_i16(
; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_ule_i16(i16 %a, i16 %b) {
%cmp = icmp ule i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp sgt: the signed predicate makes both the compare operands and the
; select operands sign-extended; the i32 select result is then truncated to i16.
; VI-LABEL: @select_sgt_i16(
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_sgt_i16(i16 %a, i16 %b) {
%cmp = icmp sgt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp sge: the signed predicate makes both the compare operands and the
; select operands sign-extended; the i32 select result is then truncated to i16.
; VI-LABEL: @select_sge_i16(
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_sge_i16(i16 %a, i16 %b) {
%cmp = icmp sge i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp slt: the signed predicate makes both the compare operands and the
; select operands sign-extended; the i32 select result is then truncated to i16.
; VI-LABEL: @select_slt_i16(
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_slt_i16(i16 %a, i16 %b) {
%cmp = icmp slt i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; Scalar i16 select on icmp sle: the signed predicate makes both the compare operands and the
; select operands sign-extended; the i32 select result is then truncated to i16.
; VI-LABEL: @select_sle_i16(
; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32
; VI: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32
; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32
; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16
; VI: ret i16 %[[SEL_16]]
define i16 @select_sle_i16(i16 %a, i16 %b) {
%cmp = icmp sle i16 %a, %b
%sel = select i1 %cmp, i16 %a, i16 %b
ret i16 %sel
}
; <3 x i16> add: expects both operands zero-extended to <3 x i32>, a vector add, and a trunc back to <3 x i16>.
; VI-LABEL: @add_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> add nsw: expects zero-extended operands, a <3 x i32> add that keeps nsw, and a trunc to <3 x i16>.
; VI-LABEL: @add_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> add nuw: expects zero-extended operands, a <3 x i32> add that keeps nuw, and a trunc to <3 x i16>.
; VI-LABEL: @add_nuw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add nuw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> add nuw nsw: expects zero-extended operands, a <3 x i32> add that keeps both flags, and a trunc to <3 x i16>.
; VI-LABEL: @add_nuw_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = add nuw nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> sub: expects both operands zero-extended to <3 x i32>, a vector sub, and a trunc back to <3 x i16>.
; VI-LABEL: @sub_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> sub nsw: expects zero-extended operands, a <3 x i32> sub that keeps nsw, and a trunc to <3 x i16>.
; VI-LABEL: @sub_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> sub nuw: expects zero-extended operands, a <3 x i32> sub that keeps nuw, and a trunc to <3 x i16>.
; VI-LABEL: @sub_nuw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub nuw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> sub nuw nsw: expects zero-extended operands, a <3 x i32> sub that keeps both flags, and a trunc to <3 x i16>.
; VI-LABEL: @sub_nuw_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = sub nuw nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> mul: expects both operands zero-extended to <3 x i32>, a vector mul, and a trunc back to <3 x i16>.
; VI-LABEL: @mul_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> mul nsw: expects zero-extended operands, a <3 x i32> mul that keeps nsw, and a trunc to <3 x i16>.
; NOTE(review): nsw on the widened mul looks unsound with zero-extended
; operands. In i16, -1 * -1 = 1 does not wrap, but the widened product
; 65535 * 65535 exceeds the signed i32 range. Confirm whether the flag
; should really be expected here.
; VI-LABEL: @mul_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> mul nuw: expects zero-extended operands, a <3 x i32> mul that keeps nuw, and a trunc to <3 x i16>.
; VI-LABEL: @mul_nuw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul nuw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> mul nuw nsw: expects zero-extended operands, a <3 x i32> mul that keeps both flags, and a trunc to <3 x i16>.
; VI-LABEL: @mul_nuw_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = mul nuw nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> urem: expects both operands zero-extended to <3 x i32>, a vector urem, and a trunc back to <3 x i16>.
; VI-LABEL: @urem_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = urem <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> srem: a signed operation, so both operands are expected to be sign-extended before the <3 x i32> srem and the trunc.
; VI-LABEL: @srem_3xi16(
; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = srem <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> shl: expects both operands zero-extended to <3 x i32>, a vector shl, and a trunc back to <3 x i16>.
; VI-LABEL: @shl_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> shl nsw: expects zero-extended operands, a <3 x i32> shl that keeps nsw, and a trunc to <3 x i16>.
; VI-LABEL: @shl_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> shl nuw: expects zero-extended operands, a <3 x i32> shl that keeps nuw, and a trunc to <3 x i16>.
; VI-LABEL: @shl_nuw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl nuw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> shl nuw nsw: expects zero-extended operands, a <3 x i32> shl that keeps both flags, and a trunc to <3 x i16>.
; VI-LABEL: @shl_nuw_nsw_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = shl nuw nsw <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> lshr: expects both operands zero-extended to <3 x i32>, a vector lshr, and a trunc back to <3 x i16>.
; VI-LABEL: @lshr_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = lshr <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> lshr exact: expects zero-extended operands, a <3 x i32> lshr that keeps exact, and a trunc to <3 x i16>.
; VI-LABEL: @lshr_exact_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = lshr exact <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> ashr: expects extended operands, a <3 x i32> ashr, and a trunc back to <3 x i16>.
; NOTE(review): the expectation zero-extends the operands of an arithmetic
; shift. For a negative element this loses the sign: i16 0x8000 ashr 1 is
; 0xC000, while the zero-extended i32 shift truncates to 0x4000. Sign
; extension looks necessary here; confirm against the pass.
; VI-LABEL: @ashr_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = ashr <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> ashr exact: expects extended operands, a <3 x i32> ashr that keeps exact, and a trunc to <3 x i16>.
; NOTE(review): the expectation zero-extends the operands of an arithmetic
; shift. For a negative element this loses the sign: i16 0x8000 ashr 1 is
; 0xC000, while the zero-extended i32 shift truncates to 0x4000. Sign
; extension looks necessary here; confirm against the pass.
; VI-LABEL: @ashr_exact_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = ashr exact <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> bitwise and. The checks expect both operands zero-extended to
; <3 x i32>, a 32-bit and, and a trunc of the result back to <3 x i16>.
; VI-LABEL: @and_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = and <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> bitwise or. The checks expect both operands zero-extended to
; <3 x i32>, a 32-bit or, and a trunc of the result back to <3 x i16>.
; VI-LABEL: @or_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = or <3 x i16> %a, %b
ret <3 x i16> %r
}
; <3 x i16> bitwise xor. The checks expect both operands zero-extended to
; <3 x i32>, a 32-bit xor, and a trunc of the result back to <3 x i16>.
; VI-LABEL: @xor_3xi16(
; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]]
; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16>
; VI: ret <3 x i16> %[[R_16]]
define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) {
%r = xor <3 x i16> %a, %b
ret <3 x i16> %r
}
; icmp eq on <3 x i16> feeding a select between the same two operands. The
; checks expect the compare operands zero-extended and compared at <3 x i32>,
; then %a and %b zero-extended a second time for a 32-bit select whose result
; is truncated back to <3 x i16>.
; VI-LABEL: @select_eq_3xi16(
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp eq <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; icmp ne on <3 x i16> feeding a select between the same two operands. The
; checks expect the compare operands zero-extended and compared at <3 x i32>,
; then %a and %b zero-extended a second time for a 32-bit select whose result
; is truncated back to <3 x i16>.
; VI-LABEL: @select_ne_3xi16(
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ne <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Unsigned icmp ugt on <3 x i16> feeding a select between the same two
; operands. The checks expect zero-extension for both the compare operands and
; the select operands, the compare and select done at <3 x i32>, and the
; selected value truncated back to <3 x i16>.
; VI-LABEL: @select_ugt_3xi16(
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ugt <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Unsigned icmp uge on <3 x i16> feeding a select between the same two
; operands. The checks expect zero-extension for both the compare operands and
; the select operands, the compare and select done at <3 x i32>, and the
; selected value truncated back to <3 x i16>.
; VI-LABEL: @select_uge_3xi16(
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp uge <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Unsigned icmp ult on <3 x i16> feeding a select between the same two
; operands. The checks expect zero-extension for both the compare operands and
; the select operands, the compare and select done at <3 x i32>, and the
; selected value truncated back to <3 x i16>.
; VI-LABEL: @select_ult_3xi16(
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ult <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Unsigned icmp ule on <3 x i16> feeding a select between the same two
; operands. The checks expect zero-extension for both the compare operands and
; the select operands, the compare and select done at <3 x i32>, and the
; selected value truncated back to <3 x i16>.
; VI-LABEL: @select_ule_3xi16(
; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp ule <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Signed icmp sgt on <3 x i16> feeding a select between the same two operands.
; Unlike the unsigned/equality variants, the checks expect sign-extension
; (sext) for both the compare operands and the select operands; the compare and
; select are done at <3 x i32> and the selected value is truncated back to
; <3 x i16>.
; VI-LABEL: @select_sgt_3xi16(
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp sgt <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Signed icmp sge on <3 x i16> feeding a select between the same two operands.
; The checks expect sign-extension (sext) for both the compare operands and the
; select operands; the compare and select are done at <3 x i32> and the
; selected value is truncated back to <3 x i16>.
; VI-LABEL: @select_sge_3xi16(
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp sge <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Signed icmp slt on <3 x i16> feeding a select between the same two operands.
; The checks expect sign-extension (sext) for both the compare operands and the
; select operands; the compare and select are done at <3 x i32> and the
; selected value is truncated back to <3 x i16>.
; VI-LABEL: @select_slt_3xi16(
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp slt <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}
; Signed icmp sle on <3 x i16> feeding a select between the same two operands.
; The checks expect sign-extension (sext) for both the compare operands and the
; select operands; the compare and select are done at <3 x i32> and the
; selected value is truncated back to <3 x i16>.
; VI-LABEL: @select_sle_3xi16(
; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]]
; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32>
; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32>
; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]]
; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16>
; VI: ret <3 x i16> %[[SEL_16]]
define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
%cmp = icmp sle <3 x i16> %a, %b
%sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b
ret <3 x i16> %sel
}

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
@ -17,13 +17,13 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: {{^}}s_ctlz_i32:
; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
; GCN-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: s_endpgm
; EG: FFBH_UINT
; EG: CNDE_INT
@ -34,12 +34,12 @@ define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_ctlz_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: FFBH_UINT
; EG: CNDE_INT
@ -51,11 +51,11 @@ define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalia
}
; FUNC-LABEL: {{^}}v_ctlz_v2i32:
; SI: buffer_load_dwordx2
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: buffer_store_dwordx2
; SI: s_endpgm
; GCN: buffer_load_dwordx2
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: buffer_store_dwordx2
; GCN: s_endpgm
; EG: FFBH_UINT
; EG: CNDE_INT
@ -69,13 +69,13 @@ define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrsp
}
; FUNC-LABEL: {{^}}v_ctlz_v4i32:
; SI: buffer_load_dwordx4
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: v_ffbh_u32_e32
; SI: buffer_store_dwordx4
; SI: s_endpgm
; GCN: buffer_load_dwordx4
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: v_ffbh_u32_e32
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
; EG-DAG: FFBH_UINT
@ -97,12 +97,12 @@ define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrsp
}
; FUNC-LABEL: {{^}}v_ctlz_i8:
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
; SI-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
; SI: buffer_store_byte [[RESULT]],
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
; GCN-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
; GCN-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
; GCN: buffer_store_byte [[RESULT]],
define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%val = load i8, i8 addrspace(1)* %valptr
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
@ -111,16 +111,16 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %
}
; FUNC-LABEL: {{^}}s_ctlz_i64:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
store i64 %ctlz, i64 addrspace(1)* %out
@ -136,17 +136,17 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind
}
; FUNC-LABEL: {{^}}v_ctlz_i64:
; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]]
; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
; GCN-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}}
define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
@ -170,10 +170,10 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)*
}
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -184,10 +184,10 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)*
}
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -199,11 +199,11 @@ define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspac
; TODO: Should be able to eliminate select here as well.
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
; SI: buffer_load_dword
; SI: v_ffbh_u32_e32
; SI: v_cmp
; SI: v_cndmask
; SI: s_endpgm
; GCN: buffer_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: s_endpgm
define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -214,11 +214,11 @@ define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
}
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
; SI: buffer_load_dword
; SI: v_ffbh_u32_e32
; SI: v_cmp
; SI: v_cndmask
; SI: s_endpgm
; GCN: buffer_load_dword
; GCN: v_ffbh_u32_e32
; GCN: v_cmp
; GCN: v_cndmask
; GCN: s_endpgm
define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%val = load i32, i32 addrspace(1)* %valptr
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -229,9 +229,9 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
}
; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; SI: buffer_store_byte [[FFBH]],
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN: buffer_store_byte [[FFBH]],
define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%val = load i8, i8 addrspace(1)* %valptr
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
@ -255,10 +255,10 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr
}
; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
; SI: buffer_store_byte [[TRUNC]],
; GCN: buffer_load_ubyte [[VAL:v[0-9]+]],
; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
; GCN: buffer_store_byte [[TRUNC]],
define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
%val = load i7, i7 addrspace(1)* %valptr
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone

View File

@ -1,11 +1,11 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; FUNC-LABEL: {{^}}test_umul24_i32:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
; SI: v_mul_u32_u24
; GCN: v_mul_u32_u24
define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
@ -18,13 +18,10 @@ entry:
}
; FUNC-LABEL: {{^}}test_umul24_i16_sext:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
; EG: 16
; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
; SI: v_mul_u32_u24_e{{(32|64)}} [[VI_MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; SI: v_bfe_i32 v{{[0-9]}}, [[VI_MUL]], 0, 16
; VI: s_mul_i32 [[SI_MUL:s[0-9]]], s{{[0-9]}}, s{{[0-9]}}
; VI: s_sext_i32_i16 s{{[0-9]}}, [[SI_MUL]]
define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%mul = mul i16 %a, %b
@ -33,10 +30,29 @@ entry:
ret void
}
; i16 multiply whose operands are loaded from %in at the indices returned by
; the workitem id x and y intrinsics; the product is sign-extended to i32 and
; stored to %out. The checks expect a v_mul_u32_u24 followed by a 16-bit wide
; v_bfe_i32 applied to the multiply result.
; FUNC-LABEL: {{^}}test_umul24_i16_vgpr_sext:
; GCN: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
define void @test_umul24_i16_vgpr_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
%ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x
%ptr_b = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.y
%a = load i16, i16 addrspace(1)* %ptr_a
%b = load i16, i16 addrspace(1)* %ptr_b
%mul = mul i16 %a, %b
%val = sext i16 %mul to i32
store i32 %val, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}test_umul24_i16:
; SI: s_and_b32
; SI: v_mul_u32_u24_e32
; SI: v_and_b32_e32
; VI: s_mul_i32
; VI: s_and_b32
; VI: v_mov_b32_e32
define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%mul = mul i16 %a, %b
@ -45,13 +61,25 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}test_umul24_i8:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
; i16 multiply whose operands are loaded from %in at the indices returned by
; the workitem id x and y intrinsics; the product is zero-extended to i32 and
; stored to %out. The checks expect a v_mul_u32_u24_e32 followed by a
; v_and_b32_e32.
; FUNC-LABEL: {{^}}test_umul24_i16_vgpr:
; GCN: v_mul_u32_u24_e32
; GCN: v_and_b32_e32
define void @test_umul24_i16_vgpr(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
%tid.y = call i32 @llvm.amdgcn.workitem.id.y()
%ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x
%ptr_b = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.y
%a = load i16, i16 addrspace(1)* %ptr_a
%b = load i16, i16 addrspace(1)* %ptr_b
%mul = mul i16 %a, %b
%val = zext i16 %mul to i32
store i32 %val, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}test_umul24_i8:
; GCN: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
%mul = mul i8 %a, %b
@ -61,11 +89,9 @@ entry:
}
; FUNC-LABEL: {{^}}test_umulhi24_i32_i64:
; SI-NOT: and
; SI: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
; SI-NEXT: buffer_store_dword [[RESULT]]
; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
; GCN-NOT: and
; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%a.24 = and i32 %a, 16777215
@ -80,11 +106,9 @@ entry:
}
; FUNC-LABEL: {{^}}test_umulhi24:
; SI-NOT: and
; SI: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
; SI-NEXT: buffer_store_dword [[RESULT]]
; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; GCN-NOT: and
; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]],
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%a.24 = and i64 %a, 16777215
@ -96,18 +120,13 @@ entry:
ret void
}
; Multiply with 24-bit inputs and 64-bit output
; Multiply with 24-bit inputs and 64-bit output.
; FUNC-LABEL: {{^}}test_umul24_i64:
; EG; MUL_UINT24
; EG: MULHI
; SI-NOT: and
; SI-NOT: lshr
; SI-DAG: v_mul_u32_u24_e32
; SI-DAG: v_mul_hi_u32_u24_e32
; SI: buffer_store_dwordx2
; GCN-NOT: and
; GCN-NOT: lshr
; GCN-DAG: v_mul_u32_u24_e32
; GCN-DAG: v_mul_hi_u32_u24_e32
; GCN: buffer_store_dwordx2
define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%tmp0 = shl i64 %a, 40
@ -119,12 +138,12 @@ entry:
ret void
}
; FIXME: Should be able to eliminate the and
; FIXME: Should be able to eliminate the and.
; FUNC-LABEL: {{^}}test_umul24_i64_square:
; SI: s_load_dword [[A:s[0-9]+]]
; SI: s_and_b32 [[TRUNC:s[0-9]+]], [[A]], 0xffffff{{$}}
; SI-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
; SI-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
; GCN: s_load_dword [[A:s[0-9]+]]
; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[A]], 0xffffff{{$}}
; GCN-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
; GCN-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]]
define void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) {
entry:
%tmp0 = shl i64 %a, 40
@ -135,10 +154,10 @@ entry:
}
; FUNC-LABEL: {{^}}test_umulhi16_i32:
; SI: s_and_b32
; SI: s_and_b32
; SI: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]]
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]]
; GCN: s_and_b32
; GCN: s_and_b32
; GCN: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]]
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]]
define void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%a.16 = and i32 %a, 65535
@ -151,16 +170,14 @@ entry:
}
; FUNC-LABEL: {{^}}test_umul24_i33:
; SI: s_load_dword s
; SI: s_load_dword s
; SI-NOT: and
; SI-NOT: lshr
; SI-DAG: v_mul_u32_u24_e32 v[[MUL_LO:[0-9]+]],
; SI-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
; SI-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}}
; GCN: s_load_dword s
; GCN: s_load_dword s
; GCN-NOT: and
; GCN-NOT: lshr
; GCN-DAG: v_mul_u32_u24_e32 v[[MUL_LO:[0-9]+]],
; GCN-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
; GCN-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}}
define void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) {
entry:
%tmp0 = shl i33 %a, 9
@ -174,15 +191,13 @@ entry:
}
; FUNC-LABEL: {{^}}test_umulhi24_i33:
; SI: s_load_dword s
; SI: s_load_dword s
; SI-NOT: and
; SI-NOT: lshr
; SI: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
; SI-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; SI-NEXT: buffer_store_dword v[[HI]]
; GCN: s_load_dword s
; GCN: s_load_dword s
; GCN-NOT: and
; GCN-NOT: lshr
; GCN: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]],
; GCN-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]]
; GCN-NEXT: buffer_store_dword v[[HI]]
define void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) {
entry:
%tmp0 = shl i33 %a, 9

View File

@ -0,0 +1,83 @@
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Both i32 operands have their top 8 bits cleared (shl 8 followed by lshr 8)
; before being multiplied, so each multiplicand fits in 24 bits. The check
; expects a single MUL_UINT24 whose sources are KC0[2].Z and KC0[2].W.
; FUNC-LABEL: {{^}}test_umul24_i32:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
%a_24 = lshr i32 %0, 8
%1 = shl i32 %b, 8
%b_24 = lshr i32 %1, 8
%2 = mul i32 %a_24, %b_24
store i32 %2, i32 addrspace(1)* %out
ret void
}
; i16 multiply whose product is sign-extended to i32 and stored. The checks
; expect a MUL_UINT24, then a BFE_INT that reads the channel the multiply wrote
; (captured as MUL_CHAN) through PV, and then the literal 16.
; The result must be sign-extended.
; FUNC-LABEL: {{^}}test_umul24_i16_sext:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
; EG: 16
define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%mul = mul i16 %a, %b
%ext = sext i16 %mul to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; i8 multiply whose product is sign-extended to i32 and stored. The checks
; expect a MUL_UINT24 followed by a BFE_INT that reads the channel the multiply
; wrote (captured as MUL_CHAN) through PV; the extract width literal is not
; checked here.
; The result must be sign-extended.
; FUNC-LABEL: {{^}}test_umul24_i8:
; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
%mul = mul i8 %a, %b
%ext = sext i8 %mul to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; Both i32 operands are masked to their low 24 bits (and with 16777215),
; zero-extended to i64 and multiplied; bits 32 and up of the product are
; shifted down, truncated to i32 and stored. The check expects a single
; MULHI_UINT24 whose sources are KC0[2].Z and KC0[2].W.
; FUNC-LABEL: {{^}}test_umulhi24_i32_i64:
; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%a.24 = and i32 %a, 16777215
%b.24 = and i32 %b, 16777215
%a.24.i64 = zext i32 %a.24 to i64
%b.24.i64 = zext i32 %b.24 to i64
%mul48 = mul i64 %a.24.i64, %b.24.i64
%mul48.hi = lshr i64 %mul48, 32
%mul24hi = trunc i64 %mul48.hi to i32
store i32 %mul24hi, i32 addrspace(1)* %out
ret void
}
; Same high-half computation as a 24x24 multiply, but starting from i64
; operands: each is masked to its low 24 bits (and with 16777215), the masked
; values are multiplied as i64, and bits 32 and up of the product are truncated
; to i32 and stored. The check expects a single MULHI_UINT24 whose sources are
; KC0[2].W and KC0[3].Y.
; FUNC-LABEL: {{^}}test_umulhi24:
; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%a.24 = and i64 %a, 16777215
%b.24 = and i64 %b, 16777215
%mul48 = mul i64 %a.24, %b.24
%mul48.hi = lshr i64 %mul48, 32
%mul24.hi = trunc i64 %mul48.hi to i32
store i32 %mul24.hi, i32 addrspace(1)* %out
ret void
}
; Multiply with 24-bit inputs and 64-bit output.
; Each i64 operand is reduced to its low 24 bits (shl 40 followed by lshr 40)
; and the full 64-bit product is stored, so both the low and the high half of
; the 24x24 multiply are needed. The two checks use the DAG form so that no
; particular emission order between the low and high multiply is required.
; FUNC-LABEL: {{^}}test_umul24_i64:
; EG-DAG: MUL_UINT24
; EG-DAG: MULHI
define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%tmp0 = shl i64 %a, 40
%a_24 = lshr i64 %tmp0, 40
%tmp1 = shl i64 %b, 40
%b_24 = lshr i64 %tmp1, 40
%tmp2 = mul i64 %a_24, %b_24
store i64 %tmp2, i64 addrspace(1)* %out
ret void
}