[FastISel][AArch64] Add target-specific lowering for logical operations.

This change adds support for immediate and shift-left folding into logical
operations.

This fixes rdar://problem/18223183.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217118 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-09-04 01:29:18 +00:00
parent fa2dfaedf2
commit 68a4ab08b3
4 changed files with 307 additions and 34 deletions

View File

@ -114,6 +114,7 @@ class AArch64FastISel : public FastISel {
private:
// Selection routines.
bool selectAddSub(const Instruction *I);
bool selectLogicalOp(const Instruction *I);
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
@ -193,7 +194,14 @@ private:
unsigned RHSReg, bool RHSIsKill,
AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
bool WantResult = true);
unsigned emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
const Value *RHS);
unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
bool LHSIsKill, uint64_t Imm);
unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
uint64_t ShiftImm);
unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
@ -1222,22 +1230,83 @@ unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
WantResult);
}
// FIXME: This should be eventually generated automatically by tblgen.
unsigned AArch64FastISel::emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
uint64_t Imm) {
const TargetRegisterClass *RC = nullptr;
unsigned Opc = 0;
unsigned RegSize = 0;
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
const Value *LHS, const Value *RHS) {
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
// Canonicalize immediates to the RHS first.
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
std::swap(LHS, RHS);
// Canonicalize shift immediate to the RHS.
if (isValueAvailable(LHS))
if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
if (isa<ConstantInt>(SI->getOperand(1)))
if (SI->getOpcode() == Instruction::Shl)
std::swap(LHS, RHS);
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
bool LHSIsKill = hasTrivialKill(LHS);
unsigned ResultReg = 0;
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
uint64_t Imm = C->getZExtValue();
ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
}
if (ResultReg)
return ResultReg;
// Check if the shift can be folded into the instruction.
if (isValueAvailable(RHS))
if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
if (SI->getOpcode() == Instruction::Shl) {
uint64_t ShiftVal = C->getZExtValue();
unsigned RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
RHSIsKill, ShiftVal);
}
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(RHS);
return fastEmit_rr(RetVT, RetVT, ISDOpc, LHSReg, LHSIsKill, RHSReg,
RHSIsKill);
}
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
unsigned LHSReg, bool LHSIsKill,
uint64_t Imm) {
assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
"ISD nodes are not consecutive!");
static const unsigned OpcTable[3][2] = {
{ AArch64::ANDWri, AArch64::ANDXri },
{ AArch64::ORRWri, AArch64::ORRXri },
{ AArch64::EORWri, AArch64::EORXri }
};
const TargetRegisterClass *RC;
unsigned Opc;
unsigned RegSize;
switch (RetVT.SimpleTy) {
default:
return 0;
case MVT::i32:
Opc = AArch64::ANDWri;
case MVT::i32: {
unsigned Idx = ISDOpc - ISD::AND;
Opc = OpcTable[Idx][0];
RC = &AArch64::GPR32spRegClass;
RegSize = 32;
break;
}
case MVT::i64:
Opc = AArch64::ANDXri;
Opc = OpcTable[ISDOpc - ISD::AND][1];
RC = &AArch64::GPR64spRegClass;
RegSize = 64;
break;
@ -1250,6 +1319,40 @@ unsigned AArch64FastISel::emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
}
/// Emit a logical operation whose second operand is a register shifted left
/// by a constant amount.
///
/// \param ISDOpc    One of ISD::AND, ISD::OR or ISD::XOR; used to index the
///                  opcode table.
/// \param RetVT     Result type; only i32 and i64 are handled.
/// \param LHSReg    Register holding the first operand.
/// \param LHSIsKill Kill flag passed along for \p LHSReg.
/// \param RHSReg    Register holding the value to be shifted.
/// \param RHSIsKill Kill flag passed along for \p RHSReg.
/// \param ShiftImm  Left-shift amount applied to \p RHSReg.
///
/// \returns the result register, or 0 if \p RetVT is unsupported or the shift
/// amount is not smaller than the register width.
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  // The table below relies on the three ISD opcodes being consecutive; this
  // only involves enumerators, so it can be verified at compile time.
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Rows: AND, OR, XOR. Columns: 32-bit opcode, 64-bit opcode.
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Unsigned wrap-around makes any opcode below ISD::AND fail this check too.
  const unsigned Idx = ISDOpc - ISD::AND;
  assert(Idx < 3 && "Unexpected logical opcode.");

  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i32:
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32RegClass;
    RegSize = 32;
    break;
  case MVT::i64:
    Opc = OpcTable[Idx][1];
    RC = &AArch64::GPR64RegClass;
    RegSize = 64;
    break;
  }

  // Don't deal with undefined shifts: an IR shl by an amount that is not
  // smaller than the bit width has no defined result and must not be folded
  // into the shifted-register operand. Returning 0 lets the caller fall back.
  if (ShiftImm >= RegSize)
    return 0;

  return fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
}
/// Convenience wrapper: emit an AND of \p LHSReg with the immediate \p Imm by
/// forwarding to emitLogicalOp_ri with ISD::AND.
///
/// \returns whatever emitLogicalOp_ri returns for these arguments.
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                     uint64_t Imm) {
  const unsigned AndOpc = ISD::AND;
  return emitLogicalOp_ri(AndOpc, RetVT, LHSReg, LHSIsKill, Imm);
}
bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
@ -1316,7 +1419,7 @@ bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
// Loading an i1 requires special handling.
if (VTIsi1) {
unsigned ANDReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
@ -1341,6 +1444,34 @@ bool AArch64FastISel::selectAddSub(const Instruction *I) {
return true;
}
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
MVT VT;
if (!isTypeSupported(I->getType(), VT))
return false;
unsigned ISDOpc;
switch (I->getOpcode()) {
default:
llvm_unreachable("Unexpected opcode.");
case Instruction::And:
ISDOpc = ISD::AND;
break;
case Instruction::Or:
ISDOpc = ISD::OR;
break;
case Instruction::Xor:
ISDOpc = ISD::XOR;
break;
}
unsigned ResultReg =
emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::SelectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
@ -1423,7 +1554,7 @@ bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
// Storing an i1 requires special handling.
if (VTIsi1 && SrcReg != AArch64::WZR) {
unsigned ANDReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
SrcReg = ANDReg;
}
@ -1576,7 +1707,7 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
CondIsKill = true;
}
unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
@ -1750,7 +1881,7 @@ bool AArch64FastISel::SelectSelect(const Instruction *I) {
bool CondIsKill = hasTrivialKill(Cond);
if (NeedTest) {
unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
}
@ -2721,7 +2852,7 @@ bool AArch64FastISel::SelectTrunc(const Instruction *I) {
unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
AArch64::sub_32);
// Create the AND instruction which performs the actual truncation.
ResultReg = emitAND_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
assert(ResultReg && "Unexpected AND instruction emission failure.");
} else {
ResultReg = createResultReg(&AArch64::GPR32RegClass);
@ -2743,7 +2874,7 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
DestVT = MVT::i32;
if (isZExt) {
unsigned ResultReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
assert(ResultReg && "Unexpected AND instruction emission failure.");
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
@ -2823,13 +2954,13 @@ unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (NeedTrunc) {
Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op1IsKill = true;
}
unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
Op1IsKill);
if (NeedTrunc)
ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
return ResultReg;
}
@ -2916,14 +3047,14 @@ unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (NeedTrunc) {
Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op0IsKill = Op1IsKill = true;
}
unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
Op1IsKill);
if (NeedTrunc)
ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
return ResultReg;
}
@ -3026,13 +3157,13 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (NeedTrunc) {
Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op0IsKill = Op1IsKill = true;
}
unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
Op1IsKill);
if (NeedTrunc)
ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
return ResultReg;
}
@ -3470,11 +3601,17 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectBinaryOp(I, ISD::SRA);
return true;
case Instruction::And:
return selectBinaryOp(I, ISD::AND);
if (!selectLogicalOp(I))
return selectBinaryOp(I, ISD::AND);
return true;
case Instruction::Or:
return selectBinaryOp(I, ISD::OR);
if (!selectLogicalOp(I))
return selectBinaryOp(I, ISD::OR);
return true;
case Instruction::Xor:
return selectBinaryOp(I, ISD::XOR);
if (!selectLogicalOp(I))
return selectBinaryOp(I, ISD::XOR);
return true;
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
case Instruction::Br:

View File

@ -137,11 +137,10 @@ declare void @foo1()
; rdar://15174028
define i32 @trunc64(i64 %foo) nounwind {
; CHECK: trunc64
; CHECK: orr [[REG:x[0-9]+]], xzr, #0x1
; CHECK: and [[REG2:x[0-9]+]], x0, [[REG]]
; CHECK: mov x[[REG3:[0-9]+]], [[REG2]]
; CHECK: and [[REG4:w[0-9]+]], w[[REG3]], #0x1
; CHECK: cmp [[REG4]], #0
; CHECK: and [[REG1:x[0-9]+]], x0, #0x1
; CHECK: mov x[[REG2:[0-9]+]], [[REG1]]
; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1
; CHECK: cmp [[REG3]], #0
; CHECK: b.eq LBB5_2
%a = and i64 %foo, 1
%b = trunc i64 %a to i1

View File

@ -23,9 +23,8 @@ entry:
; CHECK: ldr [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}}
; CHECK: mul [[REG6:x[0-9]+]], [[REG5]], [[REG4]]
; CHECK: add [[REG7:x[0-9]+]], [[REG6]], [[REG3]]
; CHECK: orr [[REG8:x[0-9]+]], xzr, #0xffff
; CHECK: and [[REG9:x[0-9]+]], [[REG7]], [[REG8]]
; CHECK: str [[REG9]], {{\[}}[[REG1]]{{\]}}
; CHECK: and [[REG8:x[0-9]+]], [[REG7]], #0xffff
; CHECK: str [[REG8]], {{\[}}[[REG1]]{{\]}}
; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]]{{\]}}
%0 = load i64* @seed, align 8
%mul = mul nsw i64 %0, 1309

View File

@ -0,0 +1,138 @@
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
; AND
; Covers the register-register (rr), register-immediate (ri) and
; register-shifted-register (rs) forms of 'and', each for i32 and i64.
;
; rr, i32: both operands are arguments; expects a plain 'and' on w0 and w1.
define i32 @and_rr_i32(i32 %a, i32 %b) {
; CHECK-LABEL: and_rr_i32
; CHECK: and w0, w0, w1
%1 = and i32 %a, %b
ret i32 %1
}
; rr, i64: same as above on the 64-bit registers x0 and x1.
define i64 @and_rr_i64(i64 %a, i64 %b) {
; CHECK-LABEL: and_rr_i64
; CHECK: and x0, x0, x1
%1 = and i64 %a, %b
ret i64 %1
}
; ri, i32: the constant 255 is expected to appear as the immediate #0xff.
define i32 @and_ri_i32(i32 %a) {
; CHECK-LABEL: and_ri_i32
; CHECK: and w0, w0, #0xff
%1 = and i32 %a, 255
ret i32 %1
}
; ri, i64: the constant 255 is expected to appear as the immediate #0xff.
define i64 @and_ri_i64(i64 %a) {
; CHECK-LABEL: and_ri_i64
; CHECK: and x0, x0, #0xff
%1 = and i64 %a, 255
ret i64 %1
}
; rs, i32: the 'shl' by 8 feeding the 'and' is expected to be folded into the
; instruction as 'lsl #8' on the second register operand.
define i32 @and_rs_i32(i32 %a, i32 %b) {
; CHECK-LABEL: and_rs_i32
; CHECK: and w0, w0, w1, lsl #8
%1 = shl i32 %b, 8
%2 = and i32 %a, %1
ret i32 %2
}
; rs, i64: same folding of the shift-left by 8, on 64-bit registers.
define i64 @and_rs_i64(i64 %a, i64 %b) {
; CHECK-LABEL: and_rs_i64
; CHECK: and x0, x0, x1, lsl #8
%1 = shl i64 %b, 8
%2 = and i64 %a, %1
ret i64 %2
}
; OR
; Covers the register-register (rr), register-immediate (ri) and
; register-shifted-register (rs) forms of 'or', each for i32 and i64. The
; expected mnemonic in the output is 'orr'.
;
; rr, i32: both operands are arguments; expects a plain 'orr' on w0 and w1.
define i32 @or_rr_i32(i32 %a, i32 %b) {
; CHECK-LABEL: or_rr_i32
; CHECK: orr w0, w0, w1
%1 = or i32 %a, %b
ret i32 %1
}
; rr, i64: same as above on the 64-bit registers x0 and x1.
define i64 @or_rr_i64(i64 %a, i64 %b) {
; CHECK-LABEL: or_rr_i64
; CHECK: orr x0, x0, x1
%1 = or i64 %a, %b
ret i64 %1
}
; ri, i32: the constant 255 is expected to appear as the immediate #0xff.
define i32 @or_ri_i32(i32 %a) {
; CHECK-LABEL: or_ri_i32
; CHECK: orr w0, w0, #0xff
%1 = or i32 %a, 255
ret i32 %1
}
; ri, i64: the constant 255 is expected to appear as the immediate #0xff.
define i64 @or_ri_i64(i64 %a) {
; CHECK-LABEL: or_ri_i64
; CHECK: orr x0, x0, #0xff
%1 = or i64 %a, 255
ret i64 %1
}
; rs, i32: the 'shl' by 8 feeding the 'or' is expected to be folded into the
; instruction as 'lsl #8' on the second register operand.
define i32 @or_rs_i32(i32 %a, i32 %b) {
; CHECK-LABEL: or_rs_i32
; CHECK: orr w0, w0, w1, lsl #8
%1 = shl i32 %b, 8
%2 = or i32 %a, %1
ret i32 %2
}
; rs, i64: same folding of the shift-left by 8, on 64-bit registers.
define i64 @or_rs_i64(i64 %a, i64 %b) {
; CHECK-LABEL: or_rs_i64
; CHECK: orr x0, x0, x1, lsl #8
%1 = shl i64 %b, 8
%2 = or i64 %a, %1
ret i64 %2
}
; XOR
; Covers the register-register (rr), register-immediate (ri) and
; register-shifted-register (rs) forms of 'xor', each for i32 and i64. The
; expected mnemonic in the output is 'eor'.
;
; rr, i32: both operands are arguments; expects a plain 'eor' on w0 and w1.
define i32 @xor_rr_i32(i32 %a, i32 %b) {
; CHECK-LABEL: xor_rr_i32
; CHECK: eor w0, w0, w1
%1 = xor i32 %a, %b
ret i32 %1
}
; rr, i64: same as above on the 64-bit registers x0 and x1.
define i64 @xor_rr_i64(i64 %a, i64 %b) {
; CHECK-LABEL: xor_rr_i64
; CHECK: eor x0, x0, x1
%1 = xor i64 %a, %b
ret i64 %1
}
; ri, i32: the constant 255 is expected to appear as the immediate #0xff.
define i32 @xor_ri_i32(i32 %a) {
; CHECK-LABEL: xor_ri_i32
; CHECK: eor w0, w0, #0xff
%1 = xor i32 %a, 255
ret i32 %1
}
; ri, i64: the constant 255 is expected to appear as the immediate #0xff.
define i64 @xor_ri_i64(i64 %a) {
; CHECK-LABEL: xor_ri_i64
; CHECK: eor x0, x0, #0xff
%1 = xor i64 %a, 255
ret i64 %1
}
; rs, i32: the 'shl' by 8 feeding the 'xor' is expected to be folded into the
; instruction as 'lsl #8' on the second register operand.
define i32 @xor_rs_i32(i32 %a, i32 %b) {
; CHECK-LABEL: xor_rs_i32
; CHECK: eor w0, w0, w1, lsl #8
%1 = shl i32 %b, 8
%2 = xor i32 %a, %1
ret i32 %2
}
; rs, i64: same folding of the shift-left by 8, on 64-bit registers.
define i64 @xor_rs_i64(i64 %a, i64 %b) {
; CHECK-LABEL: xor_rs_i64
; CHECK: eor x0, x0, x1, lsl #8
%1 = shl i64 %b, 8
%2 = xor i64 %a, %1
ret i64 %2
}