mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-29 14:40:39 +00:00
Generalize ExtendUsesToFormExtLoad to be usable for ANY_EXTEND,
in addition to ZERO_EXTEND and SIGN_EXTEND. Fix a bug in the way it checked for live-out values, and simplify the way it finds users by using SDNode::use_iterator's (relatively) new features. Also, make it slightly more permissive on targets with free truncates. In SelectionDAGBuild, avoid creating ANY_EXTEND nodes that are larger than necessary. If the target's ShiftAmountTy has enough bits, use it. This exposes the truncate to optimization early, enabling more optimizations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68670 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7d770be047
commit
57fc82d409
@ -2874,7 +2874,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
|
||||
}
|
||||
|
||||
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
|
||||
// "fold ({s|z}ext (load x)) -> ({s|z}ext (truncate ({s|z}extload x)))"
|
||||
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
|
||||
// transformation. Returns true if the extensions are possible and the above
|
||||
// mentioned transformation is profitable.
|
||||
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
|
||||
@ -2889,8 +2889,10 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
|
||||
SDNode *User = *UI;
|
||||
if (User == N)
|
||||
continue;
|
||||
if (UI.getUse().getResNo() != N0.getResNo())
|
||||
continue;
|
||||
// FIXME: Only extend SETCC N, N and SETCC N, c for now.
|
||||
if (User->getOpcode() == ISD::SETCC) {
|
||||
if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
|
||||
if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
|
||||
// Sign bits will be lost after a zext.
|
||||
@ -2906,32 +2908,25 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
|
||||
}
|
||||
if (Add)
|
||||
ExtendNodes.push_back(User);
|
||||
} else {
|
||||
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
|
||||
SDValue UseOp = User->getOperand(i);
|
||||
if (UseOp == N0) {
|
||||
// If truncate from extended type to original load type is free
|
||||
// on this target, then it's ok to extend a CopyToReg.
|
||||
if (isTruncFree && User->getOpcode() == ISD::CopyToReg)
|
||||
HasCopyToRegUses = true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// If truncates aren't free and there are users we can't
|
||||
// extend, it isn't worthwhile.
|
||||
if (!isTruncFree)
|
||||
return false;
|
||||
// Remember if this value is live-out.
|
||||
if (User->getOpcode() == ISD::CopyToReg)
|
||||
HasCopyToRegUses = true;
|
||||
}
|
||||
|
||||
if (HasCopyToRegUses) {
|
||||
bool BothLiveOut = false;
|
||||
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
|
||||
UI != UE; ++UI) {
|
||||
SDNode *User = *UI;
|
||||
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
|
||||
SDValue UseOp = User->getOperand(i);
|
||||
if (UseOp.getNode() == N && UseOp.getResNo() == 0) {
|
||||
BothLiveOut = true;
|
||||
break;
|
||||
}
|
||||
SDUse &Use = UI.getUse();
|
||||
if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
|
||||
BothLiveOut = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (BothLiveOut)
|
||||
@ -3013,8 +3008,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
||||
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
|
||||
if (DoXform) {
|
||||
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
||||
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(),
|
||||
VT, LN0->getChain(),
|
||||
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
|
||||
LN0->getChain(),
|
||||
LN0->getBasePtr(), LN0->getSrcValue(),
|
||||
LN0->getSrcValueOffset(),
|
||||
N0.getValueType(),
|
||||
@ -3034,8 +3029,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
||||
if (SOp == Trunc)
|
||||
Ops.push_back(ExtLoad);
|
||||
else
|
||||
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
|
||||
VT, SOp));
|
||||
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
|
||||
N->getDebugLoc(), VT, SOp));
|
||||
}
|
||||
|
||||
Ops.push_back(SetCC->getOperand(2));
|
||||
@ -3278,26 +3273,48 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
|
||||
}
|
||||
|
||||
// fold (aext (load x)) -> (aext (truncate (extload x)))
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
||||
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
|
||||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
|
||||
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
||||
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
|
||||
LN0->getChain(),
|
||||
LN0->getBasePtr(), LN0->getSrcValue(),
|
||||
LN0->getSrcValueOffset(),
|
||||
N0.getValueType(),
|
||||
LN0->isVolatile(), LN0->getAlignment());
|
||||
CombineTo(N, ExtLoad);
|
||||
// Redirect any chain users to the new load.
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1),
|
||||
SDValue(ExtLoad.getNode(), 1));
|
||||
// If any node needs the original loaded value, recompute it.
|
||||
if (!LN0->use_empty())
|
||||
CombineTo(LN0, DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
||||
N0.getValueType(), ExtLoad),
|
||||
ExtLoad.getValue(1));
|
||||
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
||||
bool DoXform = true;
|
||||
SmallVector<SDNode*, 4> SetCCs;
|
||||
if (!N0.hasOneUse())
|
||||
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
|
||||
if (DoXform) {
|
||||
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
||||
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
|
||||
LN0->getChain(),
|
||||
LN0->getBasePtr(), LN0->getSrcValue(),
|
||||
LN0->getSrcValueOffset(),
|
||||
N0.getValueType(),
|
||||
LN0->isVolatile(), LN0->getAlignment());
|
||||
CombineTo(N, ExtLoad);
|
||||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
||||
N0.getValueType(), ExtLoad);
|
||||
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
|
||||
|
||||
// Extend SetCC uses if necessary.
|
||||
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
|
||||
SDNode *SetCC = SetCCs[i];
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
|
||||
for (unsigned j = 0; j != 2; ++j) {
|
||||
SDValue SOp = SetCC->getOperand(j);
|
||||
if (SOp == Trunc)
|
||||
Ops.push_back(ExtLoad);
|
||||
else
|
||||
Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
|
||||
N->getDebugLoc(), VT, SOp));
|
||||
}
|
||||
|
||||
Ops.push_back(SetCC->getOperand(2));
|
||||
CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
|
||||
SetCC->getValueType(0),
|
||||
&Ops[0], Ops.size()));
|
||||
}
|
||||
|
||||
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
||||
}
|
||||
}
|
||||
|
||||
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
|
||||
|
@ -2190,8 +2190,24 @@ void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
|
||||
void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
|
||||
SDValue Op1 = getValue(I.getOperand(0));
|
||||
SDValue Op2 = getValue(I.getOperand(1));
|
||||
if (!isa<VectorType>(I.getType())) {
|
||||
if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
|
||||
if (!isa<VectorType>(I.getType()) &&
|
||||
Op2.getValueType() != TLI.getShiftAmountTy()) {
|
||||
// If the operand is smaller than the shift count type, promote it.
|
||||
if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
|
||||
Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
|
||||
TLI.getShiftAmountTy(), Op2);
|
||||
// If the operand is larger than the shift count type but the shift
|
||||
// count type has enough bits to represent any shift value, truncate
|
||||
// it now. This is a common case and it exposes the truncate to
|
||||
// optimization early.
|
||||
else if (TLI.getShiftAmountTy().getSizeInBits() >=
|
||||
Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
|
||||
Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
|
||||
TLI.getShiftAmountTy(), Op2);
|
||||
// Otherwise we'll need to temporarily settle for some other
|
||||
// convenient type; type legalization will make adjustments as
|
||||
// needed.
|
||||
else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
|
||||
Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
|
||||
TLI.getPointerTy(), Op2);
|
||||
else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))
|
||||
|
@ -1,40 +0,0 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep movw | not grep %e.x
|
||||
; PR2681
|
||||
|
||||
@g_491 = external global i32 ; <i32*> [#uses=1]
|
||||
@g_897 = external global i16 ; <i16*> [#uses=1]
|
||||
|
||||
define i32 @func_7(i16 signext %p_9) nounwind {
|
||||
entry:
|
||||
%p_9.addr = alloca i16 ; <i16*> [#uses=2]
|
||||
%l_1122 = alloca i16, align 2 ; <i16*> [#uses=1]
|
||||
%l_1128 = alloca i32, align 4 ; <i32*> [#uses=1]
|
||||
%l_1129 = alloca i32, align 4 ; <i32*> [#uses=1]
|
||||
%l_1130 = alloca i32, align 4 ; <i32*> [#uses=1]
|
||||
%tmp14 = load i16* %l_1122 ; <i16> [#uses=1]
|
||||
%conv15 = sext i16 %tmp14 to i32 ; <i32> [#uses=1]
|
||||
%tmp16 = load i16* %p_9.addr ; <i16> [#uses=1]
|
||||
%conv17 = sext i16 %tmp16 to i32 ; <i32> [#uses=1]
|
||||
%xor = xor i32 %conv15, %conv17 ; <i32> [#uses=1]
|
||||
%tmp18 = load i32* null ; <i32> [#uses=1]
|
||||
%or = or i32 %xor, %tmp18 ; <i32> [#uses=1]
|
||||
%conv19 = trunc i32 %or to i16 ; <i16> [#uses=1]
|
||||
%tmp28 = load i16* %p_9.addr ; <i16> [#uses=1]
|
||||
%tmp33 = load i16* @g_897 ; <i16> [#uses=1]
|
||||
%tmp34 = load i32* @g_491 ; <i32> [#uses=1]
|
||||
%conv35 = trunc i32 %tmp34 to i16 ; <i16> [#uses=1]
|
||||
%tmp36 = load i16* null ; <i16> [#uses=1]
|
||||
%conv37 = trunc i16 %tmp36 to i8 ; <i8> [#uses=1]
|
||||
%tmp38 = load i32* %l_1128 ; <i32> [#uses=1]
|
||||
%conv39 = sext i32 %tmp38 to i64 ; <i64> [#uses=1]
|
||||
%tmp42 = load i32* %l_1129 ; <i32> [#uses=1]
|
||||
%conv43 = trunc i32 %tmp42 to i16 ; <i16> [#uses=1]
|
||||
%tmp44 = load i32* %l_1130 ; <i32> [#uses=1]
|
||||
%conv45 = sext i32 %tmp44 to i64 ; <i64> [#uses=1]
|
||||
%call46 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext %tmp33, i16 zeroext %conv35, i8 zeroext %conv37, i64 %conv39, i32 0, i16 zeroext %conv43, i64 %conv45, i8 zeroext 1 ) ; <i32> [#uses=0]
|
||||
%call48 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %tmp28, i64 0, i8 zeroext 1 ) ; <i32> [#uses=0]
|
||||
%call50 = call i32 @func_18( i16 zeroext 1, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %conv19, i64 0, i8 zeroext 1 ) ; <i32> [#uses=0]
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
declare i32 @func_18(i16 zeroext, i16 zeroext, i16 zeroext, i16 zeroext, i8 zeroext, i64, i32, i16 zeroext, i64, i8 zeroext)
|
47
test/CodeGen/X86/anyext-uses.ll
Normal file
47
test/CodeGen/X86/anyext-uses.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 > %t
|
||||
; RUN: grep mov %t | count 8
|
||||
; RUN: not grep implicit %t
|
||||
|
||||
; Avoid partial register updates; don't define an i8 register and read
|
||||
; the i32 super-register.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-apple-darwin9.6"
|
||||
%struct.RC4_KEY = type { i8, i8, [256 x i8] }
|
||||
|
||||
define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind {
|
||||
entry:
|
||||
br label %bb24
|
||||
|
||||
bb24: ; preds = %bb24, %entry
|
||||
%0 = load i8* null, align 1 ; <i8> [#uses=1]
|
||||
%1 = zext i8 %0 to i64 ; <i64> [#uses=1]
|
||||
%2 = shl i64 %1, 32 ; <i64> [#uses=1]
|
||||
%3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1]
|
||||
%4 = load i8* %3, align 1 ; <i8> [#uses=2]
|
||||
%5 = add i8 %4, 0 ; <i8> [#uses=2]
|
||||
%6 = zext i8 %5 to i64 ; <i64> [#uses=0]
|
||||
%7 = load i8* null, align 1 ; <i8> [#uses=1]
|
||||
%8 = zext i8 %4 to i32 ; <i32> [#uses=1]
|
||||
%9 = zext i8 %7 to i32 ; <i32> [#uses=1]
|
||||
%10 = add i32 %9, %8 ; <i32> [#uses=1]
|
||||
%11 = and i32 %10, 255 ; <i32> [#uses=1]
|
||||
%12 = zext i32 %11 to i64 ; <i64> [#uses=1]
|
||||
%13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; <i8*> [#uses=1]
|
||||
%14 = load i8* %13, align 1 ; <i8> [#uses=1]
|
||||
%15 = zext i8 %14 to i64 ; <i64> [#uses=1]
|
||||
%16 = shl i64 %15, 48 ; <i64> [#uses=1]
|
||||
%17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1]
|
||||
%18 = load i8* %17, align 1 ; <i8> [#uses=2]
|
||||
%19 = add i8 %18, %5 ; <i8> [#uses=1]
|
||||
%20 = zext i8 %19 to i64 ; <i64> [#uses=1]
|
||||
%21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; <i8*> [#uses=1]
|
||||
store i8 %18, i8* %21, align 1
|
||||
%22 = or i64 0, %2 ; <i64> [#uses=1]
|
||||
%23 = or i64 %22, 0 ; <i64> [#uses=1]
|
||||
%24 = or i64 %23, %16 ; <i64> [#uses=1]
|
||||
%25 = or i64 %24, 0 ; <i64> [#uses=1]
|
||||
%26 = xor i64 %25, 0 ; <i64> [#uses=1]
|
||||
store i64 %26, i64* null, align 8
|
||||
br label %bb24
|
||||
}
|
34
test/CodeGen/X86/switch-zextload.ll
Normal file
34
test/CodeGen/X86/switch-zextload.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
|
||||
|
||||
; Do zextload, instead of a load and a separate zext.
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i386-apple-darwin9.6"
|
||||
%struct.move_s = type { i32, i32, i32, i32, i32, i32 }
|
||||
%struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s }
|
||||
|
||||
define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind {
|
||||
entry:
|
||||
%0 = load i8* null, align 1 ; <i8> [#uses=1]
|
||||
switch i8 %0, label %return [
|
||||
i8 2, label %bb31
|
||||
i8 0, label %bb80
|
||||
i8 1, label %bb82
|
||||
i8 3, label %bb84
|
||||
]
|
||||
|
||||
bb31: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
bb80: ; preds = %entry
|
||||
ret void
|
||||
|
||||
bb82: ; preds = %entry
|
||||
ret void
|
||||
|
||||
bb84: ; preds = %entry
|
||||
ret void
|
||||
|
||||
return: ; preds = %entry
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user