Fix (bitcast (fabs x)), (bitcast (fneg x)) and (bitcast (fcopysign cst, x))
combines for ppc_fp128, since signbit computation is more complicated.

Discussion thread:
http://lists.llvm.org/pipermail/llvm-dev/2015-November/092863.html

Patch by Tim Shen!

llvm-svn: 255305
This commit is contained in:
Eric Christopher 2015-12-10 22:09:06 +00:00
parent 486717bbd5
commit 971f116a1c
2 changed files with 171 additions and 0 deletions

View File

@ -7244,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
return SDValue();
}
/// Returns the element index passed to EXTRACT_ELEMENT in order to pick the Hi
/// part of a ppcf128 value that has been bitcast to i128.
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
  // Bitcasting ppcf128 to i128 swaps the Hi and Lo parts on little-endian
  // machines and leaves them in place on big-endian ones, so the index
  // depends on the endianness recorded in the data layout.
  if (DAG.getDataLayout().isBigEndian())
    return 1;
  return 0;
}
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@ -7310,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
//
// For ppc_fp128:
// fold (bitcast (fneg x)) ->
// flipbit = signbit
// (xor (bitcast x) (build_pair flipbit, flipbit))
// fold (bitcast (fabs x)) ->
// flipbit = (and (extract_element (bitcast x), 0), signbit)
// (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
@ -7320,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
assert(VT.getSizeInBits() == 128);
SDValue SignBit = DAG.getConstant(
APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
SDValue FlipBit;
if (N0.getOpcode() == ISD::FNEG) {
FlipBit = SignBit;
AddToWorklist(FlipBit.getNode());
} else {
assert(N0.getOpcode() == ISD::FABS);
SDValue Hi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
SDLoc(NewConv)));
AddToWorklist(Hi.getNode());
FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
AddToWorklist(FlipBit.getNode());
}
SDValue FlipBits =
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
}
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
@ -7333,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
//
// For ppc_fp128:
// fold (bitcast (fcopysign cst, x)) ->
// flipbit = (and (extract_element
// (xor (bitcast cst), (bitcast x)), 0),
// signbit)
// (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
@ -7361,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(X.getNode());
}
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
N0.getOperand(0));
AddToWorklist(Cst.getNode());
SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
N0.getOperand(1));
AddToWorklist(X.getNode());
SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
AddToWorklist(XorResult.getNode());
SDValue XorResult64 = DAG.getNode(
ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
SDLoc(XorResult)));
AddToWorklist(XorResult64.getNode());
SDValue FlipBit =
DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
AddToWorklist(FlipBit.getNode());
SDValue FlipBits =
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
}
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));

View File

@ -0,0 +1,103 @@
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PPC64
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PPC64
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PPC64
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PPC64
; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
; Bitcast of llvm.fabs applied to a ppc_fp128 argument, returned as i128.
; The expectations below look for both input doubles being spilled to the
; stack and reloaded as integers, the sign bit of one reloaded value being
; isolated (li/sldi mask + and on 64-bit, rlwinm on 32-bit), and that isolated
; bit then being xor'ed into values reloaded from both doubles.
define i128 @test_abs(ppc_fp128 %x) nounwind {
entry:
; PPC64-LABEL: test_abs:
; PPC64-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]]
; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]]
; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
; PPC64-DAG: li [[MASK_REG:[0-9]+]], 1
; PPC64: sldi [[MASK_REG]], [[MASK_REG]], 63
; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
; PPC64: and [[FLIP_BIT:[0-9]+]], [[HI]], [[MASK_REG]]
; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]]
; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]]
; PPC64: blr
; PPC32-DAG: stfd 1, 24(1)
; PPC32-DAG: stfd 2, 16(1)
; PPC32: nop
; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0
; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]]
; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]]
; PPC32: blr
%0 = tail call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %x)
%1 = bitcast ppc_fp128 %0 to i128
ret i128 %1
}
; Bitcast of (fsub 0xM8000..., %x) on ppc_fp128, returned as i128.
; NOTE(review): subtracting from that constant is presumably the negation
; idiom this test is named after; confirm.
; The expectations below look for both input doubles being spilled and
; reloaded as integers, then xor'ed with a constant sign-bit mask: a register
; built by li 1 / sldi 63 on 64-bit, and the xoris immediate 32768 on 32-bit.
; NOTE(review): the BARRIER negative checks presumably exist only to separate
; the neighbouring groups of unordered checks; confirm.
define i128 @test_neg(ppc_fp128 %x) nounwind {
entry:
; PPC64-LABEL: test_neg:
; PPC64-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]]
; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]]
; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
; PPC64-DAG: li [[FLIP_BIT:[0-9]+]], 1
; PPC64-DAG: sldi [[FLIP_BIT]], [[FLIP_BIT]], 63
; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
; PPC64-NOT: BARRIER
; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]]
; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]]
; PPC64: blr
; PPC32-DAG: stfd 1, 24(1)
; PPC32-DAG: stfd 2, 16(1)
; PPC32: nop
; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
; PPC32-NOT: BARRIER
; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768
; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768
; PPC32: blr
%0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x
%1 = bitcast ppc_fp128 %0 to i128
ret i128 %1
}
; Bitcast of llvm.copysign(constant, %x) on ppc_fp128, returned as i128; the
; magnitude operand is a constant and the sign comes from the argument.
; The expectations below look for a single input double being spilled and
; reloaded as an integer, a sign-bit mask (li 1 / sldi 63) and the constant's
; two 64-bit halves (16399 << 48 and 3019 << 52) being built from immediates,
; and the result being assembled with and/or/xor. On 32-bit the sign bit is
; isolated with rlwinm and merged using the oris/xoris immediates 16399 and
; 48304.
define i128 @test_copysign(ppc_fp128 %x) nounwind {
entry:
; PPC64-LABEL: test_copysign:
; PPC64-DAG: stxsdx 1, 0, [[ADDR_REG:[0-9]+]]
; PPC64-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]]
; PPC64-DAG: li [[SIGN:[0-9]+]], 1
; PPC64-DAG: sldi [[SIGN]], [[SIGN]], 63
; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399
; PPC64-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48
; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019
; PPC64-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52
; PPC64-NOT: BARRIER
; PPC64-DAG: ld [[X_HI:[0-9]+]], [[OFFSET]](1)
; PPC64-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]]
; PPC64-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]]
; PPC64-DAG: xor 4, [[SIGN]], [[CST_LO]]
; PPC64: blr
; PPC32: stfd 1, [[STACK:[0-9]+]](1)
; PPC32: nop
; PPC32: lwz [[HI:[0-9]+]], [[STACK]](1)
; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0, 0
; PPC32-NOT: BARRIER
; PPC32-DAG: oris {{[0-9]+}}, [[FLIP_BIT]], 16399
; PPC32-DAG: xoris {{[0-9]+}}, [[FLIP_BIT]], 48304
; PPC32: blr
%0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 0xMBCB0000000000000400F000000000000, ppc_fp128 %x)
%1 = bitcast ppc_fp128 %0 to i128
ret i128 %1
}
declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128)
declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128)