2005-09-01 23:24:04 +00:00
|
|
|
//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
|
2005-09-01 00:19:25 +00:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-29 20:36:04 +00:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2005-09-01 00:19:25 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
|
|
|
|
// both before and after the DAG is legalized.
|
2009-02-17 22:15:04 +00:00
|
|
|
//
|
2009-04-25 17:09:45 +00:00
|
|
|
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
|
|
|
|
// primarily intended to handle simplification opportunities that are implicit
|
|
|
|
// in the LLVM IR and exposed by the various codegen lowering phases.
|
|
|
|
//
|
2005-09-01 00:19:25 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "dagcombine"
|
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2009-03-11 05:08:08 +00:00
|
|
|
#include "llvm/DerivedTypes.h"
|
2009-07-15 21:51:10 +00:00
|
|
|
#include "llvm/LLVMContext.h"
|
2008-01-25 07:20:16 +00:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2009-03-11 05:08:08 +00:00
|
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
2007-05-16 06:37:59 +00:00
|
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
2007-05-07 21:27:48 +00:00
|
|
|
#include "llvm/Target/TargetData.h"
|
Infer alignment of loads and increase their alignment when we can tell they are
from the stack. This allows us to compile stack-align.ll to:
_test:
movsd LCPI1_0, %xmm0
movapd %xmm0, %xmm1
*** andpd 4(%esp), %xmm1
andpd _G, %xmm0
addsd %xmm1, %xmm0
movl 20(%esp), %eax
movsd %xmm0, (%eax)
ret
instead of:
_test:
movsd LCPI1_0, %xmm0
** movsd 4(%esp), %xmm1
** andpd %xmm0, %xmm1
andpd _G, %xmm0
addsd %xmm1, %xmm0
movl 20(%esp), %eax
movsd %xmm0, (%eax)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46401 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-26 19:45:50 +00:00
|
|
|
#include "llvm/Target/TargetFrameInfo.h"
|
2005-09-01 00:19:25 +00:00
|
|
|
#include "llvm/Target/TargetLowering.h"
|
2007-05-07 21:27:48 +00:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
2007-01-08 23:04:05 +00:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2007-05-16 06:37:59 +00:00
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
2006-09-21 16:28:59 +00:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2007-05-16 06:37:59 +00:00
|
|
|
#include "llvm/Support/Debug.h"
|
2009-07-11 13:10:19 +00:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2007-05-16 06:37:59 +00:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2009-08-23 06:35:02 +00:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2005-09-09 23:53:39 +00:00
|
|
|
#include <algorithm>
|
2005-09-01 00:19:25 +00:00
|
|
|
using namespace llvm;
|
|
|
|
|
2006-12-19 22:41:21 +00:00
|
|
|
// Counters reported under DEBUG_TYPE ("dagcombine") by the statistics machinery.
STATISTIC(NodesCombined,    "Number of dag nodes combined");
STATISTIC(PreIndexedNodes,  "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed,      "Number of load/op/store narrowed");
|
2006-12-19 22:41:21 +00:00
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
namespace {
|
2006-10-07 23:37:56 +00:00
|
|
|
// -combiner-alias-analysis: hidden boolean option; see the description string
// for its purpose.
static cl::opt<bool>
CombinerAA("combiner-alias-analysis",
           cl::Hidden,
           cl::desc("Turn on alias analysis during testing"));

// -combiner-global-alias-analysis: hidden boolean option; see the description
// string for its purpose.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis",
                 cl::Hidden,
                 cl::desc("Include global information in alias analysis"));
|
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
//------------------------------ DAGCombiner ---------------------------------//
|
|
|
|
|
2009-10-25 06:33:48 +00:00
|
|
|
class DAGCombiner {
|
2005-09-01 00:19:25 +00:00
|
|
|
SelectionDAG &DAG;
|
2009-01-15 19:20:50 +00:00
|
|
|
const TargetLowering &TLI;
|
2008-11-24 14:53:14 +00:00
|
|
|
CombineLevel Level;
|
2009-04-29 23:29:43 +00:00
|
|
|
CodeGenOpt::Level OptLevel;
|
2008-11-24 14:53:14 +00:00
|
|
|
bool LegalOperations;
|
|
|
|
bool LegalTypes;
|
2005-09-01 00:19:25 +00:00
|
|
|
|
|
|
|
// Worklist of all of the nodes that need to be simplified.
|
2008-08-29 22:21:44 +00:00
|
|
|
std::vector<SDNode*> WorkList;
|
2005-09-01 00:19:25 +00:00
|
|
|
|
2006-10-16 20:52:31 +00:00
|
|
|
// AA - Used for DAG load/store alias analysis.
|
|
|
|
AliasAnalysis &AA;
|
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
/// AddUsersToWorkList - When an instruction is simplified, add all users of
|
|
|
|
/// the instruction to the work lists because they might get more simplified
|
|
|
|
/// now.
|
|
|
|
///
|
|
|
|
void AddUsersToWorkList(SDNode *N) {
|
|
|
|
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
|
2005-09-01 23:24:04 +00:00
|
|
|
UI != UE; ++UI)
|
2008-07-27 20:43:25 +00:00
|
|
|
AddToWorkList(*UI);
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2007-10-08 17:57:15 +00:00
|
|
|
/// visit - call the node-specific routine that knows how to fold each
|
|
|
|
/// particular type of node.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue visit(SDNode *N);
|
2007-10-08 17:57:15 +00:00
|
|
|
|
2006-03-01 04:53:38 +00:00
|
|
|
public:
|
2006-10-04 16:53:27 +00:00
|
|
|
/// AddToWorkList - Add to the work list making sure it's instance is at the
|
|
|
|
/// the back (next to be processed.)
|
2006-03-01 04:03:14 +00:00
|
|
|
void AddToWorkList(SDNode *N) {
|
2006-10-04 16:53:27 +00:00
|
|
|
removeFromWorkList(N);
|
2006-03-01 04:03:14 +00:00
|
|
|
WorkList.push_back(N);
|
|
|
|
}
|
2006-10-04 16:53:27 +00:00
|
|
|
|
2008-02-03 06:49:24 +00:00
|
|
|
/// removeFromWorkList - remove all instances of N from the worklist.
|
|
|
|
///
|
|
|
|
void removeFromWorkList(SDNode *N) {
|
|
|
|
WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
|
|
|
|
WorkList.end());
|
Add support for CombineTo, allowing the dag combiner to replace nodes with
multiple results.
Use this support to implement trivial store->load forwarding, implementing
CodeGen/PowerPC/store-load-fwd.ll. Though this is the most simple case and
can be extended in the future, it is still useful. For example, it speeds
up 197.parser by 6.2% by avoiding an LSU reject in xalloc:
stw r6, lo16(l5_end_of_array)(r2)
addi r2, r5, -4
stwx r5, r4, r2
- lwzx r5, r4, r2
- rlwinm r5, r5, 0, 0, 30
stwx r5, r4, r2
lwz r2, -4(r4)
ori r2, r2, 1
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23690 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-10 22:04:48 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
|
Optimize some 64-bit multiplication by constants into two lea's or one lea + shl since imulq is slow (latency 5). e.g.
x * 40
=>
shlq $3, %rdi
leaq (%rdi,%rdi,4), %rax
This has the added benefit of allowing more multiply to be folded into addressing mode. e.g.
a * 24 + b
=>
leaq (%rdi,%rdi,2), %rax
leaq (%rsi,%rax,8), %rax
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67917 91177308-0d34-0410-b5e6-96231b3b80d8
2009-03-28 05:57:29 +00:00
|
|
|
bool AddTo = true);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
|
2006-10-13 23:32:28 +00:00
|
|
|
return CombineTo(N, &Res, 1, AddTo);
|
2006-03-01 04:53:38 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
|
Optimize some 64-bit multiplication by constants into two lea's or one lea + shl since imulq is slow (latency 5). e.g.
x * 40
=>
shlq $3, %rdi
leaq (%rdi,%rdi,4), %rax
This has the added benefit of allowing more multiply to be folded into addressing mode. e.g.
a * 24 + b
=>
leaq (%rdi,%rdi,2), %rax
leaq (%rsi,%rax,8), %rax
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67917 91177308-0d34-0410-b5e6-96231b3b80d8
2009-03-28 05:57:29 +00:00
|
|
|
bool AddTo = true) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue To[] = { Res0, Res1 };
|
2006-10-13 23:32:28 +00:00
|
|
|
return CombineTo(N, To, 2, AddTo);
|
2006-03-01 04:53:38 +00:00
|
|
|
}
|
2009-01-29 01:59:02 +00:00
|
|
|
|
|
|
|
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
|
2006-02-17 21:58:01 +00:00
|
|
|
/// SimplifyDemandedBits - Check the specified integer node value to see if
|
2006-03-01 19:55:35 +00:00
|
|
|
/// it can be simplified or if things it uses can be simplified by bit
|
2006-02-17 21:58:01 +00:00
|
|
|
/// propagation. If so, return true.
|
2008-07-27 21:46:04 +00:00
|
|
|
bool SimplifyDemandedBits(SDValue Op) {
|
2009-12-11 21:31:27 +00:00
|
|
|
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
|
|
|
|
APInt Demanded = APInt::getAllOnesValue(BitWidth);
|
2008-02-27 00:25:32 +00:00
|
|
|
return SimplifyDemandedBits(Op, Demanded);
|
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
|
2005-10-10 22:31:19 +00:00
|
|
|
|
2006-11-11 00:39:41 +00:00
|
|
|
bool CombineToPreIndexedLoadStore(SDNode *N);
|
|
|
|
bool CombineToPostIndexedLoadStore(SDNode *N);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
|
|
|
|
2007-10-08 17:57:15 +00:00
|
|
|
/// combine - call the node-specific routine that knows how to fold each
|
|
|
|
/// particular type of node. If that doesn't do anything, try the
|
|
|
|
/// target-specific DAG combines.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue combine(SDNode *N);
|
2005-09-01 00:19:25 +00:00
|
|
|
|
|
|
|
// Visitation implementation - Implement dag node combining for different
|
|
|
|
// node types. The semantics are as follows:
|
|
|
|
// Return Value:
|
2008-08-29 22:21:44 +00:00
|
|
|
// SDValue.getNode() == 0 - No change was made
|
|
|
|
// SDValue.getNode() == N - N was replaced, is dead and has been handled.
|
|
|
|
// otherwise - N should be replaced by the returned Operand.
|
2005-09-01 00:19:25 +00:00
|
|
|
//
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue visitTokenFactor(SDNode *N);
|
|
|
|
SDValue visitMERGE_VALUES(SDNode *N);
|
|
|
|
SDValue visitADD(SDNode *N);
|
|
|
|
SDValue visitSUB(SDNode *N);
|
|
|
|
SDValue visitADDC(SDNode *N);
|
|
|
|
SDValue visitADDE(SDNode *N);
|
|
|
|
SDValue visitMUL(SDNode *N);
|
|
|
|
SDValue visitSDIV(SDNode *N);
|
|
|
|
SDValue visitUDIV(SDNode *N);
|
|
|
|
SDValue visitSREM(SDNode *N);
|
|
|
|
SDValue visitUREM(SDNode *N);
|
|
|
|
SDValue visitMULHU(SDNode *N);
|
|
|
|
SDValue visitMULHS(SDNode *N);
|
|
|
|
SDValue visitSMUL_LOHI(SDNode *N);
|
|
|
|
SDValue visitUMUL_LOHI(SDNode *N);
|
|
|
|
SDValue visitSDIVREM(SDNode *N);
|
|
|
|
SDValue visitUDIVREM(SDNode *N);
|
|
|
|
SDValue visitAND(SDNode *N);
|
|
|
|
SDValue visitOR(SDNode *N);
|
|
|
|
SDValue visitXOR(SDNode *N);
|
|
|
|
SDValue SimplifyVBinOp(SDNode *N);
|
|
|
|
SDValue visitSHL(SDNode *N);
|
|
|
|
SDValue visitSRA(SDNode *N);
|
|
|
|
SDValue visitSRL(SDNode *N);
|
|
|
|
SDValue visitCTLZ(SDNode *N);
|
|
|
|
SDValue visitCTTZ(SDNode *N);
|
|
|
|
SDValue visitCTPOP(SDNode *N);
|
|
|
|
SDValue visitSELECT(SDNode *N);
|
|
|
|
SDValue visitSELECT_CC(SDNode *N);
|
|
|
|
SDValue visitSETCC(SDNode *N);
|
|
|
|
SDValue visitSIGN_EXTEND(SDNode *N);
|
|
|
|
SDValue visitZERO_EXTEND(SDNode *N);
|
|
|
|
SDValue visitANY_EXTEND(SDNode *N);
|
|
|
|
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
|
|
|
|
SDValue visitTRUNCATE(SDNode *N);
|
|
|
|
SDValue visitBIT_CONVERT(SDNode *N);
|
|
|
|
SDValue visitBUILD_PAIR(SDNode *N);
|
|
|
|
SDValue visitFADD(SDNode *N);
|
|
|
|
SDValue visitFSUB(SDNode *N);
|
|
|
|
SDValue visitFMUL(SDNode *N);
|
|
|
|
SDValue visitFDIV(SDNode *N);
|
|
|
|
SDValue visitFREM(SDNode *N);
|
|
|
|
SDValue visitFCOPYSIGN(SDNode *N);
|
|
|
|
SDValue visitSINT_TO_FP(SDNode *N);
|
|
|
|
SDValue visitUINT_TO_FP(SDNode *N);
|
|
|
|
SDValue visitFP_TO_SINT(SDNode *N);
|
|
|
|
SDValue visitFP_TO_UINT(SDNode *N);
|
|
|
|
SDValue visitFP_ROUND(SDNode *N);
|
|
|
|
SDValue visitFP_ROUND_INREG(SDNode *N);
|
|
|
|
SDValue visitFP_EXTEND(SDNode *N);
|
|
|
|
SDValue visitFNEG(SDNode *N);
|
|
|
|
SDValue visitFABS(SDNode *N);
|
|
|
|
SDValue visitBRCOND(SDNode *N);
|
|
|
|
SDValue visitBR_CC(SDNode *N);
|
|
|
|
SDValue visitLOAD(SDNode *N);
|
|
|
|
SDValue visitSTORE(SDNode *N);
|
|
|
|
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
|
|
|
|
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
|
|
|
|
SDValue visitBUILD_VECTOR(SDNode *N);
|
|
|
|
SDValue visitCONCAT_VECTORS(SDNode *N);
|
|
|
|
SDValue visitVECTOR_SHUFFLE(SDNode *N);
|
|
|
|
|
|
|
|
SDValue XformToShuffleWithZero(SDNode *N);
|
2009-01-30 00:45:56 +00:00
|
|
|
SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
|
|
|
|
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
|
2009-01-30 23:59:18 +00:00
|
|
|
SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
|
|
|
|
SDValue N3, ISD::CondCode CC,
|
2009-01-30 23:59:18 +00:00
|
|
|
bool NotExtCompare = false);
|
2009-08-10 22:56:29 +00:00
|
|
|
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
|
2009-02-03 00:47:48 +00:00
|
|
|
DebugLoc DL, bool foldBooleans = true);
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
|
2008-01-26 01:09:19 +00:00
|
|
|
unsigned HiOp);
|
2009-08-10 22:56:29 +00:00
|
|
|
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
|
|
|
|
SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue BuildSDIV(SDNode *N);
|
|
|
|
SDValue BuildUDIV(SDNode *N);
|
2009-01-30 21:14:50 +00:00
|
|
|
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue ReduceLoadWidth(SDNode *N);
|
2009-05-28 00:35:15 +00:00
|
|
|
SDValue ReduceLoadOpStoreWidth(SDNode *N);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
|
|
|
|
/// looking for aliasing nodes and adding them to the Aliases vector.
|
2008-07-27 21:46:04 +00:00
|
|
|
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
|
|
|
|
SmallVector<SDValue, 8> &Aliases);
|
2006-10-04 16:53:27 +00:00
|
|
|
|
2006-10-18 12:29:57 +00:00
|
|
|
/// isAlias - Return true if there is any possibility that the two addresses
|
|
|
|
/// overlap.
|
2008-07-27 21:46:04 +00:00
|
|
|
bool isAlias(SDValue Ptr1, int64_t Size1,
|
2006-10-18 12:29:57 +00:00
|
|
|
const Value *SrcValue1, int SrcValueOffset1,
|
2009-09-15 00:18:30 +00:00
|
|
|
unsigned SrcValueAlign1,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Ptr2, int64_t Size2,
|
2009-09-15 00:18:30 +00:00
|
|
|
const Value *SrcValue2, int SrcValueOffset2,
|
|
|
|
unsigned SrcValueAlign2) const;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-11 13:47:09 +00:00
|
|
|
/// FindAliasInfo - Extracts the relevant alias information from the memory
|
|
|
|
/// node. Returns true if the operand was a load.
|
|
|
|
bool FindAliasInfo(SDNode *N,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue &Ptr, int64_t &Size,
|
2009-09-15 00:18:30 +00:00
|
|
|
const Value *&SrcValue, int &SrcValueOffset,
|
|
|
|
unsigned &SrcValueAlignment) const;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-25 16:29:54 +00:00
|
|
|
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
|
2006-10-04 16:53:27 +00:00
|
|
|
/// looking for a better chain (aliasing node.)
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue FindBetterChain(SDNode *N, SDValue Chain);
|
2009-01-31 15:50:11 +00:00
|
|
|
|
|
|
|
/// getShiftAmountTy - Returns a type large enough to hold any valid
|
|
|
|
/// shift amount - before type legalization these can be huge.
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT getShiftAmountTy() {
|
2009-01-31 15:50:11 +00:00
|
|
|
return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
|
|
|
|
}
|
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
public:
|
2009-04-29 23:29:43 +00:00
|
|
|
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
|
2006-10-16 20:52:31 +00:00
|
|
|
: DAG(D),
|
|
|
|
TLI(D.getTargetLoweringInfo()),
|
2008-11-24 14:53:14 +00:00
|
|
|
Level(Unrestricted),
|
2009-04-29 00:15:41 +00:00
|
|
|
OptLevel(OL),
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations(false),
|
|
|
|
LegalTypes(false),
|
2006-10-16 20:52:31 +00:00
|
|
|
AA(A) {}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
/// Run - runs the dag combiner on all nodes in the work list
|
2008-11-24 14:53:14 +00:00
|
|
|
void Run(CombineLevel AtLevel);
|
2005-09-01 00:19:25 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2008-02-03 06:49:24 +00:00
|
|
|
|
|
|
|
namespace {
|
|
|
|
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
|
|
|
|
/// nodes from the worklist.
|
2009-10-25 06:33:48 +00:00
|
|
|
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
|
2008-02-03 06:49:24 +00:00
|
|
|
DAGCombiner &DC;
|
|
|
|
public:
|
2008-02-20 16:44:09 +00:00
|
|
|
explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-06-11 11:42:12 +00:00
|
|
|
virtual void NodeDeleted(SDNode *N, SDNode *E) {
|
2008-02-03 06:49:24 +00:00
|
|
|
DC.removeFromWorkList(N);
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-02-03 06:49:24 +00:00
|
|
|
virtual void NodeUpdated(SDNode *N) {
|
|
|
|
// Ignore updates.
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2006-03-01 04:53:38 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// TargetLowering::DAGCombinerInfo implementation
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
/// AddToWorklist - Forward to DAGCombiner::AddToWorkList on the combiner
/// object stored in DC.
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  DAGCombiner *Combiner = (DAGCombiner*)DC;
  Combiner->AddToWorkList(N);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// CombineTo - Forward a vector of replacement values to the pointer/count
/// form of DAGCombiner::CombineTo on the combiner object stored in DC.
///
/// N is the node being replaced, To holds the replacement values, and AddTo
/// is passed through unchanged.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
  // Indexing element 0 of an empty vector is undefined behavior, so hand the
  // combiner a null pointer together with the zero count in that case.
  const SDValue *Results = To.empty() ? 0 : &To[0];
  return ((DAGCombiner*)DC)->CombineTo(N, Results, To.size(), AddTo);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// CombineTo - Forward a single replacement value to the matching
/// DAGCombiner::CombineTo overload on the combiner object stored in DC.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  DAGCombiner *Combiner = (DAGCombiner*)DC;
  return Combiner->CombineTo(N, Res, AddTo);
}
|
|
|
|
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// CombineTo - Forward a pair of replacement values to the matching
/// DAGCombiner::CombineTo overload on the combiner object stored in DC.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  DAGCombiner *Combiner = (DAGCombiner*)DC;
  return Combiner->CombineTo(N, Res0, Res1, AddTo);
}
|
|
|
|
|
2009-01-29 01:59:02 +00:00
|
|
|
void TargetLowering::DAGCombinerInfo::
|
|
|
|
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
|
|
|
|
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
|
|
|
|
}
|
2006-03-01 04:53:38 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Helper Functions
|
|
|
|
//===----------------------------------------------------------------------===//
|
2006-03-01 04:53:38 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
/// isNegatibleForFree - Return 1 if we can compute the negated form of the
|
|
|
|
/// specified expression for the same cost as the expression itself, or 2 if we
|
|
|
|
/// can compute the negated form more cheaply than the expression itself.
|
2008-11-24 14:53:14 +00:00
|
|
|
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
|
2008-02-26 07:04:54 +00:00
|
|
|
unsigned Depth = 0) {
|
2007-10-16 23:38:29 +00:00
|
|
|
// No compile time optimizations on this type.
|
2009-08-11 20:47:22 +00:00
|
|
|
if (Op.getValueType() == MVT::ppcf128)
|
2007-10-16 23:38:29 +00:00
|
|
|
return 0;
|
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
// fneg is removable even if it has multiple uses.
|
|
|
|
if (Op.getOpcode() == ISD::FNEG) return 2;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
// Don't allow anything with multiple uses.
|
|
|
|
if (!Op.hasOneUse()) return 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-05-25 02:19:06 +00:00
|
|
|
// Don't recurse exponentially.
|
|
|
|
if (Depth > 6) return 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
switch (Op.getOpcode()) {
|
|
|
|
default: return false;
|
|
|
|
case ISD::ConstantFP:
|
2008-02-26 07:04:54 +00:00
|
|
|
// Don't invert constant FP values after legalize. The negated constant
|
|
|
|
// isn't necessarily legal.
|
2008-11-24 14:53:14 +00:00
|
|
|
return LegalOperations ? 0 : 1;
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FADD:
|
|
|
|
// FIXME: determine better conditions for this xform.
|
|
|
|
if (!UnsafeFPMath) return 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
|
2008-11-24 14:53:14 +00:00
|
|
|
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
return V;
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
|
2008-11-24 14:53:14 +00:00
|
|
|
return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FSUB:
|
2009-02-17 22:15:04 +00:00
|
|
|
// We can't turn -(A-B) into B-A when we honor signed zeros.
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
if (!UnsafeFPMath) return 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fsub A, B)) -> (fsub B, A)
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
return 1;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FMUL:
|
|
|
|
case ISD::FDIV:
|
|
|
|
if (HonorSignDependentRoundingFPMath()) return 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
|
2008-11-24 14:53:14 +00:00
|
|
|
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
return V;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-11-24 14:53:14 +00:00
|
|
|
return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FP_EXTEND:
|
|
|
|
case ISD::FP_ROUND:
|
|
|
|
case ISD::FSIN:
|
2008-11-24 14:53:14 +00:00
|
|
|
return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
2006-03-01 04:53:38 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
/// GetNegatedExpression - If isNegatibleForFree returns true, this function
|
|
|
|
/// returns the newly negated expression.
|
2008-07-27 21:46:04 +00:00
|
|
|
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
bool LegalOperations, unsigned Depth = 0) {
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
// fneg is removable even if it has multiple uses.
|
|
|
|
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
// Don't allow anything with multiple uses.
|
|
|
|
assert(Op.hasOneUse() && "Unknown reuse!");
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-05-25 02:19:06 +00:00
|
|
|
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
switch (Op.getOpcode()) {
|
2009-07-14 16:55:14 +00:00
|
|
|
default: llvm_unreachable("Unknown code");
|
2007-08-31 23:34:27 +00:00
|
|
|
case ISD::ConstantFP: {
|
|
|
|
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
|
|
|
|
V.changeSign();
|
|
|
|
return DAG.getConstantFP(V, Op.getValueType());
|
|
|
|
}
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FADD:
|
|
|
|
// FIXME: determine better conditions for this xform.
|
|
|
|
assert(UnsafeFPMath);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
|
2008-11-24 14:53:14 +00:00
|
|
|
if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
|
2009-02-17 22:15:04 +00:00
|
|
|
GetNegatedExpression(Op.getOperand(0), DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations, Depth+1),
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
Op.getOperand(1));
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
|
2009-02-17 22:15:04 +00:00
|
|
|
GetNegatedExpression(Op.getOperand(1), DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations, Depth+1),
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
Op.getOperand(0));
|
|
|
|
case ISD::FSUB:
|
2009-02-17 22:15:04 +00:00
|
|
|
// We can't turn -(A-B) into B-A when we honor signed zeros.
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
assert(UnsafeFPMath);
|
2007-07-02 15:48:56 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fsub 0, B)) -> B
|
2007-07-02 15:48:56 +00:00
|
|
|
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
|
2007-08-31 23:34:27 +00:00
|
|
|
if (N0CFP->getValueAPF().isZero())
|
2007-07-02 15:48:56 +00:00
|
|
|
return Op.getOperand(1);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fsub A, B)) -> (fsub B, A)
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
|
|
|
|
Op.getOperand(1), Op.getOperand(0));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FMUL:
|
|
|
|
case ISD::FDIV:
|
|
|
|
assert(!HonorSignDependentRoundingFPMath());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
|
2008-11-24 14:53:14 +00:00
|
|
|
if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
|
2009-02-17 22:15:04 +00:00
|
|
|
GetNegatedExpression(Op.getOperand(0), DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations, Depth+1),
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
Op.getOperand(1));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:10:18 +00:00
|
|
|
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
Op.getOperand(0),
|
2008-02-26 07:04:54 +00:00
|
|
|
GetNegatedExpression(Op.getOperand(1), DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations, Depth+1));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
case ISD::FP_EXTEND:
|
|
|
|
case ISD::FSIN:
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
|
2009-02-17 22:15:04 +00:00
|
|
|
GetNegatedExpression(Op.getOperand(0), DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations, Depth+1));
|
2008-01-17 07:00:52 +00:00
|
|
|
case ISD::FP_ROUND:
|
2009-01-30 00:45:56 +00:00
|
|
|
return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
|
2009-02-17 22:15:04 +00:00
|
|
|
GetNegatedExpression(Op.getOperand(0), DAG,
|
2008-11-24 14:53:14 +00:00
|
|
|
LegalOperations, Depth+1),
|
2008-01-17 07:00:52 +00:00
|
|
|
Op.getOperand(1));
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
2006-03-01 04:53:38 +00:00
|
|
|
|
|
|
|
|
2005-09-01 23:24:04 +00:00
|
|
|
// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
|
|
|
|
// that selects between the values 1 and 0, making it equivalent to a setcc.
|
2009-02-17 22:15:04 +00:00
|
|
|
// Also, set the incoming LHS, RHS, and CC references to the appropriate
|
2005-09-02 21:18:40 +00:00
|
|
|
// nodes based on the type of node we are checking. This simplifies life a
|
|
|
|
// bit for the callers.
|
2008-07-27 21:46:04 +00:00
|
|
|
static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
|
|
|
|
SDValue &CC) {
|
2005-09-02 21:18:40 +00:00
|
|
|
if (N.getOpcode() == ISD::SETCC) {
|
|
|
|
LHS = N.getOperand(0);
|
|
|
|
RHS = N.getOperand(1);
|
|
|
|
CC = N.getOperand(2);
|
2005-09-01 23:24:04 +00:00
|
|
|
return true;
|
2005-09-02 21:18:40 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
if (N.getOpcode() == ISD::SELECT_CC &&
|
2005-09-01 00:19:25 +00:00
|
|
|
N.getOperand(2).getOpcode() == ISD::Constant &&
|
|
|
|
N.getOperand(3).getOpcode() == ISD::Constant &&
|
2008-03-13 22:13:53 +00:00
|
|
|
cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
|
2005-09-02 21:18:40 +00:00
|
|
|
cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
|
|
|
|
LHS = N.getOperand(0);
|
|
|
|
RHS = N.getOperand(1);
|
|
|
|
CC = N.getOperand(4);
|
2005-09-01 00:19:25 +00:00
|
|
|
return true;
|
2005-09-02 21:18:40 +00:00
|
|
|
}
|
2005-09-01 00:19:25 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2005-09-07 23:25:52 +00:00
|
|
|
// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
|
|
|
|
// one use. If this is true, it allows the users to invert the operation for
|
|
|
|
// free when it is profitable to do so.
|
2008-07-27 21:46:04 +00:00
|
|
|
static bool isOneUseSetCC(SDValue N) {
|
|
|
|
SDValue N0, N1, N2;
|
2008-08-28 21:40:38 +00:00
|
|
|
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
|
2005-09-01 23:24:04 +00:00
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-01-30 00:45:56 +00:00
|
|
|
/// ReassociateOps - Try to reassociate (Opc N0, N1) when one side is itself
/// an Opc node with a constant right operand, so that constants end up
/// together.  Returns the rewritten node, or a null SDValue if no pattern
/// applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  // Case 1: the left operand is (op x, c1).
  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
    SDValue X  = N0.getOperand(0);
    SDValue C1 = N0.getOperand(1);

    if (isa<ConstantSDNode>(N1)) {
      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
      SDValue Folded =
        DAG.FoldConstantArithmetic(Opc, VT,
                                   cast<ConstantSDNode>(C1),
                                   cast<ConstantSDNode>(N1));
      return DAG.getNode(Opc, DL, VT, X, Folded);
    }

    if (N0.hasOneUse()) {
      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
      SDValue Inner = DAG.getNode(Opc, N0.getDebugLoc(), VT, X, N1);
      AddToWorkList(Inner.getNode());
      return DAG.getNode(Opc, DL, VT, Inner, C1);
    }
  }

  // Case 2: the right operand is (op x, c1).
  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
    SDValue X  = N1.getOperand(0);
    SDValue C1 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N0)) {
      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
      SDValue Folded =
        DAG.FoldConstantArithmetic(Opc, VT,
                                   cast<ConstantSDNode>(C1),
                                   cast<ConstantSDNode>(N0));
      return DAG.getNode(Opc, DL, VT, X, Folded);
    }

    if (N1.hasOneUse()) {
      // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
      // Note: the inner node takes its debug location from N0, not N1.
      SDValue Inner = DAG.getNode(Opc, N0.getDebugLoc(), VT, X, N0);
      AddToWorkList(Inner.getNode());
      return DAG.getNode(Opc, DL, VT, Inner, C1);
    }
  }

  // Nothing matched.
  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// CombineTo - Replace all uses of the NumTo results of N with the
/// corresponding entries of To.
///
/// \param N      the node being replaced; must define exactly NumTo values.
/// \param To     array of NumTo replacement values.  An entry may have a null
///               node; such entries are skipped when updating the worklist.
/// \param NumTo  number of entries in To.
/// \param AddTo  when true, each non-null replacement node and its users are
///               pushed onto the worklist.
/// \returns SDValue(N, 0).  N itself is deleted from the DAG if it ends up
///          with no uses, so the result is only a marker for the caller.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(errs() << "\nReplacing.1 ";
        N->dump(&DAG);
        errs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        errs() << " and " << NumTo-1 << " other values\n");

  // Check result types unconditionally.  This loop used to live inside the
  // DEBUG(...) block above, which meant a type mismatch was only diagnosed
  // when debug output for this pass happened to be enabled.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To, &DeadNodes);

  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorkList(To[i].getNode());
        AddUsersToWorkList(To[i].getNode());
      }
    }
  }

  // If the node is now dead, remove it from the graph.  The node may not be
  // dead if the replacement process recursively simplified to something else
  // needing this node.
  if (N->use_empty()) {
    // Nodes can be reintroduced into the worklist.  Make sure we do not
    // process a node that has been replaced.
    removeFromWorkList(N);

    // Since the node is now dead, remove it from the graph.
    DAG.DeleteNode(N);
  }
  return SDValue(N, 0);
}
|
|
|
|
|
2009-01-29 01:59:02 +00:00
|
|
|
void
|
|
|
|
DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
|
|
|
|
TLO) {
|
2009-02-17 22:15:04 +00:00
|
|
|
// Replace all uses. If any nodes become isomorphic to other nodes and
|
2008-02-03 06:49:24 +00:00
|
|
|
// are deleted, make sure to remove them from our worklist.
|
|
|
|
WorkListRemover DeadNodes(*this);
|
|
|
|
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
|
2009-01-29 01:59:02 +00:00
|
|
|
|
2008-02-03 06:49:24 +00:00
|
|
|
// Push the new node and any (possibly new) users onto the worklist.
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(TLO.New.getNode());
|
|
|
|
AddUsersToWorkList(TLO.New.getNode());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-02-03 06:49:24 +00:00
|
|
|
// Finally, if the node is now dead, remove it from the graph. The node
|
|
|
|
// may not be dead if the replacement process recursively simplified to
|
|
|
|
// something else needing this node.
|
2008-08-28 21:40:38 +00:00
|
|
|
if (TLO.Old.getNode()->use_empty()) {
|
|
|
|
removeFromWorkList(TLO.Old.getNode());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-02-03 06:49:24 +00:00
|
|
|
// If the operands of this node are only used by the node, they will now
|
|
|
|
// be dead. Make sure to visit them first to delete dead nodes early.
|
2008-08-28 21:40:38 +00:00
|
|
|
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
|
|
|
|
if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
|
|
|
|
AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
DAG.DeleteNode(TLO.Old.getNode());
|
2008-02-03 06:49:24 +00:00
|
|
|
}
|
2009-01-29 01:59:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// SimplifyDemandedBits - Check the specified integer node value to see if
|
|
|
|
/// it can be simplified or if things it uses can be simplified by bit
|
|
|
|
/// propagation. If so, return true.
|
|
|
|
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
|
|
|
|
TargetLowering::TargetLoweringOpt TLO(DAG);
|
|
|
|
APInt KnownZero, KnownOne;
|
|
|
|
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
|
|
|
|
return false;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-29 01:59:02 +00:00
|
|
|
// Revisit the node.
|
|
|
|
AddToWorkList(Op.getNode());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-29 01:59:02 +00:00
|
|
|
// Replace the old value with the new one.
|
|
|
|
++NodesCombined;
|
2009-08-23 06:35:02 +00:00
|
|
|
DEBUG(errs() << "\nReplacing.2 ";
|
|
|
|
TLO.Old.getNode()->dump(&DAG);
|
|
|
|
errs() << "\nWith: ";
|
|
|
|
TLO.New.getNode()->dump(&DAG);
|
|
|
|
errs() << '\n');
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-29 01:59:02 +00:00
|
|
|
CommitTargetLoweringOpt(TLO);
|
2008-02-03 06:49:24 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Main DAG Combiner implementation
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2008-11-24 14:53:14 +00:00
|
|
|
/// Run - Main worklist loop of the combiner.  Every node in the DAG is queued,
/// then nodes are popped one at a time: dead nodes are deleted, live nodes are
/// handed to combine(), and any replacement is wired into the DAG with the
/// affected nodes re-queued.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Record the level in the instance variables so the visit routines see it.
  Level = AtLevel;
  LegalOperations = Level >= NoIllegalOperations;
  LegalTypes = Level >= NoIllegalTypes;

  // Seed the worklist with every node in the DAG.
  WorkList.reserve(DAG.allnodes_size());
  for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(),
       NE = DAG.allnodes_end(); NI != NE; ++NI)
    WorkList.push_back(NI);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done.  Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // Process nodes from the back of the worklist until it drains.
  while (!WorkList.empty()) {
    SDNode *N = WorkList.back();
    WorkList.pop_back();

    // A node without uses is dead.  Queue its operands before deleting it:
    // they may now be dead too, or have fewer uses, enabling other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned OpIdx = 0, NumOps = N->getNumOperands();
           OpIdx != NumOps; ++OpIdx)
        AddToWorkList(N->getOperand(OpIdx).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);
    SDNode *NewN = RV.getNode();

    // Null result: nothing was combined.
    if (NewN == 0)
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (NewN == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           NewN->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(errs() << "\nReplacing.3 ";
          N->dump(&DAG);
          errs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          errs() << '\n');

    WorkListRemover DeadNodes(*this);
    if (N->getNumValues() == NewN->getNumValues()) {
      // Same number of results: replace node for node.
      DAG.ReplaceAllUsesWith(N, NewN, &DeadNodes);
    } else {
      // Otherwise N must define a single value that RV replaces directly.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(NewN);
    AddUsersToWorkList(NewN);

    // Queue the operands of the old node as well: N may have been their last
    // user, in which case they become dead once N is deleted.
    for (unsigned OpIdx = 0, NumOps = N->getNumOperands();
         OpIdx != NumOps; ++OpIdx)
      AddToWorkList(N->getOperand(OpIdx).getNode());

    // If the node is now dead, remove it from the graph.  The node may not be
    // dead if the replacement process recursively simplified to something
    // else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist.  Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visit - Dispatch N to the visitXXX routine matching its opcode and return
/// whatever that routine returns.  Opcodes without a dedicated routine yield
/// a null SDValue.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  // Chain / multi-value plumbing.
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);

  // Integer arithmetic.
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);

  // Bitwise operations, shifts and bit counting.
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTPOP:              return visitCTPOP(N);

  // Selects and comparisons.
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);

  // Integer conversions and bit reinterpretation.
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);

  // Floating point arithmetic and conversions.
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);

  // Branches.
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);

  // Memory operations.
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);

  // Vector operations.
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);

  // No dedicated visitor for this opcode.
  default:
    break;
  }
  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// combine - Try to simplify N in three stages: the generic visit routines,
/// then the target's PerformDAGCombine hook (for target opcodes or opcodes the
/// target registered for), then a lookup for an existing node that is N with
/// its two operands commuted.  Returns the first non-null result, or a null
/// SDValue when nothing applies.
SDValue DAGCombiner::combine(SDNode *N) {
  // Stage 1: target-independent combines.
  SDValue Result = visit(N);
  if (Result.getNode() != 0)
    return Result;

  assert(N->getOpcode() != ISD::DELETED_NODE &&
         "Node was deleted but visit returned NULL!");

  // Stage 2: if nothing happened, try a target-specific DAG combine.
  const bool TargetWantsNode =
    N->getOpcode() >= ISD::BUILTIN_OP_END ||
    TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode());
  if (TargetWantsNode) {
    // Expose the DAG combiner to the target combiner impls.
    TargetLowering::DAGCombinerInfo
      DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);

    Result = TLI.PerformDAGCombine(N, DagCombineInfo);
    if (Result.getNode() != 0)
      return Result;
  }

  // Stage 3: if N is a commutative binary node, try commuting it to enable
  // more sdisel CSE.
  if (SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // Constant operands are canonicalized to RHS, so skip the lookup when
    // swapping would move a constant from the RHS to the LHS.
    if (isa<ConstantSDNode>(LHS) || !isa<ConstantSDNode>(RHS)) {
      SDValue Swapped[] = { RHS, LHS };
      SDNode *Existing = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
                                             Swapped, 2);
      if (Existing)
        return SDValue(Existing, 0);
    }
  }

  // Still null at this point.
  return Result;
}
|
2007-10-08 17:57:15 +00:00
|
|
|
|
Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30821 91177308-0d34-0410-b5e6-96231b3b80d8
2006-10-08 22:57:01 +00:00
|
|
|
/// getInputChainForNode - Given a node, return its input chain if it has one,
|
|
|
|
/// otherwise return a null sd operand.
|
2008-07-27 21:46:04 +00:00
|
|
|
static SDValue getInputChainForNode(SDNode *N) {
|
Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30821 91177308-0d34-0410-b5e6-96231b3b80d8
2006-10-08 22:57:01 +00:00
|
|
|
if (unsigned NumOps = N->getNumOperands()) {
|
2009-08-11 20:47:22 +00:00
|
|
|
if (N->getOperand(0).getValueType() == MVT::Other)
|
Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30821 91177308-0d34-0410-b5e6-96231b3b80d8
2006-10-08 22:57:01 +00:00
|
|
|
return N->getOperand(0);
|
2009-08-11 20:47:22 +00:00
|
|
|
else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
|
Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30821 91177308-0d34-0410-b5e6-96231b3b80d8
2006-10-08 22:57:01 +00:00
|
|
|
return N->getOperand(NumOps-1);
|
|
|
|
for (unsigned i = 1; i < NumOps-1; ++i)
|
2009-08-11 20:47:22 +00:00
|
|
|
if (N->getOperand(i).getValueType() == MVT::Other)
|
Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30821 91177308-0d34-0410-b5e6-96231b3b80d8
2006-10-08 22:57:01 +00:00
|
|
|
return N->getOperand(i);
|
|
|
|
}
|
2009-01-30 01:13:16 +00:00
|
|
|
return SDValue();
|
Eliminate more token factors by taking advantage of transitivity:
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30821 91177308-0d34-0410-b5e6-96231b3b80d8
2006-10-08 22:57:01 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitTokenFactor - Simplify the token factor N.
///
/// Two rewrites are attempted:
///  1. With exactly two operands, if the input chain of one operand is the
///     other operand, the other operand is redundant and the first is
///     returned directly.
///  2. Otherwise the operand lists of N and of any single-use token factors
///     reachable through it are flattened into one list, dropping entry
///     tokens and repeated nodes.  If that changed anything, N is replaced
///     (via CombineTo) by the entry node or by a new token factor.
///
/// Returns a null SDValue when nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // Two-operand shortcut: if one operand already depends on the other through
  // its input chain, the token factor only needs the dependent operand.
  if (N->getNumOperands() == 2) {
    SDValue First = N->getOperand(0);
    SDValue Second = N->getOperand(1);
    if (getInputChainForNode(First.getNode()) == Second)
      return First;
    if (getInputChainForNode(Second.getNode()) == First)
      return Second;
  }

  SmallVector<SDNode *, 8> PendingTFs;  // Token factors still to be scanned.
  SmallVector<SDValue, 8> MergedOps;    // Operands of the replacement node.
  SmallPtrSet<SDNode*, 16> VisitedOps;  // Nodes already placed in MergedOps.
  bool NeedsRewrite = false;            // Set once the operand list differs.

  // Seed the scan with N itself.
  PendingTFs.push_back(N);

  // PendingTFs grows while it is being walked, so its size is re-read on every
  // iteration.
  for (unsigned TFIdx = 0; TFIdx < PendingTFs.size(); ++TFIdx) {
    SDNode *CurTF = PendingTFs[TFIdx];

    for (unsigned OpIdx = 0, NumOps = CurTF->getNumOperands();
         OpIdx != NumOps; ++OpIdx) {
      SDValue Operand = CurTF->getOperand(OpIdx);
      SDNode *OperandNode = Operand.getNode();
      const unsigned Opc = Operand.getOpcode();

      // Entry tokens are redundant and are never added to the merged list.
      if (Opc == ISD::EntryToken) {
        NeedsRewrite = true;
        continue;
      }

      // A token factor used only here, and not queued yet, is scanned later
      // instead of being kept as an operand.
      if (Opc == ISD::TokenFactor && Operand.hasOneUse() &&
          std::find(PendingTFs.begin(), PendingTFs.end(), OperandNode) ==
              PendingTFs.end()) {
        PendingTFs.push_back(OperandNode);
        // Clean up in case the nested token factor ends up removed.
        AddToWorkList(OperandNode);
        NeedsRewrite = true;
        continue;
      }

      // Anything else (including token factors that did not qualify above)
      // is kept once; a repeat means the operand list shrinks.
      if (SeenOpsInsertHelperUnused: false) {}
      if (VisitedOps.insert(OperandNode))
        MergedOps.push_back(Operand);
      else
        NeedsRewrite = true;
    }
  }

  if (!NeedsRewrite)
    return SDValue();

  SDValue Replacement;
  if (MergedOps.empty()) {
    // Every operand was dropped, so only the entry token remains.
    Replacement = DAG.getEntryNode();
  } else {
    // Build the flattened, de-duplicated token factor.
    Replacement = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                              MVT::Other, &MergedOps[0], MergedOps.size());
  }

  // Don't add users to work list.
  return CombineTo(N, Replacement, false);
}
|
|
|
|
|
2008-02-13 07:25:05 +00:00
|
|
|
/// MERGE_VALUES can always be eliminated.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
|
2008-02-13 07:25:05 +00:00
|
|
|
WorkListRemover DeadNodes(*this);
|
2009-08-10 23:43:19 +00:00
|
|
|
// Replacing results may cause a different MERGE_VALUES to suddenly
|
|
|
|
// be CSE'd with N, and carry its uses with it. Iterate until no
|
|
|
|
// uses remain, to ensure that the node can be safely deleted.
|
|
|
|
do {
|
|
|
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
|
|
|
|
&DeadNodes);
|
|
|
|
} while (!N->use_empty());
|
2008-02-13 07:25:05 +00:00
|
|
|
removeFromWorkList(N);
|
|
|
|
DAG.DeleteNode(N);
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2008-02-13 07:25:05 +00:00
|
|
|
}
|
|
|
|
|
Remove this xform:
(shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2)
Replace it with:
(add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
This fixes test/CodeGen/ARM/smul.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@33361 91177308-0d34-0410-b5e6-96231b3b80d8
2007-01-19 17:51:44 +00:00
|
|
|
static
|
2009-01-30 02:23:43 +00:00
|
|
|
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
|
|
|
|
SelectionDAG &DAG) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N0.getValueType();
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue N00 = N0.getOperand(0);
|
|
|
|
SDValue N01 = N0.getOperand(1);
|
Remove this xform:
(shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2)
Replace it with:
(add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
This fixes test/CodeGen/ARM/smul.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@33361 91177308-0d34-0410-b5e6-96231b3b80d8
2007-01-19 17:51:44 +00:00
|
|
|
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
|
2009-01-30 02:23:43 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
|
Remove this xform:
(shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2)
Replace it with:
(add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
This fixes test/CodeGen/ARM/smul.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@33361 91177308-0d34-0410-b5e6-96231b3b80d8
2007-01-19 17:51:44 +00:00
|
|
|
isa<ConstantSDNode>(N00.getOperand(1))) {
|
2009-01-30 02:23:43 +00:00
|
|
|
// fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
|
|
|
|
N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
|
|
|
|
DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
|
|
|
|
N00.getOperand(0), N01),
|
|
|
|
DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
|
|
|
|
N00.getOperand(1), N01));
|
|
|
|
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
|
Remove this xform:
(shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2)
Replace it with:
(add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
This fixes test/CodeGen/ARM/smul.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@33361 91177308-0d34-0410-b5e6-96231b3b80d8
2007-01-19 17:51:44 +00:00
|
|
|
}
|
2009-01-30 02:23:43 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
Remove this xform:
(shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2)
Replace it with:
(add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
This fixes test/CodeGen/ARM/smul.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@33361 91177308-0d34-0410-b5e6-96231b3b80d8
2007-01-19 17:51:44 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitADD - Try the folds below, in order, on the two-operand node N that is
/// being visited as an add.  The first fold that applies determines the
/// result; a null SDValue is returned when none applies.  The order of the
/// folds is significant and must be kept when editing.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ConstantSDNode *LHSC = dyn_cast<ConstantSDNode>(LHS);
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
  EVT VT = LHS.getValueType();
  DebugLoc DL = N->getDebugLoc();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVec = SimplifyVBinOp(N);
    if (FoldedVec.getNode())
      return FoldedVec;
  }

  // fold (add x, undef) -> undef
  if (LHS.getOpcode() == ISD::UNDEF)
    return LHS;
  if (RHS.getOpcode() == ISD::UNDEF)
    return RHS;

  if (LHSC) {
    // fold (add c1, c2) -> c1+c2
    if (RHSC)
      return DAG.FoldConstantArithmetic(ISD::ADD, VT, LHSC, RHSC);
    // canonicalize constant to RHS
    return DAG.getNode(ISD::ADD, DL, VT, RHS, LHS);
  }

  // fold (add x, 0) -> x
  if (RHSC && RHSC->isNullValue())
    return LHS;

  // fold (add Sym, c) -> Sym+c
  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(LHS);
  if (GA && !LegalOperations && TLI.isOffsetFoldingLegal(GA) && RHSC &&
      GA->getOpcode() == ISD::GlobalAddress)
    return DAG.getGlobalAddress(GA->getGlobal(), VT,
                                GA->getOffset() +
                                  static_cast<uint64_t>(RHSC->getSExtValue()));

  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (RHSC && LHS.getOpcode() == ISD::SUB)
    if (ConstantSDNode *MinuendC = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getConstant(RHSC->getAPIntValue()+
                                         MinuendC->getAPIntValue(), VT),
                         LHS.getOperand(1));

  // reassociate add
  SDValue Reassociated = ReassociateOps(ISD::ADD, DL, LHS, RHS);
  if (Reassociated.getNode())
    return Reassociated;

  // fold ((0-A) + B) -> B-A
  if (LHS.getOpcode() == ISD::SUB)
    if (ConstantSDNode *LHSMinuend =
            dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
      if (LHSMinuend->isNullValue())
        return DAG.getNode(ISD::SUB, DL, VT, RHS, LHS.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (RHS.getOpcode() == ISD::SUB)
    if (ConstantSDNode *RHSMinuend =
            dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
      if (RHSMinuend->isNullValue())
        return DAG.getNode(ISD::SUB, DL, VT, LHS, RHS.getOperand(1));

  // fold (A+(B-A)) -> B
  if (RHS.getOpcode() == ISD::SUB && LHS == RHS.getOperand(1))
    return RHS.getOperand(0);

  // fold ((B-A)+A) -> B
  if (LHS.getOpcode() == ISD::SUB && RHS == LHS.getOperand(1))
    return LHS.getOperand(0);

  // The next two folds share the shape (A + (B - (X + Y))).
  if (RHS.getOpcode() == ISD::SUB &&
      RHS.getOperand(1).getOpcode() == ISD::ADD) {
    SDValue InnerAdd = RHS.getOperand(1);
    // fold (A+(B-(A+C))) to (B-C)
    if (LHS == InnerAdd.getOperand(0))
      return DAG.getNode(ISD::SUB, DL, VT, RHS.getOperand(0),
                         InnerAdd.getOperand(1));
    // fold (A+(B-(C+A))) to (B-C)
    if (LHS == InnerAdd.getOperand(1))
      return DAG.getNode(ISD::SUB, DL, VT, RHS.getOperand(0),
                         InnerAdd.getOperand(0));
  }

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((RHS.getOpcode() == ISD::SUB || RHS.getOpcode() == ISD::ADD) &&
      RHS.getOperand(0).getOpcode() == ISD::SUB &&
      LHS == RHS.getOperand(0).getOperand(1))
    return DAG.getNode(RHS.getOpcode(), DL, VT,
                       RHS.getOperand(0).getOperand(0), RHS.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (LHS.getOpcode() == ISD::SUB && RHS.getOpcode() == ISD::SUB) {
    SDValue A = LHS.getOperand(0);
    SDValue B = LHS.getOperand(1);
    SDValue C = RHS.getOperand(0);
    SDValue D = RHS.getOperand(1);

    if (isa<ConstantSDNode>(A) || isa<ConstantSDNode>(C))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, LHS.getDebugLoc(), VT, A, C),
                         DAG.getNode(ISD::ADD, RHS.getDebugLoc(), VT, B, D));
  }

  // For non-vector types, hand the node to SimplifyDemandedBits; when it
  // returns true, N itself is returned.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
    DAG.ComputeMaskedBits(LHS, Mask, LHSZero, LHSOne);

    // The RHS is only analysed when LHSZero has at least one bit set.
    if (LHSZero.getBoolValue()) {
      DAG.ComputeMaskedBits(RHS, Mask, RHSZero, RHSOne);

      const APInt LHSMaybeSet = ~LHSZero & Mask;
      const APInt RHSMaybeSet = ~RHSZero & Mask;

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & LHSMaybeSet) == LHSMaybeSet ||
          (LHSZero & RHSMaybeSet) == RHSMaybeSet)
        return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
    }
  }

  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
  // Tried with the single-use shift on the left first, then on the right.
  if (LHS.getOpcode() == ISD::SHL && LHS.getNode()->hasOneUse()) {
    SDValue Combined = combineShlAddConstant(DL, LHS, RHS, DAG);
    if (Combined.getNode())
      return Combined;
  }
  if (RHS.getOpcode() == ISD::SHL && RHS.getNode()->hasOneUse()) {
    SDValue Combined = combineShlAddConstant(DL, RHS, LHS, DAG);
    if (Combined.getNode())
      return Combined;
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitADDC - Try to simplify the node N being visited as an ADDC.  Value 0
/// of N is the sum and value 1 is the flag (carry) result.  Folds that replace
/// both results go through CombineTo; a null SDValue is returned when nothing
/// applies.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ConstantSDNode *LHSC = dyn_cast<ConstantSDNode>(LHS);
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
  EVT VT = LHS.getValueType();

  // If the flag result is dead, turn this into an ADD.  The operands are
  // passed in swapped order.
  if (N->hasNUsesOfValue(0, 1))
    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, RHS, LHS),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 N->getDebugLoc(), MVT::Flag));

  // canonicalize constant to RHS.
  if (LHSC && !RHSC)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), RHS, LHS);

  // fold (addc x, 0) -> x + no carry out
  if (RHSC && RHSC->isNullValue())
    return CombineTo(N, LHS, DAG.getNode(ISD::CARRY_FALSE,
                                         N->getDebugLoc(), MVT::Flag));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
  DAG.ComputeMaskedBits(LHS, Mask, LHSZero, LHSOne);

  // The RHS is only analysed when LHSZero has at least one bit set.
  if (LHSZero.getBoolValue()) {
    DAG.ComputeMaskedBits(RHS, Mask, RHSZero, RHSOne);

    const APInt LHSMaybeSet = ~LHSZero & Mask;
    const APInt RHSMaybeSet = ~RHSZero & Mask;

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & LHSMaybeSet) == LHSMaybeSet ||
        (LHSZero & RHSMaybeSet) == RHSMaybeSet)
      return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, LHS, RHS),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   N->getDebugLoc(), MVT::Flag));
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitADDE - Try to simplify the node N being visited as an ADDE, whose
/// operands are (lhs, rhs, carry-in).  Returns the replacement node, or a null
/// SDValue when neither fold applies.
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  const bool LHSIsConst = isa<ConstantSDNode>(LHS);
  const bool RHSIsConst = isa<ConstantSDNode>(RHS);

  // canonicalize constant to RHS
  if (LHSIsConst && !RHSIsConst)
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
                       RHS, LHS, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  // Note that the ADDC is built with the operands in swapped order (y, x).
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), RHS, LHS);

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSUB - Try the integer subtraction folds below on N, in order. Returns
/// the replacement value of the first fold that applies, or an empty SDValue
/// when none does.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = LHS.getValueType();
  ConstantSDNode *LHSC = dyn_cast<ConstantSDNode>(LHS.getNode());
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode());

  // Vector operations are handed to SimplifyVBinOp first.
  if (VT.isVector()) {
    SDValue Folded = SimplifyVBinOp(N);
    if (Folded.getNode())
      return Folded;
  }

  // x - x -> 0
  if (LHS == RHS)
    return DAG.getConstant(0, N->getValueType(0));

  if (RHSC) {
    // c1 - c2 -> constant-folded result
    if (LHSC)
      return DAG.FoldConstantArithmetic(ISD::SUB, VT, LHSC, RHSC);
    // x - c -> x + (-c)
    SDValue NegC = DAG.getConstant(-RHSC->getAPIntValue(), VT);
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, LHS, NegC);
  }

  if (LHS.getOpcode() == ISD::ADD) {
    SDValue A = LHS.getOperand(0);
    SDValue B = LHS.getOperand(1);

    // (A + B) - A -> B
    if (A == RHS)
      return B;
    // (A + B) - B -> A
    if (B == RHS)
      return A;

    unsigned InnerOpc = B.getOpcode();
    // (A + (X + C)) - X -> A + C, and likewise (A + (X - C)) - X -> A - C
    if ((InnerOpc == ISD::SUB || InnerOpc == ISD::ADD) &&
        B.getOperand(0) == RHS)
      return DAG.getNode(InnerOpc, N->getDebugLoc(), VT, A, B.getOperand(1));
    // (A + (C + X)) - X -> A + C
    if (InnerOpc == ISD::ADD && B.getOperand(1) == RHS)
      return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, A, B.getOperand(0));
  }

  // (A - (B - C)) - C -> A - B
  if (LHS.getOpcode() == ISD::SUB) {
    SDValue Inner = LHS.getOperand(1);
    if (Inner.getOpcode() == ISD::SUB && Inner.getOperand(1) == RHS)
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         LHS.getOperand(0), Inner.getOperand(0));
  }

  // An undef operand makes the whole subtraction undef.
  if (LHS.getOpcode() == ISD::UNDEF)
    return LHS;
  if (RHS.getOpcode() == ISD::UNDEF)
    return RHS;

  // Symbol offsets are only considered before operation legalization and
  // when the target reports that folding an offset into this symbol is legal.
  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(LHS);
  if (GA && !LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
    // Sym - c -> Sym-c
    // NOTE(review): RHSC appears to always be null here, because every
    // constant right-hand side already returned from the folds above.
    if (RHSC && GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), VT,
                                  GA->getOffset() -
                                    (uint64_t)RHSC->getSExtValue());
    // (Sym+c1) - (Sym+c2) -> c1-c2
    GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(RHS);
    if (GB && GA->getGlobal() == GB->getGlobal())
      return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                             VT);
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitMUL - Try the integer multiplication folds below on N, in order.
/// Returns the replacement value of the first fold that applies, or an empty
/// SDValue when none does.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ConstantSDNode *LHSC = dyn_cast<ConstantSDNode>(LHS);
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
  EVT VT = LHS.getValueType();

  // Vector operations are handed to SimplifyVBinOp first.
  if (VT.isVector()) {
    SDValue Folded = SimplifyVBinOp(N);
    if (Folded.getNode())
      return Folded;
  }

  // x * undef -> 0
  bool AnyUndef = LHS.getOpcode() == ISD::UNDEF ||
                  RHS.getOpcode() == ISD::UNDEF;
  if (AnyUndef)
    return DAG.getConstant(0, VT);

  if (LHSC) {
    // c1 * c2 -> constant-folded result
    if (RHSC)
      return DAG.FoldConstantArithmetic(ISD::MUL, VT, LHSC, RHSC);
    // Only the left operand is constant: swap so the constant is on the right.
    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, RHS, LHS);
  }

  if (RHSC) {
    const APInt &C = RHSC->getAPIntValue();

    // x * 0 -> 0
    if (RHSC->isNullValue())
      return RHS;
    // x * -1 -> 0 - x
    if (RHSC->isAllOnesValue())
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getConstant(0, VT), LHS);
    // x * (1 << c) -> x << c
    if (C.isPowerOf2())
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, LHS,
                         DAG.getConstant(C.logBase2(), getShiftAmountTy()));
    // x * -(1 << c) -> 0 - (x << c)
    if ((-C).isPowerOf2()) {
      unsigned ShAmt = (-C).logBase2();
      // FIXME: If the input is something that is easily negated (e.g. a
      // single-use add), we should put the negate there.
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getConstant(0, VT),
                         DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, LHS,
                              DAG.getConstant(ShAmt, getShiftAmountTy())));
    }
    // (X << c1) * c2 -> X * (c2 << c1)
    if (LHS.getOpcode() == ISD::SHL &&
        isa<ConstantSDNode>(LHS.getOperand(1))) {
      SDValue ShiftedC = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                     RHS, LHS.getOperand(1));
      AddToWorkList(ShiftedC.getNode());
      return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                         LHS.getOperand(0), ShiftedC);
    }
  }

  // (X << C) * Y -> (X * Y) << C when the shift has exactly one use. The
  // shift may be either operand; the left one is examined first.
  {
    SDValue Shift(0, 0), Other(0, 0);
    if (LHS.getOpcode() == ISD::SHL &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        LHS.getNode()->hasOneUse()) {
      Shift = LHS;
      Other = RHS;
    } else if (RHS.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(RHS.getOperand(1)) &&
               RHS.getNode()->hasOneUse()) {
      Shift = RHS;
      Other = LHS;
    }

    if (Shift.getNode()) {
      SDValue Product = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                    Shift.getOperand(0), Other);
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                         Product, Shift.getOperand(1));
    }
  }

  // (x + c1) * c2 -> (x * c2) + (c1 * c2), for a single-use add of a constant.
  if (RHSC && LHS.getOpcode() == ISD::ADD && LHS.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1)))
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       DAG.getNode(ISD::MUL, LHS.getDebugLoc(), VT,
                                   LHS.getOperand(0), RHS),
                       DAG.getNode(ISD::MUL, RHS.getDebugLoc(), VT,
                                   LHS.getOperand(1), RHS));

  // Last resort: let ReassociateOps try to reassociate the multiply.
  SDValue Reassociated = ReassociateOps(ISD::MUL, N->getDebugLoc(), LHS, RHS);
  if (Reassociated.getNode() != 0)
    return Reassociated;

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSDIV - Try the signed division folds below on N, in order. Returns
/// the replacement value of the first fold that applies, or an empty SDValue
/// when none does.
///
/// All inspection of a constant divisor is done on its APInt value rather
/// than through int64_t, so that negating the divisor cannot overflow a
/// signed 64-bit integer and constants wider than 64 bits are handled.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, -1) -> 0-X
  // This is tested before the divide-by-one fold so that a one-bit-wide
  // divisor with its bit set keeps being treated as -1, its signed value.
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1)
    return N0;
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
                         N0, N1);
  }
  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2DivCheap())
      return SDValue();

    // The divisor is +/-(1 << lg2), so its trailing zero count is lg2
    // whichever sign it has; no negation of the value is needed.
    const APInt &Divisor = N1C->getAPIntValue();
    unsigned lg2 = Divisor.countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                              DAG.getConstant(VT.getSizeInBits()-1,
                                              getShiftAmountTy()));
    AddToWorkList(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
                              DAG.getConstant(VT.getSizeInBits() - lg2,
                                              getShiftAmountTy()));
    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
    AddToWorkList(SRL.getNode());
    AddToWorkList(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
                              DAG.getConstant(lg2, getShiftAmountTy()));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (Divisor.isNonNegative())
      return SRA;

    AddToWorkList(SRA.getNode());
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Divisors of 1 and -1 never reach this point (both
  // were folded above), so only a zero divisor has to be excluded here.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitUDIV - Try the unsigned division folds below on N, in order. Returns
/// the replacement value of the first fold that applies, or an empty SDValue
/// when none does.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue Dividend = N->getOperand(0);
  SDValue Divisor = N->getOperand(1);
  ConstantSDNode *DividendC = dyn_cast<ConstantSDNode>(Dividend.getNode());
  ConstantSDNode *DivisorC = dyn_cast<ConstantSDNode>(Divisor.getNode());
  EVT VT = N->getValueType(0);

  // Vector operations are handed to SimplifyVBinOp first.
  if (VT.isVector()) {
    SDValue Folded = SimplifyVBinOp(N);
    if (Folded.getNode())
      return Folded;
  }

  // c1 / c2 -> constant-folded result (never for a zero divisor)
  if (DividendC && DivisorC && !DivisorC->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, DividendC, DivisorC);

  // x / (1 << c) -> x >>u c
  if (DivisorC && DivisorC->getAPIntValue().isPowerOf2()) {
    SDValue ShAmt = DAG.getConstant(DivisorC->getAPIntValue().logBase2(),
                                    getShiftAmountTy());
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Dividend, ShAmt);
  }

  // x / (c << y) -> x >>u (log2(c) + y) iff c is a power of 2
  ConstantSDNode *ShiftedC = 0;
  if (Divisor.getOpcode() == ISD::SHL)
    ShiftedC = dyn_cast<ConstantSDNode>(Divisor.getOperand(0));
  if (ShiftedC && ShiftedC->getAPIntValue().isPowerOf2()) {
    // The sum is built in the type of the original shift amount.
    SDValue ShiftAmount = Divisor.getOperand(1);
    EVT AmtVT = ShiftAmount.getValueType();
    SDValue TotalShift =
      DAG.getNode(ISD::ADD, N->getDebugLoc(), AmtVT, ShiftAmount,
                  DAG.getConstant(ShiftedC->getAPIntValue().logBase2(),
                                  AmtVT));
    AddToWorkList(TotalShift.getNode());
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Dividend, TotalShift);
  }

  // x / c -> whatever BuildUDIV produces, when integer division is not cheap
  if (DivisorC && !DivisorC->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Alternate = BuildUDIV(N);
    if (Alternate.getNode())
      return Alternate;
  }

  // undef / X -> 0
  if (Dividend.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (Divisor.getOpcode() == ISD::UNDEF)
    return Divisor;

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSREM - Try the signed remainder folds below on N, in order. Returns
/// the replacement value of the first fold that applies, or an empty SDValue
/// when none does.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue Dividend = N->getOperand(0);
  SDValue Divisor = N->getOperand(1);
  ConstantSDNode *DividendC = dyn_cast<ConstantSDNode>(Dividend);
  ConstantSDNode *DivisorC = dyn_cast<ConstantSDNode>(Divisor);
  EVT VT = N->getValueType(0);

  // A constant divisor is only ever acted on when it is non-zero.
  bool NonZeroConstDivisor = DivisorC && !DivisorC->isNullValue();

  // c1 % c2 -> constant-folded result
  if (DividendC && NonZeroConstDivisor)
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, DividendC, DivisorC);

  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector() &&
      DAG.SignBitIsZero(Divisor) && DAG.SignBitIsZero(Dividend))
    return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, Dividend, Divisor);

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (NonZeroConstDivisor) {
    SDValue Quotient = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT,
                                   Dividend, Divisor);
    AddToWorkList(Quotient.getNode());
    SDValue Simplified = combine(Quotient.getNode());
    // Only rewrite when combine() returned a node other than the plain
    // division that was just built.
    if (Simplified.getNode() && Simplified.getNode() != Quotient.getNode()) {
      SDValue Product = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                    Simplified, Divisor);
      SDValue Remainder = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                                      Dividend, Product);
      AddToWorkList(Product.getNode());
      return Remainder;
    }
  }

  // undef % X -> 0
  if (Dividend.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (Divisor.getOpcode() == ISD::UNDEF)
    return Divisor;

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitUREM - Try the unsigned remainder folds below on N, in order. Returns
/// the replacement value of the first fold that applies, or an empty SDValue
/// when none does.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue Dividend = N->getOperand(0);
  SDValue Divisor = N->getOperand(1);
  ConstantSDNode *DividendC = dyn_cast<ConstantSDNode>(Dividend);
  ConstantSDNode *DivisorC = dyn_cast<ConstantSDNode>(Divisor);
  EVT VT = N->getValueType(0);

  // A constant divisor is only ever acted on when it is non-zero.
  bool NonZeroConstDivisor = DivisorC && !DivisorC->isNullValue();

  // c1 % c2 -> constant-folded result
  if (DividendC && NonZeroConstDivisor)
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, DividendC, DivisorC);

  // x % pow2 -> x & (pow2 - 1)
  if (NonZeroConstDivisor && DivisorC->getAPIntValue().isPowerOf2()) {
    SDValue Mask = DAG.getConstant(DivisorC->getAPIntValue()-1, VT);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, Dividend, Mask);
  }

  // x % (pow2 << y) -> x & ((pow2 << y) + -1)
  ConstantSDNode *ShiftedC = 0;
  if (Divisor.getOpcode() == ISD::SHL)
    ShiftedC = dyn_cast<ConstantSDNode>(Divisor.getOperand(0));
  if (ShiftedC && ShiftedC->getAPIntValue().isPowerOf2()) {
    // Adding the all-ones constant subtracts one from the shifted value.
    SDValue Mask =
      DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, Divisor,
                  DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                  VT));
    AddToWorkList(Mask.getNode());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, Dividend, Mask);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (NonZeroConstDivisor) {
    SDValue Quotient = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT,
                                   Dividend, Divisor);
    AddToWorkList(Quotient.getNode());
    SDValue Simplified = combine(Quotient.getNode());
    // Only rewrite when combine() returned a node other than the plain
    // division that was just built.
    if (Simplified.getNode() && Simplified.getNode() != Quotient.getNode()) {
      SDValue Product = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                    Simplified, Divisor);
      SDValue Remainder = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                                      Dividend, Product);
      AddToWorkList(Product.getNode());
      return Remainder;
    }
  }

  // undef % X -> 0
  if (Dividend.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (Divisor.getOpcode() == ISD::UNDEF)
    return Divisor;

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitMULHS - Try the ISD::MULHS folds below on N, in order. Returns the
/// replacement value of the first fold that applies, or an empty SDValue
/// when none does.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
  EVT VT = N->getValueType(0);

  if (RHSC) {
    // mulhs x, 0 -> 0
    if (RHSC->isNullValue())
      return RHS;

    // mulhs x, 1 -> sra x, size(x)-1
    if (RHSC->getAPIntValue() == 1) {
      EVT OpVT = LHS.getValueType();
      SDValue ShAmt = DAG.getConstant(OpVT.getSizeInBits() - 1,
                                      getShiftAmountTy());
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), OpVT, LHS, ShAmt);
    }
  }

  // mulhs x, undef -> 0 (either operand may be the undef)
  if (LHS.getOpcode() == ISD::UNDEF || RHS.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitMULHU - Try the ISD::MULHU folds below on N, in order. Returns the
/// replacement value of the first fold that applies, or an empty SDValue
/// when none does.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
  EVT VT = N->getValueType(0);

  if (RHSC) {
    // mulhu x, 0 -> 0
    if (RHSC->isNullValue())
      return RHS;

    // mulhu x, 1 -> 0
    if (RHSC->getAPIntValue() == 1)
      return DAG.getConstant(0, LHS.getValueType());
  }

  // mulhu x, undef -> 0 (either operand may be the undef)
  if (LHS.getOpcode() == ISD::UNDEF || RHS.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  return SDValue();
}
|
|
|
|
|
2007-10-08 17:57:15 +00:00
|
|
|
/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
|
|
|
|
/// compute two values. LoOp and HiOp give the opcodes for the two computations
|
|
|
|
/// that are being performed. Return true if a simplification was made.
|
|
|
|
///
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
|
2008-07-27 21:46:04 +00:00
|
|
|
unsigned HiOp) {
|
2007-10-08 17:57:15 +00:00
|
|
|
// If the high half is not needed, just compute the low half.
|
2007-11-08 09:25:29 +00:00
|
|
|
bool HiExists = N->hasAnyUseOfValue(1);
|
|
|
|
if (!HiExists &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations ||
|
2007-10-08 17:57:15 +00:00
|
|
|
TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
|
2009-01-30 03:08:40 +00:00
|
|
|
SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
|
|
|
|
N->op_begin(), N->getNumOperands());
|
2008-01-26 01:09:19 +00:00
|
|
|
return CombineTo(N, Res, Res);
|
2007-10-08 17:57:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// If the low half is not needed, just compute the high half.
|
2007-11-08 09:25:29 +00:00
|
|
|
bool LoExists = N->hasAnyUseOfValue(0);
|
|
|
|
if (!LoExists &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations ||
|
2007-10-08 17:57:15 +00:00
|
|
|
TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
|
2009-01-30 03:08:40 +00:00
|
|
|
SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
|
|
|
|
N->op_begin(), N->getNumOperands());
|
2008-01-26 01:09:19 +00:00
|
|
|
return CombineTo(N, Res, Res);
|
2007-10-08 17:57:15 +00:00
|
|
|
}
|
|
|
|
|
2007-11-08 09:25:29 +00:00
|
|
|
// If both halves are used, return as it is.
|
|
|
|
if (LoExists && HiExists)
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2007-11-08 09:25:29 +00:00
|
|
|
|
|
|
|
// If the two computed results can be simplified separately, separate them.
|
|
|
|
if (LoExists) {
|
2009-01-30 03:08:40 +00:00
|
|
|
SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
|
|
|
|
N->op_begin(), N->getNumOperands());
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Lo.getNode());
|
|
|
|
SDValue LoOpt = combine(Lo.getNode());
|
|
|
|
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations ||
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
|
2008-01-26 01:09:19 +00:00
|
|
|
return CombineTo(N, LoOpt, LoOpt);
|
2007-10-08 17:57:15 +00:00
|
|
|
}
|
2007-11-08 09:25:29 +00:00
|
|
|
|
|
|
|
if (HiExists) {
|
2009-01-30 03:08:40 +00:00
|
|
|
SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
|
2008-11-24 14:53:14 +00:00
|
|
|
N->op_begin(), N->getNumOperands());
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Hi.getNode());
|
|
|
|
SDValue HiOpt = combine(Hi.getNode());
|
|
|
|
if (HiOpt.getNode() && HiOpt != Hi &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations ||
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
|
2008-01-26 01:09:19 +00:00
|
|
|
return CombineTo(N, HiOpt, HiOpt);
|
2007-10-08 17:57:15 +00:00
|
|
|
}
|
2009-01-30 03:08:40 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2007-10-08 17:57:15 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSMUL_LOHI - Run the generic two-result simplification on N, passing
/// MUL as the low-result opcode and MULHS as the high-result opcode.
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  SDValue Simplified = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
  // Forward a successful simplification; otherwise hand back an empty
  // SDValue.
  return Simplified.getNode() ? Simplified : SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitUMUL_LOHI - Run the generic two-result simplification on N, passing
/// MUL as the low-result opcode and MULHU as the high-result opcode.
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  SDValue Simplified = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  // Forward a successful simplification; otherwise hand back an empty
  // SDValue.
  return Simplified.getNode() ? Simplified : SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSDIVREM - Run the generic two-result simplification on N, passing
/// SDIV as the opcode for result 0 and SREM as the opcode for result 1.
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  SDValue Simplified = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  // Forward a successful simplification; otherwise hand back an empty
  // SDValue.
  return Simplified.getNode() ? Simplified : SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitUDIVREM - Run the generic two-result simplification on N, passing
/// UDIV as the opcode for result 0 and UREM as the opcode for result 1.
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  SDValue Simplified = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  // Forward a successful simplification; otherwise hand back an empty
  // SDValue.
  return Simplified.getNode() ? Simplified : SDValue();
}
|
|
|
|
|
2006-05-05 05:51:50 +00:00
|
|
|
/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
|
|
|
|
/// two operands of the same opcode, try to simplify it.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N0.getValueType();
|
2006-05-05 05:51:50 +00:00
|
|
|
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Implement:
// fold (and (sext x), (sext y)) -> (sext (and x, y))
// fold (or (sext x), (sext y)) -> (sext (or x, y))
// fold (xor (sext x), (sext y)) -> (sext (xor x, y))
// fold (and (aext x), (aext y)) -> (aext (and x, y))
// fold (or (aext x), (aext y)) -> (aext (or x, y))
// fold (xor (aext x), (aext y)) -> (aext (xor x, y))
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28123 91177308-0d34-0410-b5e6-96231b3b80d8
2006-05-05 06:31:05 +00:00
|
|
|
// For each of OP in AND/OR/XOR:
|
|
|
|
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
|
|
|
|
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
|
|
|
|
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
2009-04-08 00:15:30 +00:00
|
|
|
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
|
2009-12-03 07:11:29 +00:00
|
|
|
//
|
|
|
|
// do not sink logical op inside of a vector extend, since it may combine
|
|
|
|
// into a vsetcc.
|
Implement:
// fold (and (sext x), (sext y)) -> (sext (and x, y))
// fold (or (sext x), (sext y)) -> (sext (or x, y))
// fold (xor (sext x), (sext y)) -> (sext (xor x, y))
// fold (and (aext x), (aext y)) -> (aext (and x, y))
// fold (or (aext x), (aext y)) -> (aext (or x, y))
// fold (xor (aext x), (aext y)) -> (aext (xor x, y))
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28123 91177308-0d34-0410-b5e6-96231b3b80d8
2006-05-05 06:31:05 +00:00
|
|
|
if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurrs with the new
instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
2009-04-08 00:15:30 +00:00
|
|
|
N0.getOpcode() == ISD::SIGN_EXTEND ||
|
|
|
|
(N0.getOpcode() == ISD::TRUNCATE &&
|
|
|
|
!TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
|
2009-12-03 07:11:29 +00:00
|
|
|
!VT.isVector() &&
|
2009-08-08 20:42:17 +00:00
|
|
|
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
|
|
|
|
(!LegalOperations ||
|
|
|
|
TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
|
2009-01-30 19:25:47 +00:00
|
|
|
SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
|
|
|
|
N0.getOperand(0).getValueType(),
|
|
|
|
N0.getOperand(0), N1.getOperand(0));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(ORNode.getNode());
|
2009-01-30 19:25:47 +00:00
|
|
|
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
|
2006-05-05 05:51:50 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Pull and through and/or/xor. This compiles some bitfield code to:
mov EAX, DWORD PTR [ESP + 4]
mov ECX, DWORD PTR [EAX]
mov EDX, ECX
add EDX, EDX
or EDX, ECX
and EDX, -2147483648
and ECX, 2147483647
or EDX, ECX
mov DWORD PTR [EAX], EDX
ret
instead of:
sub ESP, 4
mov DWORD PTR [ESP], ESI
mov EAX, DWORD PTR [ESP + 8]
mov ECX, DWORD PTR [EAX]
mov EDX, ECX
add EDX, EDX
mov ESI, ECX
and ESI, -2147483648
and EDX, -2147483648
or EDX, ESI
and ECX, 2147483647
or EDX, ECX
mov DWORD PTR [EAX], EDX
mov ESI, DWORD PTR [ESP]
add ESP, 4
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28122 91177308-0d34-0410-b5e6-96231b3b80d8
2006-05-05 06:10:43 +00:00
|
|
|
// For each of OP in SHL/SRL/SRA/AND...
|
|
|
|
// fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
|
|
|
|
// fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
|
|
|
|
// fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
|
2006-05-05 05:51:50 +00:00
|
|
|
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
|
Pull and through and/or/xor. This compiles some bitfield code to:
mov EAX, DWORD PTR [ESP + 4]
mov ECX, DWORD PTR [EAX]
mov EDX, ECX
add EDX, EDX
or EDX, ECX
and EDX, -2147483648
and ECX, 2147483647
or EDX, ECX
mov DWORD PTR [EAX], EDX
ret
instead of:
sub ESP, 4
mov DWORD PTR [ESP], ESI
mov EAX, DWORD PTR [ESP + 8]
mov ECX, DWORD PTR [EAX]
mov EDX, ECX
add EDX, EDX
mov ESI, ECX
and ESI, -2147483648
and EDX, -2147483648
or EDX, ESI
and ECX, 2147483647
or EDX, ECX
mov DWORD PTR [EAX], EDX
mov ESI, DWORD PTR [ESP]
add ESP, 4
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28122 91177308-0d34-0410-b5e6-96231b3b80d8
2006-05-05 06:10:43 +00:00
|
|
|
N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
|
2006-05-05 05:51:50 +00:00
|
|
|
N0.getOperand(1) == N1.getOperand(1)) {
|
2009-01-30 19:25:47 +00:00
|
|
|
SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
|
|
|
|
N0.getOperand(0).getValueType(),
|
|
|
|
N0.getOperand(0), N1.getOperand(0));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(ORNode.getNode());
|
2009-01-30 19:25:47 +00:00
|
|
|
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
|
|
|
|
ORNode, N0.getOperand(1));
|
2006-05-05 05:51:50 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-05-05 05:51:50 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitAND(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
|
|
|
SDValue LL, LR, RL, RR, CC0, CC1;
|
2005-09-02 21:18:40 +00:00
|
|
|
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
|
|
|
|
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N1.getValueType();
|
2008-06-06 12:08:01 +00:00
|
|
|
unsigned BitWidth = VT.getSizeInBits();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// fold vector ops
|
2008-06-06 12:08:01 +00:00
|
|
|
if (VT.isVector()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue FoldedVOp = SimplifyVBinOp(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (FoldedVOp.getNode()) return FoldedVOp;
|
2007-07-13 20:03:40 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-07-03 14:03:57 +00:00
|
|
|
// fold (and x, undef) -> 0
|
2007-07-10 14:20:37 +00:00
|
|
|
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
|
2007-07-03 14:03:57 +00:00
|
|
|
return DAG.getConstant(0, VT);
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (and c1, c2) -> c1&c2
|
2005-09-02 21:18:40 +00:00
|
|
|
if (N0C && N1C)
|
2008-09-24 10:25:02 +00:00
|
|
|
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
|
2005-09-07 23:25:52 +00:00
|
|
|
// canonicalize constant to RHS
|
2005-10-18 00:28:13 +00:00
|
|
|
if (N0C && !N1C)
|
2009-02-01 11:19:36 +00:00
|
|
|
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (and x, -1) -> x
|
2005-09-02 21:18:40 +00:00
|
|
|
if (N1C && N1C->isAllOnesValue())
|
2005-09-06 04:43:02 +00:00
|
|
|
return N0;
|
|
|
|
// if (and x, c) is known to be zero, return 0
|
2008-07-27 21:46:04 +00:00
|
|
|
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
|
2008-02-25 21:11:39 +00:00
|
|
|
APInt::getAllOnesValue(BitWidth)))
|
2005-09-06 04:43:02 +00:00
|
|
|
return DAG.getConstant(0, VT);
|
2006-02-03 06:46:56 +00:00
|
|
|
// reassociate and
|
2009-01-30 00:45:56 +00:00
|
|
|
SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (RAND.getNode() != 0)
|
2006-02-03 06:46:56 +00:00
|
|
|
return RAND;
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
|
2005-11-02 18:42:59 +00:00
|
|
|
if (N1C && N0.getOpcode() == ISD::OR)
|
2005-09-01 00:19:25 +00:00
|
|
|
if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
|
2008-03-13 22:13:53 +00:00
|
|
|
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
|
2005-09-06 04:43:02 +00:00
|
|
|
return N1;
|
Turn any_extend nodes into zero_extend nodes when it allows us to remove an
and instruction. This allows us to compile stuff like this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
to this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
ret
instead of this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
This occurs quite a bit with the X86 backend. For example, 25 times in
lambda, 30 times in 177.mesa, 14 times in galgel, 70 times in fma3d,
25 times in vpr, several hundred times in gcc, ~45 times in crafty,
~60 times in parser, ~140 times in eon, 110 times in perlbmk, 55 on gap,
16 times on bzip2, 14 times on twolf, and 1-2 times in many other SPEC2K
programs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25901 91177308-0d34-0410-b5e6-96231b3b80d8
2006-02-02 07:17:31 +00:00
|
|
|
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
|
|
|
|
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue N0Op0 = N0.getOperand(0);
|
2008-02-25 21:11:39 +00:00
|
|
|
APInt Mask = ~N1C->getAPIntValue();
|
|
|
|
Mask.trunc(N0Op0.getValueSizeInBits());
|
|
|
|
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
|
2009-01-30 20:43:18 +00:00
|
|
|
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
|
|
|
|
N0.getValueType(), N0Op0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-03-01 21:47:21 +00:00
|
|
|
// Replace uses of the AND with uses of the Zero extend node.
|
|
|
|
CombineTo(N, Zext);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Turn any_extend nodes into zero_extend nodes when it allows us to remove an
and instruction. This allows us to compile stuff like this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
to this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
ret
instead of this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
This occurs quite a bit with the X86 backend. For example, 25 times in
lambda, 30 times in 177.mesa, 14 times in galgel, 70 times in fma3d,
25 times in vpr, several hundred times in gcc, ~45 times in crafty,
~60 times in parser, ~140 times in eon, 110 times in perlbmk, 55 on gap,
16 times on bzip2, 14 times on twolf, and 1-2 times in many other SPEC2K
programs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25901 91177308-0d34-0410-b5e6-96231b3b80d8
2006-02-02 07:17:31 +00:00
|
|
|
// We actually want to replace all uses of the any_extend with the
|
|
|
|
// zero_extend, to avoid duplicating things. This will later cause this
|
|
|
|
// AND to be folded.
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(N0.getNode(), Zext);
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
Turn any_extend nodes into zero_extend nodes when it allows us to remove an
and instruction. This allows us to compile stuff like this:
bool %X(int %X) {
%Y = add int %X, 14
%Z = setne int %Y, 12345
ret bool %Z
}
to this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
ret
instead of this:
_X:
cmpl $12331, 4(%esp)
setne %al
movzbl %al, %eax
andl $1, %eax
ret
This occurs quite a bit with the X86 backend. For example, 25 times in
lambda, 30 times in 177.mesa, 14 times in galgel, 70 times in fma3d,
25 times in vpr, several hundred times in gcc, ~45 times in crafty,
~60 times in parser, ~140 times in eon, 110 times in perlbmk, 55 on gap,
16 times on bzip2, 14 times on twolf, and 1-2 times in many other SPEC2K
programs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25901 91177308-0d34-0410-b5e6-96231b3b80d8
2006-02-02 07:17:31 +00:00
|
|
|
}
|
|
|
|
}
|
2005-09-09 19:49:52 +00:00
|
|
|
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
|
|
|
|
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
|
|
|
|
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
|
|
|
|
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-09-09 19:49:52 +00:00
|
|
|
if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
|
2008-06-06 12:08:01 +00:00
|
|
|
LL.getValueType().isInteger()) {
|
2009-01-30 20:43:18 +00:00
|
|
|
// fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
|
2008-03-13 22:13:53 +00:00
|
|
|
if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
|
2009-01-30 20:43:18 +00:00
|
|
|
SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
|
|
|
|
LR.getValueType(), LL, RL);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(ORNode.getNode());
|
2009-01-30 20:43:18 +00:00
|
|
|
return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
|
2005-09-09 19:49:52 +00:00
|
|
|
}
|
2009-01-30 20:43:18 +00:00
|
|
|
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
|
2005-09-09 19:49:52 +00:00
|
|
|
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
|
2009-01-30 20:43:18 +00:00
|
|
|
SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
|
|
|
|
LR.getValueType(), LL, RL);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(ANDNode.getNode());
|
2009-01-30 20:43:18 +00:00
|
|
|
return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
|
2005-09-09 19:49:52 +00:00
|
|
|
}
|
2009-01-30 20:43:18 +00:00
|
|
|
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
|
2005-09-09 19:49:52 +00:00
|
|
|
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
|
2009-01-30 20:43:18 +00:00
|
|
|
SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
|
|
|
|
LR.getValueType(), LL, RL);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(ORNode.getNode());
|
2009-01-30 20:43:18 +00:00
|
|
|
return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
|
2005-09-09 19:49:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// canonicalize equivalent to ll == rl
|
|
|
|
if (LL == RR && LR == RL) {
|
|
|
|
Op1 = ISD::getSetCCSwappedOperands(Op1);
|
|
|
|
std::swap(RL, RR);
|
|
|
|
}
|
|
|
|
if (LL == RL && LR == RR) {
|
2008-06-06 12:08:01 +00:00
|
|
|
bool isInteger = LL.getValueType().isInteger();
|
2005-09-09 19:49:52 +00:00
|
|
|
ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
|
2008-10-28 07:11:07 +00:00
|
|
|
if (Result != ISD::SETCC_INVALID &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
|
2009-01-30 20:43:18 +00:00
|
|
|
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
|
|
|
|
LL, LR, Result);
|
2005-09-09 19:49:52 +00:00
|
|
|
}
|
|
|
|
}
|
2006-05-05 05:51:50 +00:00
|
|
|
|
2009-01-30 20:43:18 +00:00
|
|
|
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
|
2006-05-05 05:51:50 +00:00
|
|
|
if (N0.getOpcode() == N1.getOpcode()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Tmp.getNode()) return Tmp;
|
2005-09-16 00:54:12 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-02-03 22:24:05 +00:00
|
|
|
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
|
|
|
|
// fold (and (sra)) -> (and (srl)) when possible.
|
2008-06-06 12:08:01 +00:00
|
|
|
if (!VT.isVector() &&
|
2008-07-27 21:46:04 +00:00
|
|
|
SimplifyDemandedBits(SDValue(N, 0)))
|
|
|
|
return SDValue(N, 0);
|
2005-10-13 03:11:28 +00:00
|
|
|
// fold (zext_inreg (extload x)) -> (zextload x)
|
2008-08-28 21:40:38 +00:00
|
|
|
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
|
2006-10-09 20:57:25 +00:00
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-09-23 21:02:20 +00:00
|
|
|
EVT MemVT = LN0->getMemoryVT();
|
2005-10-13 18:34:58 +00:00
|
|
|
// If we zero all the possible extended bits, then we can turn this into
|
|
|
|
// a zextload if we are running before legalize or the operation is legal.
|
2008-02-25 21:11:39 +00:00
|
|
|
unsigned BitWidth = N1.getValueSizeInBits();
|
|
|
|
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
|
2009-09-23 21:02:20 +00:00
|
|
|
BitWidth - MemVT.getSizeInBits())) &&
|
2008-11-24 14:53:14 +00:00
|
|
|
((!LegalOperations && !LN0->isVolatile()) ||
|
2009-09-23 21:02:20 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
|
2009-01-30 20:43:18 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
|
|
|
|
LN0->getChain(), LN0->getBasePtr(),
|
|
|
|
LN0->getSrcValue(),
|
2009-09-23 21:02:20 +00:00
|
|
|
LN0->getSrcValueOffset(), MemVT,
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
2006-03-01 04:03:14 +00:00
|
|
|
AddToWorkList(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2005-10-13 03:11:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
|
2008-08-28 21:40:38 +00:00
|
|
|
if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
|
2007-03-07 08:07:03 +00:00
|
|
|
N0.hasOneUse()) {
|
2006-10-09 20:57:25 +00:00
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-09-23 21:02:20 +00:00
|
|
|
EVT MemVT = LN0->getMemoryVT();
|
2005-10-13 18:34:58 +00:00
|
|
|
// If we zero all the possible extended bits, then we can turn this into
|
|
|
|
// a zextload if we are running before legalize or the operation is legal.
|
2008-02-25 21:11:39 +00:00
|
|
|
unsigned BitWidth = N1.getValueSizeInBits();
|
|
|
|
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
|
2009-09-23 21:02:20 +00:00
|
|
|
BitWidth - MemVT.getSizeInBits())) &&
|
2008-11-24 14:53:14 +00:00
|
|
|
((!LegalOperations && !LN0->isVolatile()) ||
|
2009-09-23 21:02:20 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
|
2009-01-30 20:43:18 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
|
|
|
|
LN0->getChain(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getBasePtr(), LN0->getSrcValue(),
|
2009-09-23 21:02:20 +00:00
|
|
|
LN0->getSrcValueOffset(), MemVT,
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
2006-03-01 04:03:14 +00:00
|
|
|
AddToWorkList(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2005-10-13 03:11:28 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Compile:
unsigned foo4(unsigned short *P) { return *P & 255; }
unsigned foo5(short *P) { return *P & 255; }
to:
_foo4:
lbz r3,1(r3)
blr
_foo5:
lbz r3,1(r3)
blr
not:
_foo4:
lhz r2, 0(r3)
rlwinm r3, r2, 0, 24, 31
blr
_foo5:
lhz r2, 0(r3)
rlwinm r3, r2, 0, 24, 31
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26419 91177308-0d34-0410-b5e6-96231b3b80d8
2006-02-28 06:49:37 +00:00
|
|
|
// fold (and (load x), 255) -> (zextload x, i8)
|
|
|
|
// fold (and (extload x, i16), 255) -> (zextload x, i8)
|
2006-10-09 20:57:25 +00:00
|
|
|
if (N1C && N0.getOpcode() == ISD::LOAD) {
|
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
|
|
|
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
LN0->isUnindexed() && N0.hasOneUse() &&
|
|
|
|
// Do not change the width of a volatile load.
|
|
|
|
!LN0->isVolatile()) {
|
2009-08-11 20:47:22 +00:00
|
|
|
EVT ExtVT = MVT::Other;
|
2008-06-09 11:32:28 +00:00
|
|
|
uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
|
|
|
|
if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
|
2009-08-12 00:36:31 +00:00
|
|
|
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
|
2008-06-09 11:32:28 +00:00
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT LoadedVT = LN0->getMemoryVT();
|
2009-01-30 20:43:18 +00:00
|
|
|
|
2008-06-16 08:14:38 +00:00
|
|
|
// Do not generate loads of non-round integer types since these can
|
|
|
|
// be expensive (and would be wrong if the type is not byte sized).
|
2009-08-11 20:47:22 +00:00
|
|
|
if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
|
2009-08-10 22:56:29 +00:00
|
|
|
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
|
|
|
|
EVT PtrType = N0.getOperand(1).getValueType();
|
2009-01-30 20:43:18 +00:00
|
|
|
|
2006-10-09 20:57:25 +00:00
|
|
|
// For big endian targets, we need to add an offset to the pointer to
|
|
|
|
// load the correct bytes. For little endian systems, we merely need to
|
|
|
|
// read fewer bytes from the same pointer.
|
2009-09-23 21:07:02 +00:00
|
|
|
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
|
|
|
|
unsigned EVTStoreBytes = ExtVT.getStoreSize();
|
2007-11-09 08:57:19 +00:00
|
|
|
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
|
2007-10-28 12:59:45 +00:00
|
|
|
unsigned Alignment = LN0->getAlignment();
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue NewPtr = LN0->getBasePtr();
|
2009-01-30 20:43:18 +00:00
|
|
|
|
2008-02-11 10:37:04 +00:00
|
|
|
if (TLI.isBigEndian()) {
|
2009-01-31 03:12:48 +00:00
|
|
|
NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
|
2009-01-30 20:43:18 +00:00
|
|
|
NewPtr, DAG.getConstant(PtrOff, PtrType));
|
2007-10-28 12:59:45 +00:00
|
|
|
Alignment = MinAlign(Alignment, PtrOff);
|
|
|
|
}
|
2009-01-30 20:43:18 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(NewPtr.getNode());
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Load =
|
2009-01-30 20:43:18 +00:00
|
|
|
DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
|
|
|
|
NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
|
2009-08-10 22:56:29 +00:00
|
|
|
ExtVT, LN0->isVolatile(), Alignment);
|
2006-10-09 20:57:25 +00:00
|
|
|
AddToWorkList(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(N0.getNode(), Load, Load.getValue(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2006-10-09 20:57:25 +00:00
|
|
|
}
|
Fold "and (LOAD P), 255" -> zextload. This allows us to compile:
unsigned foo3(unsigned *P) { return *P & 255; }
as:
_foo3:
lbz r3, 3(r3)
blr
instead of:
_foo3:
lwz r2, 0(r3)
rlwinm r3, r2, 0, 24, 31
blr
and:
unsigned short foo2(float a) { return a; }
as:
_foo2:
fctiwz f0, f1
stfd f0, -8(r1)
lhz r3, -2(r1)
blr
instead of:
_foo2:
fctiwz f0, f1
stfd f0, -8(r1)
lwz r2, -4(r1)
rlwinm r3, r2, 0, 16, 31
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26417 91177308-0d34-0410-b5e6-96231b3b80d8
2006-02-28 06:35:35 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitOR - Try to simplify the binary node N (built and matched here as an
/// ISD::OR of operand 0 and operand 1).  The folds below are attempted in
/// order and the first one that applies determines the result.  Returns the
/// simplified value, or a null SDValue() if none of the folds apply.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Out-parameters filled in by isSetCCEquivalent below: the LHS/RHS operands
  // and condition code of each side's comparison.
  SDValue LL, LR, RL, RR, CC0, CC1;
  // Non-null only when the corresponding operand is a constant node.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (or x, undef) -> -1
  // For vectors the all-ones constant is sized by the element type but is
  // still created with the full vector type VT.
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;
  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
  if (ROR.getNode() != 0)
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // Only done when the inner AND has a single use and a constant mask.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
             isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                                   N0.getOperand(0), N1),
                       DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Both comparisons use the same constant RHS and the same condition code,
    // and compare integer values.
    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setlt (or X, Y), 0)
      // The condition code is carried over unchanged (Op1 == Op0 here).
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    // If the second comparison has its operands in the opposite order, swap
    // them (and adjust its condition code) so both sides line up.
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // Same operands on both sides: merge the two condition codes into one
    // setcc, provided a valid combined code exists and (once operations must
    // be legal) the target supports it for this operand type.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  // C3 is C1|C2, as built by the getConstant call below.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero.  Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
    return SDValue(Rot, 0);

  // No fold applied.
  return SDValue();
}
|
|
|
|
|
|
|
|
/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
|
2008-07-27 21:46:04 +00:00
|
|
|
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
|
2006-09-14 20:50:57 +00:00
|
|
|
if (Op.getOpcode() == ISD::AND) {
|
2006-11-02 20:25:50 +00:00
|
|
|
if (isa<ConstantSDNode>(Op.getOperand(1))) {
|
2006-09-14 20:50:57 +00:00
|
|
|
Mask = Op.getOperand(1);
|
|
|
|
Op = Op.getOperand(0);
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
|
|
|
|
Shift = Op;
|
|
|
|
return true;
|
|
|
|
}
|
2009-01-30 20:59:34 +00:00
|
|
|
|
2009-02-17 22:15:04 +00:00
|
|
|
return false;
|
2006-09-14 20:50:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
|
|
|
|
// idioms for rotate, and if the target supports rotation instructions, generate
|
|
|
|
// a rot[lr].
|
2009-01-30 21:14:50 +00:00
|
|
|
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = LHS.getValueType();
|
2006-09-14 20:50:57 +00:00
|
|
|
if (!TLI.isTypeLegal(VT)) return 0;
|
|
|
|
|
|
|
|
// The target must have at least one rotate flavor.
|
2009-01-28 17:46:25 +00:00
|
|
|
bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
|
|
|
|
bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
|
2006-09-14 20:50:57 +00:00
|
|
|
if (!HasROTL && !HasROTR) return 0;
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
// Match "(X shl/srl V1) & V2" where V2 may not be present.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue LHSShift; // The shift.
|
|
|
|
SDValue LHSMask; // AND value if any.
|
2006-09-14 20:50:57 +00:00
|
|
|
if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
|
|
|
|
return 0; // Not part of a rotate.
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue RHSShift; // The shift.
|
|
|
|
SDValue RHSMask; // AND value if any.
|
2006-09-14 20:50:57 +00:00
|
|
|
if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
|
|
|
|
return 0; // Not part of a rotate.
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
|
|
|
|
return 0; // Not shifting the same value.
|
|
|
|
|
|
|
|
if (LHSShift.getOpcode() == RHSShift.getOpcode())
|
|
|
|
return 0; // Shifts must disagree.
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
// Canonicalize shl to left side in a shl/srl pair.
|
|
|
|
if (RHSShift.getOpcode() == ISD::SHL) {
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
std::swap(LHSShift, RHSShift);
|
|
|
|
std::swap(LHSMask , RHSMask );
|
|
|
|
}
|
|
|
|
|
2008-06-06 12:08:01 +00:00
|
|
|
unsigned OpSizeInBits = VT.getSizeInBits();
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue LHSShiftArg = LHSShift.getOperand(0);
|
|
|
|
SDValue LHSShiftAmt = LHSShift.getOperand(1);
|
|
|
|
SDValue RHSShiftAmt = RHSShift.getOperand(1);
|
2006-09-14 20:50:57 +00:00
|
|
|
|
|
|
|
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
|
|
|
|
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
|
2007-04-02 21:36:32 +00:00
|
|
|
if (LHSShiftAmt.getOpcode() == ISD::Constant &&
|
|
|
|
RHSShiftAmt.getOpcode() == ISD::Constant) {
|
2008-09-12 16:56:44 +00:00
|
|
|
uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
|
|
|
|
uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
|
2006-09-14 20:50:57 +00:00
|
|
|
if ((LShVal + RShVal) != OpSizeInBits)
|
|
|
|
return 0;
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Rot;
|
2006-09-14 20:50:57 +00:00
|
|
|
if (HasROTL)
|
2009-01-30 21:14:50 +00:00
|
|
|
Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
|
2006-09-14 20:50:57 +00:00
|
|
|
else
|
2009-01-30 21:14:50 +00:00
|
|
|
Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
// If there is an AND of either shifted operand, apply it to the result.
|
2008-08-28 21:40:38 +00:00
|
|
|
if (LHSMask.getNode() || RHSMask.getNode()) {
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
if (LHSMask.getNode()) {
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
|
|
|
|
Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
|
2006-08-31 07:41:12 +00:00
|
|
|
}
|
2008-08-28 21:40:38 +00:00
|
|
|
if (RHSMask.getNode()) {
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
|
|
|
|
Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
|
2006-09-14 20:50:57 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 21:14:50 +00:00
|
|
|
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
|
2006-09-14 20:50:57 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
return Rot.getNode();
|
2006-09-14 20:50:57 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
// If there is a mask here, and we have a variable shift, we can't be sure
|
|
|
|
// that we're masking out the right stuff.
|
2008-08-28 21:40:38 +00:00
|
|
|
if (LHSMask.getNode() || RHSMask.getNode())
|
2006-09-14 20:50:57 +00:00
|
|
|
return 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
// fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
|
|
|
|
// fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
|
2007-04-02 21:36:32 +00:00
|
|
|
if (RHSShiftAmt.getOpcode() == ISD::SUB &&
|
|
|
|
LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
|
2009-02-17 22:15:04 +00:00
|
|
|
if (ConstantSDNode *SUBC =
|
2007-04-02 21:36:32 +00:00
|
|
|
dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
|
2008-03-13 22:13:53 +00:00
|
|
|
if (SUBC->getAPIntValue() == OpSizeInBits) {
|
2006-09-14 20:50:57 +00:00
|
|
|
if (HasROTL)
|
2009-01-30 21:14:50 +00:00
|
|
|
return DAG.getNode(ISD::ROTL, DL, VT,
|
|
|
|
LHSShiftArg, LHSShiftAmt).getNode();
|
2006-09-14 20:50:57 +00:00
|
|
|
else
|
2009-01-30 21:14:50 +00:00
|
|
|
return DAG.getNode(ISD::ROTR, DL, VT,
|
|
|
|
LHSShiftArg, RHSShiftAmt).getNode();
|
2008-02-20 11:10:28 +00:00
|
|
|
}
|
2006-08-31 07:41:12 +00:00
|
|
|
}
|
2006-01-11 21:21:00 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
// fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
|
|
|
|
// fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
|
2007-04-02 21:36:32 +00:00
|
|
|
if (LHSShiftAmt.getOpcode() == ISD::SUB &&
|
|
|
|
RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
|
2009-02-17 22:15:04 +00:00
|
|
|
if (ConstantSDNode *SUBC =
|
2007-04-02 21:36:32 +00:00
|
|
|
dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
|
2008-03-13 22:13:53 +00:00
|
|
|
if (SUBC->getAPIntValue() == OpSizeInBits) {
|
2008-08-31 01:13:31 +00:00
|
|
|
if (HasROTR)
|
2009-01-30 21:14:50 +00:00
|
|
|
return DAG.getNode(ISD::ROTR, DL, VT,
|
|
|
|
LHSShiftArg, RHSShiftAmt).getNode();
|
2008-08-31 01:13:31 +00:00
|
|
|
else
|
2009-01-30 21:14:50 +00:00
|
|
|
return DAG.getNode(ISD::ROTL, DL, VT,
|
|
|
|
LHSShiftArg, LHSShiftAmt).getNode();
|
2008-02-20 11:10:28 +00:00
|
|
|
}
|
2007-04-02 21:36:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-10-17 01:23:35 +00:00
|
|
|
// Look for sign/zext/any-extended or truncate cases:
|
2007-04-02 21:36:32 +00:00
|
|
|
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
|
|
|
|
|| LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
|
2008-10-17 01:23:35 +00:00
|
|
|
|| LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
|
|
|
|
|| LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
|
2007-04-02 21:36:32 +00:00
|
|
|
(RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
|
|
|
|
|| RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
|
2008-10-17 01:23:35 +00:00
|
|
|
|| RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
|
|
|
|
|| RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
|
|
|
|
SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
|
2007-04-02 21:36:32 +00:00
|
|
|
if (RExtOp0.getOpcode() == ISD::SUB &&
|
|
|
|
RExtOp0.getOperand(1) == LExtOp0) {
|
|
|
|
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
|
- Fix comment so that it describes how the code really works:
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y)
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotr x, (sub 32, y))
Example: (x == 0xDEADBEEF and y == 4)
(x << 4) | (x >> 28)
=> 0xEADBEEF0 | 0x0000000D
=> 0xEADBEEFD
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> 0xEADBEEFD
- Fix comment and code for second version. It wasn't using the rot* properly.
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotr x, y)
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotl x, (sub 32, y))
(x << 28) | (x >> 4)
=> 0xD0000000 | 0x0DEADBEE
=> 0xDDEADBEE
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> (0xEADBEEFD)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55575 91177308-0d34-0410-b5e6-96231b3b80d8
2008-08-31 00:37:27 +00:00
|
|
|
// (rotl x, y)
|
2007-04-02 21:36:32 +00:00
|
|
|
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
|
- Fix comment so that it describes how the code really works:
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y)
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotr x, (sub 32, y))
Example: (x == 0xDEADBEEF and y == 4)
(x << 4) | (x >> 28)
=> 0xEADBEEF0 | 0x0000000D
=> 0xEADBEEFD
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> 0xEADBEEFD
- Fix comment and code for second version. It wasn't using the rot* properly.
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotr x, y)
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotl x, (sub 32, y))
(x << 28) | (x >> 4)
=> 0xD0000000 | 0x0DEADBEE
=> 0xDDEADBEE
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> (0xEADBEEFD)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55575 91177308-0d34-0410-b5e6-96231b3b80d8
2008-08-31 00:37:27 +00:00
|
|
|
// (rotr x, (sub 32, y))
|
2008-10-17 01:23:35 +00:00
|
|
|
if (ConstantSDNode *SUBC =
|
|
|
|
dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
|
2008-03-13 22:13:53 +00:00
|
|
|
if (SUBC->getAPIntValue() == OpSizeInBits) {
|
2009-01-30 21:14:50 +00:00
|
|
|
return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
|
|
|
|
LHSShiftArg,
|
2008-08-30 19:29:20 +00:00
|
|
|
HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
|
2007-04-02 21:36:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (LExtOp0.getOpcode() == ISD::SUB &&
|
|
|
|
RExtOp0 == LExtOp0.getOperand(1)) {
|
2009-02-17 22:15:04 +00:00
|
|
|
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
|
- Fix comment so that it describes how the code really works:
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y)
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotr x, (sub 32, y))
Example: (x == 0xDEADBEEF and y == 4)
(x << 4) | (x >> 28)
=> 0xEADBEEF0 | 0x0000000D
=> 0xEADBEEFD
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> 0xEADBEEFD
- Fix comment and code for second version. It wasn't using the rot* properly.
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotr x, y)
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotl x, (sub 32, y))
(x << 28) | (x >> 4)
=> 0xD0000000 | 0x0DEADBEE
=> 0xDDEADBEE
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> (0xEADBEEFD)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55575 91177308-0d34-0410-b5e6-96231b3b80d8
2008-08-31 00:37:27 +00:00
|
|
|
// (rotr x, y)
|
2008-08-31 01:04:56 +00:00
|
|
|
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
|
- Fix comment so that it describes how the code really works:
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y)
// fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
// (rotr x, (sub 32, y))
Example: (x == 0xDEADBEEF and y == 4)
(x << 4) | (x >> 28)
=> 0xEADBEEF0 | 0x0000000D
=> 0xEADBEEFD
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> 0xEADBEEFD
- Fix comment and code for second version. It wasn't using the rot* properly.
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotr x, y)
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext r))) ->
// (rotl x, (sub 32, y))
(x << 28) | (x >> 4)
=> 0xD0000000 | 0x0DEADBEE
=> 0xDDEADBEE
(rotl x, 4)
=> 0xEADBEEFD
(rotr x, 28)
=> (0xEADBEEFD)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55575 91177308-0d34-0410-b5e6-96231b3b80d8
2008-08-31 00:37:27 +00:00
|
|
|
// (rotl x, (sub 32, y))
|
2008-10-17 01:23:35 +00:00
|
|
|
if (ConstantSDNode *SUBC =
|
|
|
|
dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
|
2008-03-13 22:13:53 +00:00
|
|
|
if (SUBC->getAPIntValue() == OpSizeInBits) {
|
2009-01-30 21:14:50 +00:00
|
|
|
return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
|
|
|
|
LHSShiftArg,
|
2008-08-31 01:04:56 +00:00
|
|
|
HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
|
2007-04-02 21:36:32 +00:00
|
|
|
}
|
|
|
|
}
|
2006-09-14 20:50:57 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-14 20:50:57 +00:00
|
|
|
return 0;
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitXOR - Attempt to combine an ISD::XOR node.  The folds below are tried
/// in order and the first one that applies supplies the returned value.  An
/// empty SDValue is returned when none of them applies.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  DebugLoc DL = N->getDebugLoc();

  // Vector typed nodes first go through the common vector binop folder.
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode())
      return FoldedVOp;
  }

  const bool N0IsUndef = N0.getOpcode() == ISD::UNDEF;
  const bool N1IsUndef = N1.getOpcode() == ISD::UNDEF;

  // (xor undef, undef) -> 0.  This is a common idiom (misuse).
  if (N0IsUndef && N1IsUndef)
    return DAG.getConstant(0, VT);
  // (xor undef, x) -> undef and (xor x, undef) -> undef
  if (N0IsUndef)
    return N0;
  if (N1IsUndef)
    return N1;

  // (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // Only the LHS is constant at this point: move the constant to the RHS.
  if (N0C)
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  // (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  // Try to reassociate the xor.
  SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1);
  if (RXOR.getNode() != 0)
    return RXOR;

  // Several folds below treat "xor with the constant 1" as a logical not.
  const bool IsXorWithOne = N1C && N1C->getAPIntValue() == 1;
  SDValue LHS, RHS, CC;

  // !(x cc y) -> (x !cc y)
  if (IsXorWithOne && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    EVT CmpVT = LHS.getValueType();
    ISD::CondCode NotCC =
      ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), CmpVT.isInteger());

    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, CmpVT)) {
      const unsigned N0Opc = N0.getOpcode();
      if (N0Opc == ISD::SETCC)
        return DAG.getSetCC(DL, VT, LHS, RHS, NotCC);
      if (N0Opc == ISD::SELECT_CC)
        return DAG.getSelectCC(DL, LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      llvm_unreachable("Unhandled SetCC Equivalent!");
    }
  }

  // (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (IsXorWithOne && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)) {
    SDValue Inner = N0.getOperand(0);
    SDValue NotInner =
      DAG.getNode(ISD::XOR, N0.getDebugLoc(), Inner.getValueType(), Inner,
                  DAG.getConstant(1, Inner.getValueType()));
    AddToWorkList(NotInner.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotInner);
  }

  const bool N0IsAndOrOr =
    N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND;

  // De Morgan on an i1 value:
  //   (not (or x, y))  -> (and (not x), (not y))
  //   (not (and x, y)) -> (or (not x), (not y))
  // performed only if x or y is a one-use setcc.
  if (IsXorWithOne && VT == MVT::i1 && N0IsAndOrOr) {
    SDValue X = N0.getOperand(0);
    SDValue Y = N0.getOperand(1);
    if (isOneUseSetCC(Y) || isOneUseSetCC(X)) {
      unsigned FlippedOpc = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      X = DAG.getNode(ISD::XOR, X.getDebugLoc(), VT, X, N1); // X = ~X
      Y = DAG.getNode(ISD::XOR, Y.getDebugLoc(), VT, Y, N1); // Y = ~Y
      AddToWorkList(X.getNode());
      AddToWorkList(Y.getNode());
      return DAG.getNode(FlippedOpc, DL, VT, X, Y);
    }
  }

  // The same De Morgan rewrite for a bitwise not (xor with all ones),
  // performed only if x or y is a constant.
  if (N1C && N1C->isAllOnesValue() && N0IsAndOrOr) {
    SDValue X = N0.getOperand(0);
    SDValue Y = N0.getOperand(1);
    if (isa<ConstantSDNode>(Y) || isa<ConstantSDNode>(X)) {
      unsigned FlippedOpc = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      X = DAG.getNode(ISD::XOR, X.getDebugLoc(), VT, X, N1); // X = ~X
      Y = DAG.getNode(ISD::XOR, Y.getDebugLoc(), VT, Y, N1); // Y = ~Y
      AddToWorkList(X.getNode());
      AddToWorkList(Y.getNode());
      return DAG.getNode(FlippedOpc, DL, VT, X, Y);
    }
  }

  // (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)); the inner constant may be
  // on either side of the inner xor.
  if (N1C && N0.getOpcode() == ISD::XOR) {
    SDValue Inner0 = N0.getOperand(0);
    SDValue Inner1 = N0.getOperand(1);
    ConstantSDNode *Inner0C = dyn_cast<ConstantSDNode>(Inner0);
    ConstantSDNode *Inner1C = dyn_cast<ConstantSDNode>(Inner1);
    if (Inner0C)
      return DAG.getNode(ISD::XOR, DL, VT, Inner1,
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         Inner0C->getAPIntValue(), VT));
    if (Inner1C)
      return DAG.getNode(ISD::XOR, DL, VT, Inner0,
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         Inner1C->getAPIntValue(), VT));
  }

  // (xor x, x) -> 0
  if (N0 == N1) {
    if (!VT.isVector())
      return DAG.getConstant(0, VT);

    // For vectors, build an explicit vector of zeros, unless BUILD_VECTOR
    // would be an illegal operation at this stage.
    if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
      SDValue Zero = DAG.getConstant(0, VT.getVectorElementType());
      std::vector<SDValue> Zeros(VT.getVectorNumElements(), Zero);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Zeros[0], Zeros.size());
    }
  }

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode())
      return Tmp;
  }

  // Simplify the expression using non-local knowledge (scalar types only).
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
|
|
|
|
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
/// visitShiftByConstant - Handle transforms common to the three shifts, when
|
|
|
|
/// the shift amount is a constant.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
|
2008-08-28 21:40:38 +00:00
|
|
|
SDNode *LHS = N->getOperand(0).getNode();
|
2008-07-27 21:46:04 +00:00
|
|
|
if (!LHS->hasOneUse()) return SDValue();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
// We want to pull some binops through shifts, so that we have (and (shift))
|
|
|
|
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
|
|
|
|
// thing happens with address calculations, so it's important to canonicalize
|
|
|
|
// it.
|
|
|
|
bool HighBitSet = false; // Can we transform this if the high bit is set?
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
switch (LHS->getOpcode()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
default: return SDValue();
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
case ISD::OR:
|
|
|
|
case ISD::XOR:
|
|
|
|
HighBitSet = false; // We can only transform sra if the high bit is clear.
|
|
|
|
break;
|
|
|
|
case ISD::AND:
|
|
|
|
HighBitSet = true; // We can only transform sra if the high bit is set.
|
|
|
|
break;
|
|
|
|
case ISD::ADD:
|
2009-02-17 22:15:04 +00:00
|
|
|
if (N->getOpcode() != ISD::SHL)
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(); // only shl(add) not sr[al](add).
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
HighBitSet = false; // We can only transform sra if the high bit is clear.
|
|
|
|
break;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
// We require the RHS of the binop to be a constant as well.
|
|
|
|
ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
if (!BinOpCst) return SDValue();
|
2009-01-30 21:37:17 +00:00
|
|
|
|
|
|
|
// FIXME: disable this unless the input to the binop is a shift by a constant.
|
|
|
|
// If it is not a shift, it pessimizes some common cases like:
|
2007-12-06 07:47:55 +00:00
|
|
|
//
|
2009-01-30 21:37:17 +00:00
|
|
|
// void foo(int *X, int i) { X[i & 1235] = 1; }
|
|
|
|
// int bar(int *X, int i) { return X[i & 255]; }
|
2008-08-28 21:40:38 +00:00
|
|
|
SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
|
2009-02-17 22:15:04 +00:00
|
|
|
if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
|
2007-12-06 07:47:55 +00:00
|
|
|
BinOpLHSVal->getOpcode() != ISD::SRA &&
|
|
|
|
BinOpLHSVal->getOpcode() != ISD::SRL) ||
|
|
|
|
!isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 21:37:17 +00:00
|
|
|
// If this is a signed shift right, and the high bit is modified by the
|
|
|
|
// logical operation, do not perform the transformation. The highBitSet
|
|
|
|
// boolean indicates the value of the high bit of the constant which would
|
|
|
|
// cause it to be modified for this operation.
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
if (N->getOpcode() == ISD::SRA) {
|
2008-03-03 23:51:38 +00:00
|
|
|
bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
|
|
|
|
if (BinOpRHSSignSet != HighBitSet)
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
// Fold the constants, shifting the binop RHS by the shift amount.
|
2009-01-30 21:37:17 +00:00
|
|
|
SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
|
|
|
|
N->getValueType(0),
|
|
|
|
LHS->getOperand(1), N->getOperand(1));
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
|
|
|
|
// Create the new shift.
|
2009-01-30 21:37:17 +00:00
|
|
|
SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
|
|
|
|
VT, LHS->getOperand(0), N->getOperand(1));
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
|
|
|
|
// Create the new binop.
|
2009-01-30 21:37:17 +00:00
|
|
|
return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
|
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
2007-12-06 07:33:36 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSHL - Attempt to combine an ISD::SHL node.  The folds below are tried
/// in order; the first one that applies supplies the returned value.  When
/// none applies and the shift amount is a constant, the node is handed to
/// visitShiftByConstant; otherwise an empty SDValue is returned.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  // Width of a single element, so the range checks below also make sense for
  // vector types.
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      // Narrow the mask constant to the width of the truncated shift amount.
      APInt TruncC = N101C->getAPIntValue();
      TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    // A combined amount equal to the bit width already shifts every bit out,
    // so the result is 0 in that case too.  Emitting (shl x, OpSizeInBits)
    // instead would create an out-of-range shift, which the
    // "c >= size(x) -> undef" fold above turns into undef.
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }
  // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
  //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    // Only build the mask when c1 is in range; otherwise the high-bits count
    // below would underflow.
    if (c1 < VT.getSizeInBits()) {
      uint64_t c2 = N1C->getZExtValue();
      // Mask that clears the low c1 bits, i.e. the bits the srl discarded.
      SDValue HiBitsMask =
        DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
                                              VT.getSizeInBits() - c1),
                        VT);
      SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
                                 N0.getOperand(0),
                                 HiBitsMask);
      if (c2 > c1)
        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
                           DAG.getConstant(c2-c1, N1.getValueType()));
      else
        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
                           DAG.getConstant(c1-c2, N1.getValueType()));
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    // N1C is known to be smaller than the operand size here because of the
    // "c >= size(x) -> undef" fold above, so the subtraction cannot wrap.
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
                                            VT.getSizeInBits() -
                                              N1C->getZExtValue()),
                      VT);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// Combine an ISD::SRA (arithmetic shift right) node.
///
/// Operand 0 is the value being shifted, operand 1 the shift amount.  The
/// folds are tried in the order written below; the first one that applies
/// supplies the return value.  If none applies, the node is handed to
/// visitShiftByConstant when the amount is a constant, and an empty SDValue
/// is returned otherwise.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  if (N0C) {
    // fold (sra c1, c2) -> c1 >>s c2
    if (N1C)
      return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
    // fold (sra 0, x) -> 0 and (sra -1, x) -> -1; in both cases the shifted
    // operand itself is the result.
    if (N0C->isNullValue() || N0C->isAllOnesValue())
      return N0;
  }

  if (N1C) {
    const uint64_t ShAmt = N1C->getZExtValue();

    // fold (sra x, c) -> undef when c >= size(x)
    if (ShAmt >= OpSizeInBits)
      return DAG.getUNDEF(VT);
    // fold (sra x, 0) -> x
    if (N1C->isNullValue())
      return N0;

    // fold (sra (shl x, c1), c1) -> sext_inreg, provided sext_inreg from the
    // remaining low bits is allowed at this stage.
    if (N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
      unsigned LowBits = OpSizeInBits - static_cast<unsigned>(ShAmt);
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
        return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                           N0.getOperand(0), DAG.getValueType(ExtVT));
    }

    // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)), with the combined
    // amount clamped to size(x)-1.
    if (N0.getOpcode() == ISD::SRA) {
      if (ConstantSDNode *InnerC =
            dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Sum = ShAmt + InnerC->getZExtValue();
        if (Sum >= OpSizeInBits)
          Sum = OpSizeInBits - 1;
        return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                           DAG.getConstant(Sum, N1C->getValueType(0)));
      }
    }

    // fold (sra (shl X, m), n)
    //   -> (sign_extend (trunc (srl X, (sub n, m)))) when n > m, where the
    // truncate produces an integer of (size(X) - n) bits.
    // If truncate is free for the target, sext(trunc) is likely to result in
    // better code.
    if (N0.getOpcode() == ISD::SHL) {
      // InnerC holds m; N1C holds n.
      const ConstantSDNode *InnerC =
        dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (InnerC) {
        // Type the value would be truncated to.
        EVT TruncVT =
          EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - ShAmt);
        // Right shift that remains once the left shift has been cancelled.
        int Residual = ShAmt - InnerC->getZExtValue();

        // Only transform when a shift really remains (otherwise this should
        // already be a plain sign extend), sign_extend is legal or custom on
        // the truncated type, truncate is legal or custom on VT, and the
        // truncate is free.
        if (Residual > 0 &&
            TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
            TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
            TLI.isTruncateFree(VT, TruncVT)) {
          SDValue ResidualAmt = DAG.getConstant(Residual, getShiftAmountTy());
          SDValue Shifted = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
                                        N0.getOperand(0), ResidualAmt);
          SDValue Narrow = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                       TruncVT, Shifted);
          return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
                             N->getValueType(0), Narrow);
        }
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE) {
    SDValue Wide = N1.getOperand(0);
    if (Wide.getOpcode() == ISD::AND && N1.hasOneUse() && Wide.hasOneUse()) {
      if (ConstantSDNode *MaskC =
            dyn_cast<ConstantSDNode>(Wide.getOperand(1))) {
        EVT TruncVT = N1.getValueType();
        APInt TruncC = MaskC->getAPIntValue();
        // The result of trunc() is unused, so this relies on it narrowing
        // TruncC in place.
        TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
        return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                           DAG.getNode(ISD::AND, N->getDebugLoc(),
                                       TruncVT,
                                       DAG.getNode(ISD::TRUNCATE,
                                                   N->getDebugLoc(),
                                                   TruncVT,
                                                   Wide.getOperand(0)),
                                       DAG.getConstant(TruncC, TruncVT)));
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);

  if (!N1C)
    return SDValue();
  return visitShiftByConstant(N, N1C->getZExtValue());
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitSRL(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2005-09-02 21:18:40 +00:00
|
|
|
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
|
|
|
|
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N0.getValueType();
|
2009-12-11 21:31:27 +00:00
|
|
|
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (srl c1, c2) -> c1 >>u c2
|
2005-09-02 21:18:40 +00:00
|
|
|
if (N0C && N1C)
|
2008-09-24 10:25:02 +00:00
|
|
|
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (srl 0, x) -> 0
|
2005-09-02 21:18:40 +00:00
|
|
|
if (N0C && N0C->isNullValue())
|
2005-09-06 04:43:02 +00:00
|
|
|
return N0;
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (srl x, c >= size(x)) -> undef
|
2008-09-12 16:56:44 +00:00
|
|
|
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
|
2009-02-06 23:05:02 +00:00
|
|
|
return DAG.getUNDEF(VT);
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (srl x, 0) -> x
|
2005-09-02 21:18:40 +00:00
|
|
|
if (N1C && N1C->isNullValue())
|
2005-09-06 04:43:02 +00:00
|
|
|
return N0;
|
2005-09-01 00:19:25 +00:00
|
|
|
// if (srl x, c) is known to be zero, return 0
|
2008-07-27 21:46:04 +00:00
|
|
|
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
|
2008-02-25 21:11:39 +00:00
|
|
|
APInt::getAllOnesValue(OpSizeInBits)))
|
2005-09-06 04:43:02 +00:00
|
|
|
return DAG.getConstant(0, VT);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 21:37:17 +00:00
|
|
|
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
|
2009-02-17 22:15:04 +00:00
|
|
|
if (N1C && N0.getOpcode() == ISD::SRL &&
|
2005-09-01 00:19:25 +00:00
|
|
|
N0.getOperand(1).getOpcode() == ISD::Constant) {
|
2008-09-12 16:56:44 +00:00
|
|
|
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
|
|
|
|
uint64_t c2 = N1C->getZExtValue();
|
2005-09-01 00:19:25 +00:00
|
|
|
if (c1 + c2 > OpSizeInBits)
|
2005-09-06 04:43:02 +00:00
|
|
|
return DAG.getConstant(0, VT);
|
2009-01-30 21:37:17 +00:00
|
|
|
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
|
2005-09-06 04:43:02 +00:00
|
|
|
DAG.getConstant(c1 + c2, N1.getValueType()));
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-05-05 22:53:17 +00:00
|
|
|
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
|
|
|
|
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
|
|
|
|
// Shifting in all undef bits?
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT SmallVT = N0.getOperand(0).getValueType();
|
2008-09-12 16:56:44 +00:00
|
|
|
if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
|
2009-02-06 23:05:02 +00:00
|
|
|
return DAG.getUNDEF(VT);
|
2006-05-05 22:53:17 +00:00
|
|
|
|
2009-01-30 21:37:17 +00:00
|
|
|
SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
|
|
|
|
N0.getOperand(0), N1);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(SmallShift.getNode());
|
2009-01-30 21:37:17 +00:00
|
|
|
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
|
2006-05-05 22:53:17 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-12 20:23:19 +00:00
|
|
|
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
|
|
|
|
// bit, which is unmodified by sra.
|
2009-01-30 21:37:17 +00:00
|
|
|
if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
|
2006-10-12 20:23:19 +00:00
|
|
|
if (N0.getOpcode() == ISD::SRA)
|
2009-01-30 21:37:17 +00:00
|
|
|
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
|
2006-10-12 20:23:19 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
|
2009-02-17 22:15:04 +00:00
|
|
|
if (N1C && N0.getOpcode() == ISD::CTLZ &&
|
2008-06-06 12:08:01 +00:00
|
|
|
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
|
2008-02-20 16:33:30 +00:00
|
|
|
APInt KnownZero, KnownOne;
|
2008-06-06 12:08:01 +00:00
|
|
|
APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
|
2007-06-22 14:59:07 +00:00
|
|
|
DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
// If any of the input bits are KnownOne, then the input couldn't be all
|
|
|
|
// zeros, thus the result of the srl will always be zero.
|
2008-02-20 16:33:30 +00:00
|
|
|
if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
// If all of the bits input the to ctlz node are known to be zero, then
|
|
|
|
// the result of the ctlz is "32" and the result of the shift is one.
|
2008-02-20 16:33:30 +00:00
|
|
|
APInt UnknownBits = ~KnownZero & Mask;
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
if (UnknownBits == 0) return DAG.getConstant(1, VT);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
// Otherwise, check to see if there is exactly one bit input to the ctlz.
|
2009-01-30 21:37:17 +00:00
|
|
|
if ((UnknownBits & (UnknownBits - 1)) == 0) {
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
// Okay, we know that only that the single bit specified by UnknownBits
|
2009-01-30 21:37:17 +00:00
|
|
|
// could be set on input to the CTLZ node. If this bit is set, the SRL
|
|
|
|
// will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
|
|
|
|
// to an SRL/XOR pair, which is likely to simplify more.
|
2008-02-20 16:33:30 +00:00
|
|
|
unsigned ShAmt = UnknownBits.countTrailingZeros();
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Op = N0.getOperand(0);
|
2009-01-30 21:37:17 +00:00
|
|
|
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
if (ShAmt) {
|
2009-01-30 21:37:17 +00:00
|
|
|
Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
|
2009-01-31 15:50:11 +00:00
|
|
|
DAG.getConstant(ShAmt, getShiftAmountTy()));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Op.getNode());
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
}
|
2009-01-30 21:37:17 +00:00
|
|
|
|
|
|
|
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
|
|
|
|
Op, DAG.getConstant(1, VT));
|
Add a little dag combine to compile this:
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27356 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 06:11:11 +00:00
|
|
|
}
|
|
|
|
}
|
2008-08-30 02:03:58 +00:00
|
|
|
|
2009-02-01 18:06:53 +00:00
|
|
|
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
|
2008-08-30 02:03:58 +00:00
|
|
|
if (N1.getOpcode() == ISD::TRUNCATE &&
|
2008-09-22 18:19:24 +00:00
|
|
|
N1.getOperand(0).getOpcode() == ISD::AND &&
|
|
|
|
N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
|
2008-08-30 02:03:58 +00:00
|
|
|
SDValue N101 = N1.getOperand(0).getOperand(1);
|
2008-09-22 18:19:24 +00:00
|
|
|
if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT TruncVT = N1.getValueType();
|
2008-09-22 18:19:24 +00:00
|
|
|
SDValue N100 = N1.getOperand(0).getOperand(0);
|
2009-02-01 18:06:53 +00:00
|
|
|
APInt TruncC = N101C->getAPIntValue();
|
|
|
|
TruncC.trunc(TruncVT.getSizeInBits());
|
2009-01-30 21:37:17 +00:00
|
|
|
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
|
2009-01-31 03:12:48 +00:00
|
|
|
DAG.getNode(ISD::AND, N->getDebugLoc(),
|
2009-01-30 21:37:17 +00:00
|
|
|
TruncVT,
|
2009-01-31 03:12:48 +00:00
|
|
|
DAG.getNode(ISD::TRUNCATE,
|
|
|
|
N->getDebugLoc(),
|
|
|
|
TruncVT, N100),
|
2009-01-27 20:39:34 +00:00
|
|
|
DAG.getConstant(TruncC, TruncVT)));
|
2008-08-30 02:03:58 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-04-18 03:06:49 +00:00
|
|
|
// fold operands of srl based on knowledge that the low bits are not
|
|
|
|
// demanded.
|
2008-07-27 21:46:04 +00:00
|
|
|
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
|
|
|
|
return SDValue(N, 0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-09-12 16:56:44 +00:00
|
|
|
return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// Combine an ISD::CTLZ node.
///
/// The only fold attempted is (ctlz c1) -> c2: when the operand is a
/// constant, the node is requested again from the DAG, which is presumably
/// where the constant result gets computed.  Any other operand yields an
/// empty SDValue.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue Operand = N->getOperand(0);

  // Nothing to do unless the operand is a constant.
  if (!isa<ConstantSDNode>(Operand))
    return SDValue();

  // fold (ctlz c1) -> c2
  return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), N->getValueType(0),
                     Operand);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
// visitCTTZ - Combine a CTTZ node. Only a constant operand is handled; any
// other operand yields a null SDValue, meaning "no change".
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue Src = N->getOperand(0);

  // Nothing to simplify unless the operand is a constant.
  if (!isa<ConstantSDNode>(Src))
    return SDValue();

  // fold (cttz c1) -> c2
  // The node is simply re-requested from the DAG; presumably getNode performs
  // the actual constant folding -- confirm against SelectionDAG::getNode.
  return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), N->getValueType(0), Src);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
// visitCTPOP - Combine a CTPOP node. Only a constant operand is handled; any
// other operand yields a null SDValue, meaning "no change".
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue Src = N->getOperand(0);

  // Nothing to simplify unless the operand is a constant.
  if (!isa<ConstantSDNode>(Src))
    return SDValue();

  // fold (ctpop c1) -> c2
  // The node is simply re-requested from the DAG; presumably getNode performs
  // the actual constant folding -- confirm against SelectionDAG::getNode.
  return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), N->getValueType(0), Src);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
// visitSELECT - Try to simplify a SELECT node whose operands are
// (condition, true value, false value).
//
// Returns the replacement value when a fold applies, SDValue(N, 0) when
// SimplifySelectOps reports success (so that N is not revisited), and a null
// SDValue when nothing could be done.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);   // type of the selected value
  EVT VT0 = N0.getValueType();   // type of the condition

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  // fold (select false, X, Y) -> Y
  if (N0C)
    return N0C->isNullValue() ? N2 : N1;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // This is only valid when C is known to be 0 or 1: either the condition is
  // an i1, or it is a wider integer and the target promises zero-or-one
  // boolean contents.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 ||
       (VT0.isInteger() &&
        TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    // Same type: the xor is the final result.
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Otherwise do the xor in the condition's type, queue it for combining,
    // and then widen or narrow it to the result type.
    SDValue XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
                                  N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // (The i1 "select X, 1, Y" form has already been folded to an OR above.)
  if (VT == MVT::i1 && N0 == N1)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // (The i1 "select X, Y, 0" form has already been folded to an AND above.)
  if (VT == MVT::i1 && N0 == N2)
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // FIXME:
    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
    // having to say they don't support SELECT_CC on every type the DAG knows
    // about, since there is no way to mark an opcode illegal at all value types
    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
// visitSELECT_CC - Try to simplify a SELECT_CC node whose operands are
// (lhs, rhs, true value, false value, condition code).
//
// Returns the replacement value when a fold applies, SDValue(N, 0) when
// SimplifySelectOps reports success, and otherwise whatever SimplifySelectCC
// produces.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue CmpLHS = N->getOperand(0);
  SDValue CmpRHS = N->getOperand(1);
  SDValue TrueVal = N->getOperand(2);
  SDValue FalseVal = N->getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (TrueVal == FalseVal)
    return TrueVal;

  // See whether the comparison itself simplifies.
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(CmpLHS.getValueType()),
                              CmpLHS, CmpRHS, CC, N->getDebugLoc(), false);
  if (SDNode *SCCNode = SCC.getNode()) {
    AddToWorkList(SCCNode);

    // The condition is a known constant: pick the matching arm outright
    // (non-zero selects the true value, zero the false value).
    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCCNode))
      return SCCC->isNullValue() ? FalseVal : TrueVal;

    // The condition became a different setcc: fold to a simpler select_cc
    // built from its operands and condition code.
    if (SCC.getOpcode() == ISD::SETCC)
      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
                         TrueVal.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1),
                         TrueVal, FalseVal, SCC.getOperand(2));
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, TrueVal, FalseVal))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(N->getDebugLoc(), CmpLHS, CmpRHS, TrueVal, FalseVal,
                          CC);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
// visitSETCC - Hand a SETCC node's result type, its two compared operands and
// its condition code to SimplifySetCC, and return whatever that produces.
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();

  return SimplifySetCC(N->getValueType(0), LHS, RHS, Cond, N->getDebugLoc());
}
|
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
|
2009-04-09 03:51:29 +00:00
|
|
|
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
|
2007-10-29 19:58:20 +00:00
|
|
|
// transformation. Returns true if extension are possible and the above
|
2009-02-17 22:15:04 +00:00
|
|
|
// mentioned transformation is profitable.
|
2008-07-27 21:46:04 +00:00
|
|
|
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
|
2007-10-29 19:58:20 +00:00
|
|
|
unsigned ExtOpc,
|
|
|
|
SmallVector<SDNode*, 4> &ExtendNodes,
|
2009-01-15 19:20:50 +00:00
|
|
|
const TargetLowering &TLI) {
|
2007-10-29 19:58:20 +00:00
|
|
|
bool HasCopyToRegUses = false;
|
|
|
|
bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
|
2008-08-30 19:29:20 +00:00
|
|
|
for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
|
|
|
|
UE = N0.getNode()->use_end();
|
2007-10-29 19:58:20 +00:00
|
|
|
UI != UE; ++UI) {
|
2008-07-27 20:43:25 +00:00
|
|
|
SDNode *User = *UI;
|
2007-10-29 19:58:20 +00:00
|
|
|
if (User == N)
|
|
|
|
continue;
|
2009-04-09 03:51:29 +00:00
|
|
|
if (UI.getUse().getResNo() != N0.getResNo())
|
|
|
|
continue;
|
2007-10-29 19:58:20 +00:00
|
|
|
// FIXME: Only extend SETCC N, N and SETCC N, c for now.
|
2009-04-09 03:51:29 +00:00
|
|
|
if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
|
2007-10-29 19:58:20 +00:00
|
|
|
ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
|
|
|
|
if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
|
|
|
|
// Sign bits will be lost after a zext.
|
|
|
|
return false;
|
|
|
|
bool Add = false;
|
|
|
|
for (unsigned i = 0; i != 2; ++i) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue UseOp = User->getOperand(i);
|
2007-10-29 19:58:20 +00:00
|
|
|
if (UseOp == N0)
|
|
|
|
continue;
|
|
|
|
if (!isa<ConstantSDNode>(UseOp))
|
|
|
|
return false;
|
|
|
|
Add = true;
|
|
|
|
}
|
|
|
|
if (Add)
|
|
|
|
ExtendNodes.push_back(User);
|
2009-04-09 03:51:29 +00:00
|
|
|
continue;
|
2007-10-29 19:58:20 +00:00
|
|
|
}
|
2009-04-09 03:51:29 +00:00
|
|
|
// If truncates aren't free and there are users we can't
|
|
|
|
// extend, it isn't worthwhile.
|
|
|
|
if (!isTruncFree)
|
|
|
|
return false;
|
|
|
|
// Remember if this value is live-out.
|
|
|
|
if (User->getOpcode() == ISD::CopyToReg)
|
|
|
|
HasCopyToRegUses = true;
|
2007-10-29 19:58:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (HasCopyToRegUses) {
|
|
|
|
bool BothLiveOut = false;
|
|
|
|
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
|
|
|
|
UI != UE; ++UI) {
|
2009-04-09 03:51:29 +00:00
|
|
|
SDUse &Use = UI.getUse();
|
|
|
|
if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
|
|
|
|
BothLiveOut = true;
|
|
|
|
break;
|
2007-10-29 19:58:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (BothLiveOut)
|
|
|
|
// Both unextended and extended values are live out. There had better be
|
|
|
|
// good a reason for the transformation.
|
|
|
|
return ExtendNodes.size();
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2005-09-01 00:19:25 +00:00
|
|
|
|
|
|
|
// fold (sext c1) -> c1
|
2006-11-02 20:25:50 +00:00
|
|
|
if (isa<ConstantSDNode>(N0))
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (sext (sext x)) -> (sext x)
|
2006-05-06 23:06:26 +00:00
|
|
|
// fold (sext (aext x)) -> (sext x)
|
|
|
|
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
|
|
|
|
N0.getOperand(0));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-03-22 01:54:19 +00:00
|
|
|
if (N0.getOpcode() == ISD::TRUNCATE) {
|
2008-05-20 20:56:33 +00:00
|
|
|
// fold (sext (truncate (load x))) -> (sext (smaller load x))
|
|
|
|
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
|
2008-08-28 21:40:38 +00:00
|
|
|
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
|
|
|
|
if (NarrowLoad.getNode()) {
|
|
|
|
if (NarrowLoad.getNode() != N0.getNode())
|
|
|
|
CombineTo(N0.getNode(), NarrowLoad);
|
2009-04-27 02:00:55 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2007-03-23 02:16:52 +00:00
|
|
|
}
|
2007-03-22 01:54:19 +00:00
|
|
|
|
2008-05-20 20:56:33 +00:00
|
|
|
// See if the value being truncated is already sign extended. If so, just
|
|
|
|
// eliminate the trunc/sext pair.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Op = N0.getOperand(0);
|
2008-06-06 12:08:01 +00:00
|
|
|
unsigned OpBits = Op.getValueType().getSizeInBits();
|
|
|
|
unsigned MidBits = N0.getValueType().getSizeInBits();
|
|
|
|
unsigned DestBits = VT.getSizeInBits();
|
2007-06-22 14:59:07 +00:00
|
|
|
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-02-26 03:13:59 +00:00
|
|
|
if (OpBits == DestBits) {
|
|
|
|
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
|
|
|
|
// bits, it is already ready.
|
|
|
|
if (NumSignBits > DestBits-MidBits)
|
|
|
|
return Op;
|
|
|
|
} else if (OpBits < DestBits) {
|
|
|
|
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
|
|
|
|
// bits, just sext from i32.
|
|
|
|
if (NumSignBits > OpBits-MidBits)
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
|
2007-02-26 03:13:59 +00:00
|
|
|
} else {
|
|
|
|
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
|
|
|
|
// bits, just truncate to i32.
|
|
|
|
if (NumSignBits > OpBits-MidBits)
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
|
2007-02-26 03:13:59 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-02-26 03:13:59 +00:00
|
|
|
// fold (sext (truncate x)) -> (sextinreg x).
|
2008-11-24 14:53:14 +00:00
|
|
|
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
|
|
|
|
N0.getValueType())) {
|
2008-06-08 20:54:56 +00:00
|
|
|
if (Op.getValueType().bitsLT(VT))
|
2009-01-30 22:23:15 +00:00
|
|
|
Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
|
2008-06-08 20:54:56 +00:00
|
|
|
else if (Op.getValueType().bitsGT(VT))
|
2009-01-30 22:23:15 +00:00
|
|
|
Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
|
|
|
|
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
|
2009-12-11 21:31:27 +00:00
|
|
|
DAG.getValueType(N0.getValueType().getScalarType()));
|
2006-09-21 06:00:20 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-12-14 02:19:23 +00:00
|
|
|
// fold (sext (load x)) -> (sext (truncate (sextload x)))
|
2008-08-28 21:40:38 +00:00
|
|
|
if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
2008-11-24 14:53:14 +00:00
|
|
|
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
|
2008-10-14 21:26:46 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
|
2007-10-29 19:58:20 +00:00
|
|
|
bool DoXform = true;
|
|
|
|
SmallVector<SDNode*, 4> SetCCs;
|
|
|
|
if (!N0.hasOneUse())
|
|
|
|
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
|
|
|
|
if (DoXform) {
|
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-04-09 03:51:29 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
|
|
|
|
LN0->getChain(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getBasePtr(), LN0->getSrcValue(),
|
|
|
|
LN0->getSrcValueOffset(),
|
|
|
|
N0.getValueType(),
|
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
2007-10-29 19:58:20 +00:00
|
|
|
CombineTo(N, ExtLoad);
|
2009-01-30 22:23:15 +00:00
|
|
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
|
|
|
N0.getValueType(), ExtLoad);
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
|
2009-01-30 22:23:15 +00:00
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
// Extend SetCC uses if necessary.
|
|
|
|
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
|
|
|
|
SDNode *SetCC = SetCCs[i];
|
2008-07-27 21:46:04 +00:00
|
|
|
SmallVector<SDValue, 4> Ops;
|
2009-01-30 22:23:15 +00:00
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
for (unsigned j = 0; j != 2; ++j) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue SOp = SetCC->getOperand(j);
|
2007-10-29 19:58:20 +00:00
|
|
|
if (SOp == Trunc)
|
|
|
|
Ops.push_back(ExtLoad);
|
|
|
|
else
|
2009-04-09 03:51:29 +00:00
|
|
|
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
|
|
|
|
N->getDebugLoc(), VT, SOp));
|
2009-01-30 22:23:15 +00:00
|
|
|
}
|
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
Ops.push_back(SetCC->getOperand(2));
|
2009-01-31 03:12:48 +00:00
|
|
|
CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
|
2009-01-30 22:23:15 +00:00
|
|
|
SetCC->getValueType(0),
|
2007-10-29 19:58:20 +00:00
|
|
|
&Ops[0], Ops.size()));
|
|
|
|
}
|
2009-01-30 22:23:15 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2007-10-29 19:58:20 +00:00
|
|
|
}
|
More cool stuff for the dag combiner. We can now finally handle things
like turning:
_foo:
fctiwz f0, f1
stfd f0, -8(r1)
lwz r2, -4(r1)
rlwinm r3, r2, 0, 16, 31
blr
into
_foo:
fctiwz f0,f1
stfd f0,-8(r1)
lhz r3,-2(r1)
blr
Also removed an unnecessary constraint from sra -> srl conversion, which
should take care of the only reason we would ever need to handle sra in
MaskedValueIsZero, AFAIK.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23703 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-12 20:40:40 +00:00
|
|
|
}
|
2005-12-14 19:05:06 +00:00
|
|
|
|
|
|
|
// fold (sext (sextload x)) -> (sext (truncate (sextload x)))
|
|
|
|
// fold (sext ( extload x)) -> (sext (truncate (sextload x)))
|
2008-08-28 21:40:38 +00:00
|
|
|
if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
|
|
|
|
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
|
2006-10-09 20:57:25 +00:00
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-09-23 21:02:20 +00:00
|
|
|
EVT MemVT = LN0->getMemoryVT();
|
2008-11-24 14:53:14 +00:00
|
|
|
if ((!LegalOperations && !LN0->isVolatile()) ||
|
2009-09-23 21:02:20 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
|
2009-01-30 22:23:15 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
|
|
|
|
LN0->getChain(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getBasePtr(), LN0->getSrcValue(),
|
2009-09-23 21:02:20 +00:00
|
|
|
LN0->getSrcValueOffset(), MemVT,
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
2006-12-15 21:38:30 +00:00
|
|
|
CombineTo(N, ExtLoad);
|
2008-08-30 19:29:20 +00:00
|
|
|
CombineTo(N0.getNode(),
|
2009-01-30 22:23:15 +00:00
|
|
|
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
|
|
|
N0.getValueType(), ExtLoad),
|
2006-12-15 21:38:30 +00:00
|
|
|
ExtLoad.getValue(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2006-12-15 21:38:30 +00:00
|
|
|
}
|
2005-12-14 19:05:06 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
if (N0.getOpcode() == ISD::SETCC) {
|
2009-07-08 00:31:33 +00:00
|
|
|
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
|
|
|
|
if (VT.isVector() &&
|
|
|
|
// We know that the # elements of the results is the same as the
|
|
|
|
// # elements of the compare (and the # elements of the compare result
|
|
|
|
// for that matter). Check to see that they are the same size. If so,
|
|
|
|
// we know that the element size of the sext'd result matches the
|
|
|
|
// element size of the compare operands.
|
|
|
|
VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
|
|
|
|
|
|
|
|
// Only do this before legalize for now.
|
|
|
|
!LegalOperations) {
|
|
|
|
return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
|
|
|
|
N0.getOperand(1),
|
|
|
|
cast<CondCodeSDNode>(N0.getOperand(2))->get());
|
|
|
|
}
|
|
|
|
|
|
|
|
// sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
|
2009-08-06 09:18:59 +00:00
|
|
|
SDValue NegOne =
|
|
|
|
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue SCC =
|
2009-01-30 23:59:18 +00:00
|
|
|
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
|
2009-08-06 09:18:59 +00:00
|
|
|
NegOne, DAG.getConstant(0, VT),
|
2007-04-11 06:50:51 +00:00
|
|
|
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (SCC.getNode()) return SCC;
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
}
|
2009-07-08 00:31:33 +00:00
|
|
|
|
|
|
|
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-04-28 16:58:24 +00:00
|
|
|
// fold (sext x) -> (zext x) if the sign bit is known zero.
|
2008-11-24 14:53:14 +00:00
|
|
|
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
|
2008-04-28 18:47:17 +00:00
|
|
|
DAG.SignBitIsZero(N0))
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2005-09-01 00:19:25 +00:00
|
|
|
|
|
|
|
// fold (zext c1) -> c1
|
2006-11-02 20:25:50 +00:00
|
|
|
if (isa<ConstantSDNode>(N0))
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (zext (zext x)) -> (zext x)
|
2006-05-06 23:06:26 +00:00
|
|
|
// fold (zext (aext x)) -> (zext x)
|
|
|
|
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
|
|
|
|
N0.getOperand(0));
|
2006-09-21 06:00:20 +00:00
|
|
|
|
2007-03-22 01:54:19 +00:00
|
|
|
// fold (zext (truncate (load x))) -> (zext (smaller load x))
|
|
|
|
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
|
2007-03-30 21:38:07 +00:00
|
|
|
if (N0.getOpcode() == ISD::TRUNCATE) {
|
2008-08-28 21:40:38 +00:00
|
|
|
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
|
|
|
|
if (NarrowLoad.getNode()) {
|
|
|
|
if (NarrowLoad.getNode() != N0.getNode())
|
|
|
|
CombineTo(N0.getNode(), NarrowLoad);
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
|
2007-03-23 02:16:52 +00:00
|
|
|
}
|
2007-03-22 01:54:19 +00:00
|
|
|
}
|
|
|
|
|
2006-09-21 06:00:20 +00:00
|
|
|
// fold (zext (truncate x)) -> (and x, mask)
|
|
|
|
if (N0.getOpcode() == ISD::TRUNCATE &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Op = N0.getOperand(0);
|
2008-06-08 20:54:56 +00:00
|
|
|
if (Op.getValueType().bitsLT(VT)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
|
2008-06-08 20:54:56 +00:00
|
|
|
} else if (Op.getValueType().bitsGT(VT)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
|
2006-09-21 06:00:20 +00:00
|
|
|
}
|
2009-12-11 21:31:27 +00:00
|
|
|
return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
|
|
|
|
N0.getValueType().getScalarType());
|
2006-09-21 06:00:20 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Implement support for modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
2009-04-08 00:15:30 +00:00
|
|
|
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
|
|
|
|
// if either of the casts is not free.
|
Compile:
int %test(ulong *%tmp) {
%tmp = load ulong* %tmp ; <ulong> [#uses=1]
%tmp.mask = shr ulong %tmp, ubyte 50 ; <ulong> [#uses=1]
%tmp.mask = cast ulong %tmp.mask to ubyte
%tmp2 = and ubyte %tmp.mask, 3 ; <ubyte> [#uses=1]
%tmp2 = cast ubyte %tmp2 to int ; <int> [#uses=1]
ret int %tmp2
}
to:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
andl $3, %eax
ret
instead of:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
# TRUNCATE movb %al, %al
andb $3, %al
movzbl %al, %eax
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30558 91177308-0d34-0410-b5e6-96231b3b80d8
2006-09-21 06:14:31 +00:00
|
|
|
if (N0.getOpcode() == ISD::AND &&
|
|
|
|
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
|
Implement support for modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
2009-04-08 00:15:30 +00:00
|
|
|
N0.getOperand(1).getOpcode() == ISD::Constant &&
|
|
|
|
(!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
|
|
|
|
N0.getValueType()) ||
|
|
|
|
!TLI.isZExtFree(N0.getValueType(), VT))) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue X = N0.getOperand(0).getOperand(0);
|
2008-06-08 20:54:56 +00:00
|
|
|
if (X.getValueType().bitsLT(VT)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
|
2008-06-08 20:54:56 +00:00
|
|
|
} else if (X.getValueType().bitsGT(VT)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
|
Compile:
int %test(ulong *%tmp) {
%tmp = load ulong* %tmp ; <ulong> [#uses=1]
%tmp.mask = shr ulong %tmp, ubyte 50 ; <ulong> [#uses=1]
%tmp.mask = cast ulong %tmp.mask to ubyte
%tmp2 = and ubyte %tmp.mask, 3 ; <ubyte> [#uses=1]
%tmp2 = cast ubyte %tmp2 to int ; <int> [#uses=1]
ret int %tmp2
}
to:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
andl $3, %eax
ret
instead of:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
# TRUNCATE movb %al, %al
andb $3, %al
movzbl %al, %eax
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30558 91177308-0d34-0410-b5e6-96231b3b80d8
2006-09-21 06:14:31 +00:00
|
|
|
}
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
|
2008-06-06 12:08:01 +00:00
|
|
|
Mask.zext(VT.getSizeInBits());
|
2009-01-30 22:23:15 +00:00
|
|
|
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
|
|
|
|
X, DAG.getConstant(Mask, VT));
|
Compile:
int %test(ulong *%tmp) {
%tmp = load ulong* %tmp ; <ulong> [#uses=1]
%tmp.mask = shr ulong %tmp, ubyte 50 ; <ulong> [#uses=1]
%tmp.mask = cast ulong %tmp.mask to ubyte
%tmp2 = and ubyte %tmp.mask, 3 ; <ubyte> [#uses=1]
%tmp2 = cast ubyte %tmp2 to int ; <int> [#uses=1]
ret int %tmp2
}
to:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
andl $3, %eax
ret
instead of:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
# TRUNCATE movb %al, %al
andb $3, %al
movzbl %al, %eax
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30558 91177308-0d34-0410-b5e6-96231b3b80d8
2006-09-21 06:14:31 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-12-14 02:19:23 +00:00
|
|
|
// fold (zext (load x)) -> (zext (truncate (zextload x)))
|
2008-08-28 21:40:38 +00:00
|
|
|
if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
2008-11-24 14:53:14 +00:00
|
|
|
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
|
2008-10-14 21:26:46 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
|
2007-10-29 19:58:20 +00:00
|
|
|
bool DoXform = true;
|
|
|
|
SmallVector<SDNode*, 4> SetCCs;
|
|
|
|
if (!N0.hasOneUse())
|
|
|
|
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
|
|
|
|
if (DoXform) {
|
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-01-30 22:23:15 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
|
|
|
|
LN0->getChain(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getBasePtr(), LN0->getSrcValue(),
|
|
|
|
LN0->getSrcValueOffset(),
|
|
|
|
N0.getValueType(),
|
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
2007-10-29 19:58:20 +00:00
|
|
|
CombineTo(N, ExtLoad);
|
2009-01-30 22:23:15 +00:00
|
|
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
|
|
|
N0.getValueType(), ExtLoad);
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
|
2009-01-30 22:23:15 +00:00
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
// Extend SetCC uses if necessary.
|
|
|
|
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
|
|
|
|
SDNode *SetCC = SetCCs[i];
|
2008-07-27 21:46:04 +00:00
|
|
|
SmallVector<SDValue, 4> Ops;
|
2009-01-30 22:23:15 +00:00
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
for (unsigned j = 0; j != 2; ++j) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue SOp = SetCC->getOperand(j);
|
2007-10-29 19:58:20 +00:00
|
|
|
if (SOp == Trunc)
|
|
|
|
Ops.push_back(ExtLoad);
|
|
|
|
else
|
2009-01-31 03:12:48 +00:00
|
|
|
Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
|
|
|
|
N->getDebugLoc(), VT, SOp));
|
2009-01-30 22:23:15 +00:00
|
|
|
}
|
|
|
|
|
2007-10-29 19:58:20 +00:00
|
|
|
Ops.push_back(SetCC->getOperand(2));
|
2009-01-31 03:12:48 +00:00
|
|
|
CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
|
2009-01-30 22:23:15 +00:00
|
|
|
SetCC->getValueType(0),
|
2007-10-29 19:58:20 +00:00
|
|
|
&Ops[0], Ops.size()));
|
|
|
|
}
|
2009-01-30 22:23:15 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2007-10-29 19:58:20 +00:00
|
|
|
}
|
2005-12-14 02:19:23 +00:00
|
|
|
}
|
2005-12-14 19:05:06 +00:00
|
|
|
|
|
|
|
// fold (zext (zextload x)) -> (zext (truncate (zextload x)))
|
|
|
|
// fold (zext ( extload x)) -> (zext (truncate (zextload x)))
|
2008-08-28 21:40:38 +00:00
|
|
|
if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
|
|
|
|
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
|
2006-10-09 20:57:25 +00:00
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-09-23 21:02:20 +00:00
|
|
|
EVT MemVT = LN0->getMemoryVT();
|
2008-11-24 14:53:14 +00:00
|
|
|
if ((!LegalOperations && !LN0->isVolatile()) ||
|
2009-09-23 21:02:20 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
|
2009-01-30 22:23:15 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
|
|
|
|
LN0->getChain(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getBasePtr(), LN0->getSrcValue(),
|
2009-09-23 21:02:20 +00:00
|
|
|
LN0->getSrcValueOffset(), MemVT,
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
CombineTo(N, ExtLoad);
|
2008-08-30 19:29:20 +00:00
|
|
|
CombineTo(N0.getNode(),
|
2009-01-30 22:23:15 +00:00
|
|
|
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
|
|
|
|
ExtLoad),
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
ExtLoad.getValue(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
}
|
2005-12-14 19:05:06 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
|
|
|
|
if (N0.getOpcode() == ISD::SETCC) {
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue SCC =
|
2009-01-30 23:59:18 +00:00
|
|
|
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
|
2007-04-11 06:50:51 +00:00
|
|
|
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (SCC.getNode()) return SCC;
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-05-05 05:58:59 +00:00
|
|
|
// fold (aext c1) -> c1
|
2006-05-06 23:06:26 +00:00
|
|
|
if (isa<ConstantSDNode>(N0))
|
2009-02-01 11:19:36 +00:00
|
|
|
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
|
2006-05-05 05:58:59 +00:00
|
|
|
// fold (aext (aext x)) -> (aext x)
|
|
|
|
// fold (aext (zext x)) -> (zext x)
|
|
|
|
// fold (aext (sext x)) -> (sext x)
|
|
|
|
if (N0.getOpcode() == ISD::ANY_EXTEND ||
|
|
|
|
N0.getOpcode() == ISD::ZERO_EXTEND ||
|
|
|
|
N0.getOpcode() == ISD::SIGN_EXTEND)
|
2009-01-30 22:27:33 +00:00
|
|
|
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-03-22 01:54:19 +00:00
|
|
|
// fold (aext (truncate (load x))) -> (aext (smaller load x))
|
|
|
|
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
|
|
|
|
if (N0.getOpcode() == ISD::TRUNCATE) {
|
2008-08-28 21:40:38 +00:00
|
|
|
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
|
|
|
|
if (NarrowLoad.getNode()) {
|
|
|
|
if (NarrowLoad.getNode() != N0.getNode())
|
|
|
|
CombineTo(N0.getNode(), NarrowLoad);
|
2009-01-30 22:27:33 +00:00
|
|
|
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
|
2007-03-23 02:16:52 +00:00
|
|
|
}
|
2007-03-22 01:54:19 +00:00
|
|
|
}
|
|
|
|
|
2006-09-20 06:29:17 +00:00
|
|
|
// fold (aext (truncate x))
|
|
|
|
if (N0.getOpcode() == ISD::TRUNCATE) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue TruncOp = N0.getOperand(0);
|
2006-09-20 06:29:17 +00:00
|
|
|
if (TruncOp.getValueType() == VT)
|
|
|
|
return TruncOp; // x iff x size == zext size.
|
2008-06-08 20:54:56 +00:00
|
|
|
if (TruncOp.getValueType().bitsGT(VT))
|
2009-01-30 22:27:33 +00:00
|
|
|
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
|
|
|
|
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
|
2006-09-20 06:29:17 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Implement support for modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
2009-04-08 00:15:30 +00:00
|
|
|
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
|
|
|
|
// if the trunc is not free.
|
2006-09-21 06:40:43 +00:00
|
|
|
if (N0.getOpcode() == ISD::AND &&
|
|
|
|
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
|
Implement support for using modeling implicit-zero-extension on x86-64
with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce
SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG
instructions), and teach the DAGCombiner to take advantage of this on
targets which support it. This eliminates many redundant
zero-extension operations on x86-64.
This adds a new TargetLowering hook, isZExtFree. It's similar to
isTruncateFree, except it only applies to actual definitions, and not
no-op truncates which may not zero the high bits.
Also, this adds a new optimization to SimplifyDemandedBits: transform
operations like x+y into (zext (add (trunc x), (trunc y))) on targets
where all the casts are no-ops. In contexts where the high part of the
add is explicitly masked off, this allows the mask operation to be
eliminated. Fix the DAGCombiner to avoid undoing these transformations
to eliminate casts on targets where the casts are no-ops.
Also, this adds a new two-address lowering heuristic. Since
two-address lowering runs before coalescing, it helps to be able to
look through copies when deciding whether commuting and/or
three-address conversion are profitable.
Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle
the case that a clobber range extended both before and beyond an
existing live range. In that case, multiple live ranges need to be
added. This was exposed by the new subreg coalescing code.
Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the
spiller behavior it was looking for no longer occurs with the new
instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
2009-04-08 00:15:30 +00:00
|
|
|
N0.getOperand(1).getOpcode() == ISD::Constant &&
|
|
|
|
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
|
|
|
|
N0.getValueType())) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue X = N0.getOperand(0).getOperand(0);
|
2008-06-08 20:54:56 +00:00
|
|
|
if (X.getValueType().bitsLT(VT)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
|
2008-06-08 20:54:56 +00:00
|
|
|
} else if (X.getValueType().bitsGT(VT)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
|
2006-09-21 06:40:43 +00:00
|
|
|
}
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
|
2008-06-06 12:08:01 +00:00
|
|
|
Mask.zext(VT.getSizeInBits());
|
2009-01-30 22:27:33 +00:00
|
|
|
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
|
|
|
|
X, DAG.getConstant(Mask, VT));
|
2006-09-21 06:40:43 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-05-05 05:58:59 +00:00
|
|
|
// fold (aext (load x)) -> (aext (truncate (extload x)))
|
2009-04-09 03:51:29 +00:00
|
|
|
if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
2008-11-24 14:53:14 +00:00
|
|
|
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
|
2008-10-14 21:26:46 +00:00
|
|
|
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
|
2009-04-09 03:51:29 +00:00
|
|
|
bool DoXform = true;
|
|
|
|
SmallVector<SDNode*, 4> SetCCs;
|
|
|
|
if (!N0.hasOneUse())
|
|
|
|
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
|
|
|
|
if (DoXform) {
|
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
|
|
|
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
|
|
|
|
LN0->getChain(),
|
|
|
|
LN0->getBasePtr(), LN0->getSrcValue(),
|
|
|
|
LN0->getSrcValueOffset(),
|
|
|
|
N0.getValueType(),
|
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
|
|
|
CombineTo(N, ExtLoad);
|
|
|
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
|
|
|
N0.getValueType(), ExtLoad);
|
|
|
|
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
|
|
|
|
|
|
|
|
// Extend SetCC uses if necessary.
|
|
|
|
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
|
|
|
|
SDNode *SetCC = SetCCs[i];
|
|
|
|
SmallVector<SDValue, 4> Ops;
|
|
|
|
|
|
|
|
for (unsigned j = 0; j != 2; ++j) {
|
|
|
|
SDValue SOp = SetCC->getOperand(j);
|
|
|
|
if (SOp == Trunc)
|
|
|
|
Ops.push_back(ExtLoad);
|
|
|
|
else
|
|
|
|
Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
|
|
|
|
N->getDebugLoc(), VT, SOp));
|
|
|
|
}
|
|
|
|
|
|
|
|
Ops.push_back(SetCC->getOperand(2));
|
|
|
|
CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
|
|
|
|
SetCC->getValueType(0),
|
|
|
|
&Ops[0], Ops.size()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
|
|
|
}
|
2006-05-05 05:58:59 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-05-05 05:58:59 +00:00
|
|
|
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
|
|
|
|
// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
|
|
|
|
// fold (aext ( extload x)) -> (aext (truncate (extload x)))
|
2007-03-07 08:07:03 +00:00
|
|
|
if (N0.getOpcode() == ISD::LOAD &&
|
2008-08-28 21:40:38 +00:00
|
|
|
!ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
|
2006-10-09 20:57:25 +00:00
|
|
|
N0.hasOneUse()) {
|
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-09-23 21:02:20 +00:00
|
|
|
EVT MemVT = LN0->getMemoryVT();
|
2009-01-30 22:27:33 +00:00
|
|
|
SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
|
|
|
|
VT, LN0->getChain(), LN0->getBasePtr(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getSrcValue(),
|
2009-09-23 21:02:20 +00:00
|
|
|
LN0->getSrcValueOffset(), MemVT,
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->isVolatile(), LN0->getAlignment());
|
2006-05-05 05:58:59 +00:00
|
|
|
CombineTo(N, ExtLoad);
|
2008-08-29 23:20:46 +00:00
|
|
|
CombineTo(N0.getNode(),
|
2009-01-30 22:27:33 +00:00
|
|
|
DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
|
|
|
|
N0.getValueType(), ExtLoad),
|
2006-05-05 05:58:59 +00:00
|
|
|
ExtLoad.getValue(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
2006-05-05 05:58:59 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
|
|
|
|
if (N0.getOpcode() == ISD::SETCC) {
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue SCC =
|
2009-01-30 23:59:18 +00:00
|
|
|
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
|
2007-04-11 06:50:51 +00:00
|
|
|
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
|
2007-04-11 16:51:53 +00:00
|
|
|
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (SCC.getNode())
|
2007-04-11 06:43:25 +00:00
|
|
|
return SCC;
|
Teach the codegen to turn [aez]ext (setcc) -> selectcc of 1/0, which often
allows other simplifications. For example, this compiles:
int isnegative(unsigned int X) {
return !(X < 2147483648U);
}
Into this code:
x86:
movl 4(%esp), %eax
shrl $31, %eax
ret
arm:
mov r0, r0, lsr #31
bx lr
thumb:
lsr r0, r0, #31
bx lr
instead of:
x86:
cmpl $0, 4(%esp)
sets %al
movzbl %al, %eax
ret
arm:
mov r3, #0
cmp r0, #0
movlt r3, #1
mov r0, r3
bx lr
thumb:
mov r2, #1
mov r1, #0
cmp r0, #0
blt LBB1_2 @entry
LBB1_1: @entry
cpy r2, r1
LBB1_2: @entry
cpy r0, r2
bx lr
Testcase here: test/CodeGen/Generic/ispositive.ll
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35883 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:32:27 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-05-05 05:58:59 +00:00
|
|
|
}
|
|
|
|
|
2007-10-13 06:35:54 +00:00
|
|
|
/// GetDemandedBits - See if the specified operand can be simplified with the
|
|
|
|
/// knowledge that only the bits specified by Mask are used. If so, return the
|
2008-07-27 21:46:04 +00:00
|
|
|
/// simpler operand, otherwise return a null SDValue.
|
|
|
|
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
|
2007-10-13 06:35:54 +00:00
|
|
|
switch (V.getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case ISD::OR:
|
|
|
|
case ISD::XOR:
|
|
|
|
// If the LHS or RHS don't contribute bits to the or, drop them.
|
|
|
|
if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
|
|
|
|
return V.getOperand(1);
|
|
|
|
if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
|
|
|
|
return V.getOperand(0);
|
|
|
|
break;
|
Enhance the truncstore optimization code to handle shifted
values and propagate demanded bits through them in simple cases.
This allows this code:
void foo(char *P) {
strcpy(P, "abc");
}
to compile to:
_foo:
ldrb r3, [r1]
ldrb r2, [r1, #+1]
ldrb r12, [r1, #+2]!
ldrb r1, [r1, #+1]
strb r1, [r0, #+3]
strb r2, [r0, #+1]
strb r12, [r0, #+2]
strb r3, [r0]
bx lr
instead of:
_foo:
ldrb r3, [r1, #+3]
ldrb r2, [r1, #+2]
orr r3, r2, r3, lsl #8
ldrb r2, [r1, #+1]
ldrb r1, [r1]
orr r2, r1, r2, lsl #8
orr r3, r2, r3, lsl #16
strb r3, [r0]
mov r2, r3, lsr #24
strb r2, [r0, #+3]
mov r2, r3, lsr #16
strb r2, [r0, #+2]
mov r3, r3, lsr #8
strb r3, [r0, #+1]
bx lr
testcase here: test/CodeGen/ARM/truncstore-dag-combine.ll
This also helps occasionally for X86 and other cases not involving
unaligned load/stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42954 91177308-0d34-0410-b5e6-96231b3b80d8
2007-10-13 06:58:48 +00:00
|
|
|
case ISD::SRL:
|
|
|
|
// Only look at single-use SRLs.
|
2008-08-28 21:40:38 +00:00
|
|
|
if (!V.getNode()->hasOneUse())
|
Enhance the truncstore optimization code to handle shifted
values and propagate demanded bits through them in simple cases.
This allows this code:
void foo(char *P) {
strcpy(P, "abc");
}
to compile to:
_foo:
ldrb r3, [r1]
ldrb r2, [r1, #+1]
ldrb r12, [r1, #+2]!
ldrb r1, [r1, #+1]
strb r1, [r0, #+3]
strb r2, [r0, #+1]
strb r12, [r0, #+2]
strb r3, [r0]
bx lr
instead of:
_foo:
ldrb r3, [r1, #+3]
ldrb r2, [r1, #+2]
orr r3, r2, r3, lsl #8
ldrb r2, [r1, #+1]
ldrb r1, [r1]
orr r2, r1, r2, lsl #8
orr r3, r2, r3, lsl #16
strb r3, [r0]
mov r2, r3, lsr #24
strb r2, [r0, #+3]
mov r2, r3, lsr #16
strb r2, [r0, #+2]
mov r3, r3, lsr #8
strb r3, [r0, #+1]
bx lr
testcase here: test/CodeGen/ARM/truncstore-dag-combine.ll
This also helps occasionally for X86 and other cases not involving
unaligned load/stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42954 91177308-0d34-0410-b5e6-96231b3b80d8
2007-10-13 06:58:48 +00:00
|
|
|
break;
|
|
|
|
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
|
|
|
|
// See if we can recursively simplify the LHS.
|
2008-09-12 16:56:44 +00:00
|
|
|
unsigned Amt = RHSC->getZExtValue();
|
2009-01-30 22:33:24 +00:00
|
|
|
|
2009-01-03 19:22:06 +00:00
|
|
|
// Watch out for shift count overflow though.
|
|
|
|
if (Amt >= Mask.getBitWidth()) break;
|
2008-02-25 21:11:39 +00:00
|
|
|
APInt NewMask = Mask << Amt;
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
|
2009-01-30 22:33:24 +00:00
|
|
|
if (SimplifyLHS.getNode())
|
2009-02-17 22:15:04 +00:00
|
|
|
return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
|
Enhance the truncstore optimization code to handle shifted
values and propagate demanded bits through them in simple cases.
This allows this code:
void foo(char *P) {
strcpy(P, "abc");
}
to compile to:
_foo:
ldrb r3, [r1]
ldrb r2, [r1, #+1]
ldrb r12, [r1, #+2]!
ldrb r1, [r1, #+1]
strb r1, [r0, #+3]
strb r2, [r0, #+1]
strb r12, [r0, #+2]
strb r3, [r0]
bx lr
instead of:
_foo:
ldrb r3, [r1, #+3]
ldrb r2, [r1, #+2]
orr r3, r2, r3, lsl #8
ldrb r2, [r1, #+1]
ldrb r1, [r1]
orr r2, r1, r2, lsl #8
orr r3, r2, r3, lsl #16
strb r3, [r0]
mov r2, r3, lsr #24
strb r2, [r0, #+3]
mov r2, r3, lsr #16
strb r2, [r0, #+2]
mov r3, r3, lsr #8
strb r3, [r0, #+1]
bx lr
testcase here: test/CodeGen/ARM/truncstore-dag-combine.ll
This also helps occasionally for X86 and other cases not involving
unaligned load/stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42954 91177308-0d34-0410-b5e6-96231b3b80d8
2007-10-13 06:58:48 +00:00
|
|
|
SimplifyLHS, V.getOperand(1));
|
|
|
|
}
|
2007-10-13 06:35:54 +00:00
|
|
|
}
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2007-10-13 06:35:54 +00:00
|
|
|
}
|
|
|
|
|
2007-03-22 01:54:19 +00:00
|
|
|
/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
|
|
|
|
/// bits and then truncated to a narrower type and where N is a multiple
|
|
|
|
/// of number of bits of the narrower type, transform it to a narrower load
|
|
|
|
/// from address + N / num of bits of new type. If the result is to be
|
|
|
|
/// extended, also fold the extension to form a extending load.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
|
2007-03-22 01:54:19 +00:00
|
|
|
unsigned Opc = N->getOpcode();
|
|
|
|
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue N0 = N->getOperand(0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
|
|
|
EVT ExtVT = VT;
|
2007-03-22 01:54:19 +00:00
|
|
|
|
2008-08-14 20:04:46 +00:00
|
|
|
// This transformation isn't valid for vector loads.
|
|
|
|
if (VT.isVector())
|
|
|
|
return SDValue();
|
|
|
|
|
2007-03-23 22:13:36 +00:00
|
|
|
// Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
|
|
|
|
// extended to VT.
|
2007-03-22 01:54:19 +00:00
|
|
|
if (Opc == ISD::SIGN_EXTEND_INREG) {
|
|
|
|
ExtType = ISD::SEXTLOAD;
|
2009-08-10 22:56:29 +00:00
|
|
|
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
|
|
|
|
if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2007-03-22 01:54:19 +00:00
|
|
|
}
|
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
unsigned EVTBits = ExtVT.getSizeInBits();
|
2007-03-22 01:54:19 +00:00
|
|
|
unsigned ShAmt = 0;
|
2009-08-23 00:14:19 +00:00
|
|
|
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
|
2007-03-22 01:54:19 +00:00
|
|
|
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
2008-09-12 16:56:44 +00:00
|
|
|
ShAmt = N01->getZExtValue();
|
2007-03-22 01:54:19 +00:00
|
|
|
// Is the shift amount a multiple of size of VT?
|
|
|
|
if ((ShAmt & (EVTBits-1)) == 0) {
|
|
|
|
N0 = N0.getOperand(0);
|
2009-08-19 08:46:10 +00:00
|
|
|
// Is the load width a multiple of size of VT?
|
|
|
|
if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2007-03-22 01:54:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-06-16 08:14:38 +00:00
|
|
|
// Do not generate loads of non-round integer types since these can
|
|
|
|
// be expensive (and would be wrong if the type is not byte sized).
|
2009-08-10 22:56:29 +00:00
|
|
|
if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
|
2008-07-31 00:50:31 +00:00
|
|
|
cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
// Do not change the width of a volatile load.
|
|
|
|
!cast<LoadSDNode>(N0)->isVolatile()) {
|
2007-03-22 01:54:19 +00:00
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT PtrType = N0.getOperand(1).getValueType();
|
2009-01-30 22:33:24 +00:00
|
|
|
|
2007-03-24 00:02:43 +00:00
|
|
|
// For big endian targets, we need to adjust the offset to the pointer to
|
|
|
|
// load the correct bytes.
|
2008-02-11 10:37:04 +00:00
|
|
|
if (TLI.isBigEndian()) {
|
2008-07-31 00:50:31 +00:00
|
|
|
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
|
2009-08-10 22:56:29 +00:00
|
|
|
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
|
2007-11-09 08:57:19 +00:00
|
|
|
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
|
|
|
|
}
|
2009-01-30 22:33:24 +00:00
|
|
|
|
2007-03-24 00:02:43 +00:00
|
|
|
uint64_t PtrOff = ShAmt / 8;
|
2007-10-28 12:59:45 +00:00
|
|
|
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
|
2009-01-30 22:33:24 +00:00
|
|
|
PtrType, LN0->getBasePtr(),
|
2008-11-24 14:53:14 +00:00
|
|
|
DAG.getConstant(PtrOff, PtrType));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(NewPtr.getNode());
|
2009-01-30 22:33:24 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Load = (ExtType == ISD::NON_EXTLOAD)
|
2009-01-30 22:33:24 +00:00
|
|
|
? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
|
2008-07-31 00:50:31 +00:00
|
|
|
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
|
2007-10-28 12:59:45 +00:00
|
|
|
LN0->isVolatile(), NewAlign)
|
2009-01-30 22:33:24 +00:00
|
|
|
: DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
|
2008-07-31 00:50:31 +00:00
|
|
|
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
|
2009-08-10 22:56:29 +00:00
|
|
|
ExtVT, LN0->isVolatile(), NewAlign);
|
2009-01-30 22:33:24 +00:00
|
|
|
|
2009-01-21 15:17:51 +00:00
|
|
|
// Replace the old load's chain with the new load's chain.
|
|
|
|
WorkListRemover DeadNodes(*this);
|
|
|
|
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
|
|
|
|
&DeadNodes);
|
2009-01-30 22:33:24 +00:00
|
|
|
|
2009-01-21 15:17:51 +00:00
|
|
|
// Return the new loaded value.
|
|
|
|
return Load;
|
2007-03-22 01:54:19 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2007-03-22 01:54:19 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitSIGN_EXTEND_INREG - Try to simplify a SIGN_EXTEND_INREG node.
/// Operand 0 is the value being extended and operand 1 is a VTSDNode naming
/// the type whose sign bit is replicated into the higher bits.  Returns the
/// replacement value, SDValue(N, 0) if N was updated in place (so that it is
/// not revisited), or a null SDValue if no combine applied.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // Deliberately not named "EVT": that would shadow the EVT type for the rest
  // of the function.
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = ExtVT.getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);
  }

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    // SIGN_EXTEND takes a single operand; the VT operand (N1) belongs only to
    // SIGN_EXTEND_INREG and must not be forwarded to the new node.
    if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), ExtVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getSrcValue(),
                                     LN0->getSrcValueOffset(), ExtVT,
                                     LN0->isVolatile(), LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getSrcValue(),
                                     LN0->getSrcValueOffset(), ExtVT,
                                     LN0->isVolatile(), LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitTRUNCATE - Try to simplify a TRUNCATE node.  Each fold below returns
/// its replacement value as soon as it matches; when none of them applies the
/// result is whatever ReduceLoadWidth yields for N.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT DstVT = N->getValueType(0);

  // noop truncate: the operand already has the result type.
  if (Src.getValueType() == N->getValueType(0))
    return Src;

  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(Src))
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), DstVT, Src);

  switch (Src.getOpcode()) {
  case ISD::TRUNCATE:
    // fold (truncate (truncate x)) -> (truncate x)
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), DstVT,
                       Src.getOperand(0));

  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
    SDValue Inner = Src.getOperand(0);
    EVT InnerVT = Inner.getValueType();

    // The extend's source is narrower than the destination: an extend of the
    // same flavour is still required, just to the smaller type.
    if (InnerVT.bitsLT(DstVT))
      return DAG.getNode(Src.getOpcode(), N->getDebugLoc(), DstVT, Inner);

    // The extend's source is wider than the destination: only a truncate is
    // needed.
    if (InnerVT.bitsGT(DstVT))
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), DstVT, Inner);

    // Same type on both ends: the extend and the truncate cancel out.
    return Inner;
  }

  default:
    break;
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.  For example
  // "trunc (or (shl x, 8), y)" -> trunc y
  APInt LowBits = APInt::getLowBitsSet(Src.getValueSizeInBits(),
                                       DstVT.getSizeInBits());
  SDValue Shorter = GetDemandedBits(Src, LowBits);
  if (Shorter.getNode())
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), DstVT, Shorter);

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  return ReduceLoadWidth(N);
}
|
|
|
|
|
2008-05-12 23:04:07 +00:00
|
|
|
/// getBuildPairElt - Return the node that supplies operand i of N.  When that
/// operand is a result of a MERGE_VALUES node, look through it to the
/// MERGE_VALUES operand selected by the result number.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Op = N->getOperand(i);
  if (Op.getOpcode() == ISD::MERGE_VALUES)
    Op = Op.getOperand(Op.getResNo());
  return Op.getNode();
}
|
|
|
|
|
|
|
|
/// CombineConsecutiveLoads - build_pair (load, load) -> load
|
2009-02-17 22:15:04 +00:00
|
|
|
/// if load locations are consecutive.
|
2009-08-10 22:56:29 +00:00
|
|
|
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
|
2008-05-12 23:04:07 +00:00
|
|
|
assert(N->getOpcode() == ISD::BUILD_PAIR);
|
|
|
|
|
Adapt the x86 build_vector dagcombine to the current state of the legalizer.
build vectors with i64 elements will only appear on 32b x86 before legalize.
Since vector widening occurs during legalize, and produces i64 build_vector
elements, the dag combiner is never run on these before legalize splits them
into 32b elements.
Teach the build_vector dag combine in x86 back end to recognize consecutive
loads producing the low part of the vector.
Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes
since that was required implicitly.
Add a testcase for the transform.
Old:
subl $28, %esp
movl 32(%esp), %eax
movl 4(%eax), %ecx
movl %ecx, 4(%esp)
movl (%eax), %eax
movl %eax, (%esp)
movaps (%esp), %xmm0
pmovzxwd %xmm0, %xmm0
movl 36(%esp), %eax
movaps %xmm0, (%eax)
addl $28, %esp
ret
New:
movl 4(%esp), %eax
pmovzxwd (%eax), %xmm0
movl 8(%esp), %eax
movaps %xmm0, (%eax)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72957 91177308-0d34-0410-b5e6-96231b3b80d8
2009-06-05 21:37:30 +00:00
|
|
|
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
|
|
|
|
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
|
|
|
|
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT LD1VT = LD1->getValueType(0);
|
2009-01-30 22:44:24 +00:00
|
|
|
|
2008-05-12 23:04:07 +00:00
|
|
|
if (ISD::isNON_EXTLoad(LD2) &&
|
|
|
|
LD2->hasOneUse() &&
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
// If both are volatile this would reduce the number of volatile loads.
|
|
|
|
// If one is volatile it might be ok, but play conservative and bail out.
|
Adapt the x86 build_vector dagcombine to the current state of the legalizer.
build vectors with i64 elements will only appear on 32b x86 before legalize.
Since vector widening occurs during legalize, and produces i64 build_vector
elements, the dag combiner is never run on these before legalize splits them
into 32b elements.
Teach the build_vector dag combine in x86 back end to recognize consecutive
loads producing the low part of the vector.
Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes
since that was required implicitly.
Add a testcase for the transform.
Old:
subl $28, %esp
movl 32(%esp), %eax
movl 4(%eax), %ecx
movl %ecx, 4(%esp)
movl (%eax), %eax
movl %eax, (%esp)
movaps (%esp), %xmm0
pmovzxwd %xmm0, %xmm0
movl 36(%esp), %eax
movaps %xmm0, (%eax)
addl $28, %esp
ret
New:
movl 4(%esp), %eax
pmovzxwd (%eax), %xmm0
movl 8(%esp), %eax
movaps %xmm0, (%eax)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72957 91177308-0d34-0410-b5e6-96231b3b80d8
2009-06-05 21:37:30 +00:00
|
|
|
!LD1->isVolatile() &&
|
|
|
|
!LD2->isVolatile() &&
|
2009-12-09 01:36:00 +00:00
|
|
|
DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
|
Adapt the x86 build_vector dagcombine to the current state of the legalizer.
build vectors with i64 elements will only appear on 32b x86 before legalize.
Since vector widening occurs during legalize, and produces i64 build_vector
elements, the dag combiner is never run on these before legalize splits them
into 32b elements.
Teach the build_vector dag combine in x86 back end to recognize consecutive
loads producing the low part of the vector.
Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes
since that was required implicitly.
Add a testcase for the transform.
Old:
subl $28, %esp
movl 32(%esp), %eax
movl 4(%eax), %ecx
movl %ecx, 4(%esp)
movl (%eax), %eax
movl %eax, (%esp)
movaps (%esp), %xmm0
pmovzxwd %xmm0, %xmm0
movl 36(%esp), %eax
movaps %xmm0, (%eax)
addl $28, %esp
ret
New:
movl 4(%esp), %eax
pmovzxwd (%eax), %xmm0
movl 8(%esp), %eax
movaps %xmm0, (%eax)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72957 91177308-0d34-0410-b5e6-96231b3b80d8
2009-06-05 21:37:30 +00:00
|
|
|
unsigned Align = LD1->getAlignment();
|
2008-09-04 15:39:15 +00:00
|
|
|
unsigned NewAlign = TLI.getTargetData()->
|
2009-08-12 00:36:31 +00:00
|
|
|
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
|
2009-01-30 22:44:24 +00:00
|
|
|
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
if (NewAlign <= Align &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
|
Adapt the x86 build_vector dagcombine to the current state of the legalizer.
build vectors with i64 elements will only appear on 32b x86 before legalize.
Since vector widening occurs during legalize, and produces i64 build_vector
elements, the dag combiner is never run on these before legalize splits them
into 32b elements.
Teach the build_vector dag combine in x86 back end to recognize consecutive
loads producing the low part of the vector.
Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes
since that was required implicitly.
Add a testcase for the transform.
Old:
subl $28, %esp
movl 32(%esp), %eax
movl 4(%eax), %ecx
movl %ecx, 4(%esp)
movl (%eax), %eax
movl %eax, (%esp)
movaps (%esp), %xmm0
pmovzxwd %xmm0, %xmm0
movl 36(%esp), %eax
movaps %xmm0, (%eax)
addl $28, %esp
ret
New:
movl 4(%esp), %eax
pmovzxwd (%eax), %xmm0
movl 8(%esp), %eax
movaps %xmm0, (%eax)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72957 91177308-0d34-0410-b5e6-96231b3b80d8
2009-06-05 21:37:30 +00:00
|
|
|
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
|
|
|
|
LD1->getBasePtr(), LD1->getSrcValue(),
|
|
|
|
LD1->getSrcValueOffset(), false, Align);
|
2008-05-12 23:04:07 +00:00
|
|
|
}
|
2009-01-30 22:44:24 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2008-05-12 23:04:07 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
constant fold bits_convert in getNode and in the dag combiner for fp<->int
conversions. This allows V8 to compile this:
void %test() {
call float %test2( float 1.000000e+00, float 2.000000e+00, double 3.000000e+00, double* null )
ret void
}
into:
test:
save -96, %o6, %o6
sethi 0, %o3
sethi 1049088, %o2
sethi 1048576, %o1
sethi 1040384, %o0
or %g0, %o3, %o4
call test2
nop
restore %g0, %g0, %g0
retl
nop
instead of:
test:
save -112, %o6, %o6
sethi 0, %o4
sethi 1049088, %l0
st %o4, [%i6+-12]
st %l0, [%i6+-16]
ld [%i6+-12], %o3
ld [%i6+-16], %o2
sethi 1048576, %o1
sethi 1040384, %o0
call test2
nop
restore %g0, %g0, %g0
retl
nop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24980 91177308-0d34-0410-b5e6-96231b3b80d8
2005-12-23 05:30:37 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
|
|
|
|
// Only do this before legalize, since afterward the target may be depending
|
|
|
|
// on the bitconvert.
|
|
|
|
// First check to see if this is all constant.
|
2008-11-24 14:53:14 +00:00
|
|
|
if (!LegalTypes &&
|
2008-08-28 21:40:38 +00:00
|
|
|
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
|
2008-06-06 12:08:01 +00:00
|
|
|
VT.isVector()) {
|
2007-06-25 16:23:39 +00:00
|
|
|
bool isSimple = true;
|
|
|
|
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
|
|
|
|
if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
|
|
|
|
N0.getOperand(i).getOpcode() != ISD::Constant &&
|
|
|
|
N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
|
2009-02-17 22:15:04 +00:00
|
|
|
isSimple = false;
|
2007-06-25 16:23:39 +00:00
|
|
|
break;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT DestEltVT = N->getValueType(0).getVectorElementType();
|
2008-06-06 12:08:01 +00:00
|
|
|
assert(!DestEltVT.isVector() &&
|
2007-06-25 16:23:39 +00:00
|
|
|
"Element type of vector ValueType must not be vector!");
|
2009-01-30 22:44:24 +00:00
|
|
|
if (isSimple)
|
2008-08-28 21:40:38 +00:00
|
|
|
return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
|
2007-06-25 16:23:39 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-09-05 01:58:21 +00:00
|
|
|
// If the input is a constant, let getNode fold it.
|
constant fold bits_convert in getNode and in the dag combiner for fp<->int
conversions. This allows V8 to compile this:
void %test() {
call float %test2( float 1.000000e+00, float 2.000000e+00, double 3.000000e+00, double* null )
ret void
}
into:
test:
save -96, %o6, %o6
sethi 0, %o3
sethi 1049088, %o2
sethi 1048576, %o1
sethi 1040384, %o0
or %g0, %o3, %o4
call test2
nop
restore %g0, %g0, %g0
retl
nop
instead of:
test:
save -112, %o6, %o6
sethi 0, %o4
sethi 1049088, %l0
st %o4, [%i6+-12]
st %l0, [%i6+-16]
ld [%i6+-12], %o3
ld [%i6+-16], %o2
sethi 1048576, %o1
sethi 1040384, %o0
call test2
nop
restore %g0, %g0, %g0
retl
nop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24980 91177308-0d34-0410-b5e6-96231b3b80d8
2005-12-23 05:30:37 +00:00
|
|
|
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
|
2009-01-30 22:44:24 +00:00
|
|
|
SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
|
2009-08-10 23:15:10 +00:00
|
|
|
if (Res.getNode() != N) {
|
|
|
|
if (!LegalOperations ||
|
|
|
|
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
|
|
|
|
return Res;
|
|
|
|
|
|
|
|
// Folding it resulted in an illegal node, and it's too late to
|
|
|
|
// do that. Clean up the old node and forego the transformation.
|
|
|
|
// Ideally this won't happen very often, because instcombine
|
|
|
|
// and the earlier dagcombine runs (where illegal nodes are
|
|
|
|
// permitted) should have folded most of them already.
|
|
|
|
DAG.DeleteNode(Res.getNode());
|
|
|
|
}
|
constant fold bits_convert in getNode and in the dag combiner for fp<->int
conversions. This allows V8 to compile this:
void %test() {
call float %test2( float 1.000000e+00, float 2.000000e+00, double 3.000000e+00, double* null )
ret void
}
into:
test:
save -96, %o6, %o6
sethi 0, %o3
sethi 1049088, %o2
sethi 1048576, %o1
sethi 1040384, %o0
or %g0, %o3, %o4
call test2
nop
restore %g0, %g0, %g0
retl
nop
instead of:
test:
save -112, %o6, %o6
sethi 0, %o4
sethi 1049088, %l0
st %o4, [%i6+-12]
st %l0, [%i6+-16]
ld [%i6+-12], %o3
ld [%i6+-16], %o2
sethi 1048576, %o1
sethi 1040384, %o0
call test2
nop
restore %g0, %g0, %g0
retl
nop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24980 91177308-0d34-0410-b5e6-96231b3b80d8
2005-12-23 05:30:37 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 22:44:24 +00:00
|
|
|
// (conv (conv x, t1), t2) -> (conv x, t2)
|
|
|
|
if (N0.getOpcode() == ISD::BIT_CONVERT)
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
|
|
|
|
N0.getOperand(0));
|
2006-04-02 02:53:43 +00:00
|
|
|
|
fold (conv (load x)) -> (load (conv*)x).
This allows us to compile this:
void foo(double);
void bar(double *X) { foo(*X); }
To this:
bar:
save -96, %o6, %o6
ld [%i0+4], %o1
ld [%i0], %o0
call foo
nop
restore %g0, %g0, %g0
retl
nop
instead of this:
bar:
save -104, %o6, %o6
ldd [%i0], %f0
std %f0, [%i6+-8]
ld [%i6+-4], %o1
ld [%i6+-8], %o0
call foo
nop
restore %g0, %g0, %g0
retl
nop
on SparcV8.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24982 91177308-0d34-0410-b5e6-96231b3b80d8
2005-12-23 05:44:41 +00:00
|
|
|
// fold (conv (load x)) -> (load (conv*)x)
|
2007-10-06 08:19:55 +00:00
|
|
|
// If the resultant load doesn't need a higher alignment than the original!
|
2008-08-28 21:40:38 +00:00
|
|
|
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
// Do not change the width of a volatile load.
|
|
|
|
!cast<LoadSDNode>(N0)->isVolatile() &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
|
2006-10-09 20:57:25 +00:00
|
|
|
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
2008-09-04 15:39:15 +00:00
|
|
|
unsigned Align = TLI.getTargetData()->
|
2009-08-12 00:36:31 +00:00
|
|
|
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
|
2007-05-07 21:27:48 +00:00
|
|
|
unsigned OrigAlign = LN0->getAlignment();
|
2009-01-30 22:44:24 +00:00
|
|
|
|
2007-05-07 21:27:48 +00:00
|
|
|
if (Align <= OrigAlign) {
|
2009-01-30 22:44:24 +00:00
|
|
|
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
|
|
|
|
LN0->getBasePtr(),
|
2008-11-24 14:53:14 +00:00
|
|
|
LN0->getSrcValue(), LN0->getSrcValueOffset(),
|
|
|
|
LN0->isVolatile(), OrigAlign);
|
2007-05-07 21:27:48 +00:00
|
|
|
AddToWorkList(N);
|
2008-08-30 19:29:20 +00:00
|
|
|
CombineTo(N0.getNode(),
|
2009-01-30 22:44:24 +00:00
|
|
|
DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
|
|
|
|
N0.getValueType(), Load),
|
2007-05-07 21:27:48 +00:00
|
|
|
Load.getValue(1));
|
|
|
|
return Load;
|
|
|
|
}
|
fold (conv (load x)) -> (load (conv*)x).
This allows us to compile this:
void foo(double);
void bar(double *X) { foo(*X); }
To this:
bar:
save -96, %o6, %o6
ld [%i0+4], %o1
ld [%i0], %o0
call foo
nop
restore %g0, %g0, %g0
retl
nop
instead of this:
bar:
save -104, %o6, %o6
ldd [%i0], %f0
std %f0, [%i6+-8]
ld [%i6+-4], %o1
ld [%i6+-8], %o0
call foo
nop
restore %g0, %g0, %g0
retl
nop
on SparcV8.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24982 91177308-0d34-0410-b5e6-96231b3b80d8
2005-12-23 05:44:41 +00:00
|
|
|
}
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
|
2009-01-30 22:44:24 +00:00
|
|
|
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
|
|
|
|
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
// This often reduces constant pool loads.
|
|
|
|
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
|
2008-08-28 21:40:38 +00:00
|
|
|
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
|
2009-01-30 22:44:24 +00:00
|
|
|
SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
|
|
|
|
N0.getOperand(0));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(NewConv.getNode());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-06-06 12:08:01 +00:00
|
|
|
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
if (N0.getOpcode() == ISD::FNEG)
|
2009-01-30 22:44:24 +00:00
|
|
|
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
|
|
|
|
NewConv, DAG.getConstant(SignBit, VT));
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
assert(N0.getOpcode() == ISD::FABS);
|
2009-01-30 22:44:24 +00:00
|
|
|
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
|
|
|
|
NewConv, DAG.getConstant(~SignBit, VT));
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 22:44:24 +00:00
|
|
|
// fold (bitconvert (fcopysign cst, x)) ->
|
|
|
|
// (or (and (bitconvert x), sign), (and cst, (not sign)))
|
|
|
|
// Note that we don't handle (copysign x, cst) because this can always be
|
|
|
|
// folded to an fneg or fabs.
|
2008-08-28 21:40:38 +00:00
|
|
|
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
|
2008-01-27 23:32:17 +00:00
|
|
|
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
|
2008-06-06 12:08:01 +00:00
|
|
|
VT.isInteger() && !VT.isVector()) {
|
|
|
|
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
|
2008-11-24 14:53:14 +00:00
|
|
|
if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
|
2009-01-30 22:44:24 +00:00
|
|
|
IntXVT, N0.getOperand(1));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(X.getNode());
|
2008-11-24 14:53:14 +00:00
|
|
|
|
|
|
|
// If X has a different width than the result/lhs, sext it or truncate it.
|
|
|
|
unsigned VTWidth = VT.getSizeInBits();
|
|
|
|
if (OrigXWidth < VTWidth) {
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
|
2008-11-24 14:53:14 +00:00
|
|
|
AddToWorkList(X.getNode());
|
|
|
|
} else if (OrigXWidth > VTWidth) {
|
|
|
|
// To get the sign bit in the right place, we have to shift it right
|
|
|
|
// before truncating.
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
|
2009-01-30 22:44:24 +00:00
|
|
|
X.getValueType(), X,
|
2008-11-24 14:53:14 +00:00
|
|
|
DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
|
|
|
|
AddToWorkList(X.getNode());
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
|
2008-11-24 14:53:14 +00:00
|
|
|
AddToWorkList(X.getNode());
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-11-24 14:53:14 +00:00
|
|
|
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
|
2009-01-31 03:12:48 +00:00
|
|
|
X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
|
2009-01-30 22:44:24 +00:00
|
|
|
X, DAG.getConstant(SignBit, VT));
|
2008-11-24 14:53:14 +00:00
|
|
|
AddToWorkList(X.getNode());
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
|
2009-01-30 22:44:24 +00:00
|
|
|
VT, N0.getOperand(0));
|
2009-01-31 03:12:48 +00:00
|
|
|
Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
|
2009-01-30 22:44:24 +00:00
|
|
|
Cst, DAG.getConstant(~SignBit, VT));
|
2008-11-24 14:53:14 +00:00
|
|
|
AddToWorkList(Cst.getNode());
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
|
2009-01-30 22:44:24 +00:00
|
|
|
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
|
2008-11-24 14:53:14 +00:00
|
|
|
}
|
Implement some dag combines that allow doing fneg/fabs/fcopysign in integer
registers if used by a bitconvert or using a bitconvert. This allows us to
avoid constant pool loads and use cheaper integer instructions when the
values come from or end up in integer regs anyway. For example, we now
compile CodeGen/X86/fp-in-intregs.ll to:
_test1:
movl $2147483648, %eax
xorl 4(%esp), %eax
ret
_test2:
movl $1065353216, %eax
orl 4(%esp), %eax
andl $3212836864, %eax
ret
Instead of:
_test1:
movss 4(%esp), %xmm0
xorps LCPI2_0, %xmm0
movd %xmm0, %eax
ret
_test2:
movss 4(%esp), %xmm0
andps LCPI3_0, %xmm0
movss LCPI3_1, %xmm1
andps LCPI3_2, %xmm1
orps %xmm0, %xmm1
movd %xmm1, %eax
ret
bitconverts can happen due to various calling conventions that require
fp values to be passed in integer regs in some cases, e.g. when returning
a complex.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46414 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-27 17:42:27 +00:00
|
|
|
}
|
2008-05-12 23:04:07 +00:00
|
|
|
|
2009-02-17 22:15:04 +00:00
|
|
|
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
|
2008-05-12 23:04:07 +00:00
|
|
|
if (N0.getOpcode() == ISD::BUILD_PAIR) {
|
2008-08-28 21:40:38 +00:00
|
|
|
SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
|
|
|
|
if (CombineLD.getNode())
|
2008-05-12 23:04:07 +00:00
|
|
|
return CombineLD;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
constant fold bits_convert in getNode and in the dag combiner for fp<->int
conversions. This allows V8 to compile this:
void %test() {
call float %test2( float 1.000000e+00, float 2.000000e+00, double 3.000000e+00, double* null )
ret void
}
into:
test:
save -96, %o6, %o6
sethi 0, %o3
sethi 1049088, %o2
sethi 1048576, %o1
sethi 1040384, %o0
or %g0, %o3, %o4
call test2
nop
restore %g0, %g0, %g0
retl
nop
instead of:
test:
save -112, %o6, %o6
sethi 0, %o4
sethi 1049088, %l0
st %o4, [%i6+-12]
st %l0, [%i6+-16]
ld [%i6+-12], %o3
ld [%i6+-16], %o2
sethi 1048576, %o1
sethi 1040384, %o0
call test2
nop
restore %g0, %g0, %g0
retl
nop
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24980 91177308-0d34-0410-b5e6-96231b3b80d8
2005-12-23 05:30:37 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitBUILD_PAIR - Combine a BUILD_PAIR node by handing it, together with
/// its result type, to CombineConsecutiveLoads and returning whatever that
/// helper produces.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2008-05-12 23:04:07 +00:00
|
|
|
return CombineConsecutiveLoads(N, VT);
|
|
|
|
}
|
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
|
2009-02-17 22:15:04 +00:00
|
|
|
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
|
2006-04-02 02:53:43 +00:00
|
|
|
/// destination element value type.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::
|
2009-08-10 22:56:29 +00:00
|
|
|
ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
|
|
|
|
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// If this is already the right type, we're done.
|
2008-07-27 21:46:04 +00:00
|
|
|
if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-06-06 12:08:01 +00:00
|
|
|
unsigned SrcBitSize = SrcEltVT.getSizeInBits();
|
|
|
|
unsigned DstBitSize = DstEltVT.getSizeInBits();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// If this is a conversion of N elements of one type to N elements of another
|
|
|
|
// type, convert each element. This handles FP<->INT cases.
|
|
|
|
if (SrcBitSize == DstBitSize) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SmallVector<SDValue, 8> Ops;
|
2007-06-25 16:23:39 +00:00
|
|
|
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
|
2009-04-13 22:05:19 +00:00
|
|
|
SDValue Op = BV->getOperand(i);
|
|
|
|
// If the vector element type is not legal, the BUILD_VECTOR operands
|
|
|
|
// are promoted and implicitly truncated. Make that explicit here.
|
2009-04-20 17:27:09 +00:00
|
|
|
if (Op.getValueType() != SrcEltVT)
|
|
|
|
Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
|
2009-02-01 11:19:36 +00:00
|
|
|
Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
|
2009-04-13 22:05:19 +00:00
|
|
|
DstEltVT, Op));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Ops.back().getNode());
|
2006-04-08 04:15:24 +00:00
|
|
|
}
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
|
2008-06-06 12:08:01 +00:00
|
|
|
BV->getValueType(0).getVectorNumElements());
|
2009-02-25 22:49:59 +00:00
|
|
|
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
|
|
|
|
&Ops[0], Ops.size());
|
2006-04-02 02:53:43 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// Otherwise, we're growing or shrinking the elements. To avoid having to
|
|
|
|
// handle annoying details of growing/shrinking FP values, we convert them to
|
|
|
|
// int first.
|
2008-06-06 12:08:01 +00:00
|
|
|
if (SrcEltVT.isFloatingPoint()) {
|
2006-04-02 02:53:43 +00:00
|
|
|
// Convert the input float vector to a int vector where the elements are the
|
|
|
|
// same sizes.
|
2009-08-11 20:47:22 +00:00
|
|
|
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
|
2008-08-28 21:40:38 +00:00
|
|
|
// Recurse with an integer element type of the same bit size as SrcEltVT,
// so the recursive call takes the equal-size path above; BV is replaced
// by the resulting node.
BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
|
2006-04-02 02:53:43 +00:00
|
|
|
SrcEltVT = IntVT;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// Now we know the input is an integer vector. If the output is a FP type,
|
|
|
|
// convert to integer first, then to FP of the right size.
|
2008-06-06 12:08:01 +00:00
|
|
|
if (DstEltVT.isFloatingPoint()) {
|
2009-08-11 20:47:22 +00:00
|
|
|
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
|
2008-08-28 21:40:38 +00:00
|
|
|
SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// Next, convert to FP elements of the same size.
|
2007-06-25 16:23:39 +00:00
|
|
|
return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
|
2006-04-02 02:53:43 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// Okay, we know the src/dst types are both integers of differing types.
|
|
|
|
// Handling growing first.
|
2008-06-06 12:08:01 +00:00
|
|
|
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
|
2006-04-02 02:53:43 +00:00
|
|
|
if (SrcBitSize < DstBitSize) {
|
|
|
|
unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SmallVector<SDValue, 8> Ops;
|
2007-06-25 16:23:39 +00:00
|
|
|
for (unsigned i = 0, e = BV->getNumOperands(); i != e;
|
2006-04-02 02:53:43 +00:00
|
|
|
i += NumInputsPerOutput) {
|
|
|
|
// Each group of NumInputsPerOutput consecutive operands is packed into
// one DstBitSize-wide constant.
bool isLE = TLI.isLittleEndian();
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt NewBits = APInt(DstBitSize, 0);
|
2006-04-02 02:53:43 +00:00
|
|
|
bool EltIsUndef = true;
|
|
|
|
for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
|
|
|
|
// Shift the previously computed bits over.
|
|
|
|
NewBits <<= SrcBitSize;
|
2008-07-27 21:46:04 +00:00
|
|
|
// Because NewBits is shifted left before every piece is OR'd in, the
// piece visited first ends up in the highest bits: on little-endian
// targets that is the last operand of the group, otherwise the first.
SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
|
2006-04-02 02:53:43 +00:00
|
|
|
// An undef piece leaves zero bits in its slot; EltIsUndef only stays
// true when every piece of the group is undef.
if (Op.getOpcode() == ISD::UNDEF) continue;
|
|
|
|
EltIsUndef = false;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-04-13 22:05:19 +00:00
|
|
|
NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
|
|
|
|
zextOrTrunc(SrcBitSize).zext(DstBitSize));
|
2006-04-02 02:53:43 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
if (EltIsUndef)
|
2009-02-06 23:05:02 +00:00
|
|
|
Ops.push_back(DAG.getUNDEF(DstEltVT));
|
2006-04-02 02:53:43 +00:00
|
|
|
else
|
|
|
|
Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
|
|
|
|
}
|
|
|
|
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
|
2009-02-25 22:49:59 +00:00
|
|
|
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
|
|
|
|
&Ops[0], Ops.size());
|
2006-04-02 02:53:43 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
// Finally, this must be the case where we are shrinking elements: each input
|
|
|
|
// turns into multiple outputs.
|
- When DAG combiner is folding a bit convert into a BUILD_VECTOR, it should check if it's essentially a SCALAR_TO_VECTOR. Avoid turning (v8i16) <10, u, u, u> to <10, 0, u, u, u, u, u, u>. Instead, simply convert it to a SCALAR_TO_VECTOR of the proper type.
- X86 now normalize SCALAR_TO_VECTOR to (BIT_CONVERT (v4i32 SCALAR_TO_VECTOR)). Get rid of X86ISD::S2VEC.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47290 91177308-0d34-0410-b5e6-96231b3b80d8
2008-02-18 23:04:32 +00:00
|
|
|
bool isS2V = ISD::isScalarToVector(BV);
|
2006-04-02 02:53:43 +00:00
|
|
|
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
|
|
|
|
NumOutputsPerInput*BV->getNumOperands());
|
2008-07-27 21:46:04 +00:00
|
|
|
SmallVector<SDValue, 8> Ops;
|
2009-01-30 22:53:48 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
|
2006-04-02 02:53:43 +00:00
|
|
|
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
|
|
|
|
for (unsigned j = 0; j != NumOutputsPerInput; ++j)
|
2009-02-06 23:05:02 +00:00
|
|
|
Ops.push_back(DAG.getUNDEF(DstEltVT));
|
2006-04-02 02:53:43 +00:00
|
|
|
continue;
|
|
|
|
}
|
2009-01-30 22:53:48 +00:00
|
|
|
|
2009-04-13 22:05:19 +00:00
|
|
|
APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
|
|
|
|
getAPIntValue()).zextOrTrunc(SrcBitSize);
|
2009-01-30 22:53:48 +00:00
|
|
|
|
2006-04-02 02:53:43 +00:00
|
|
|
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
|
2008-03-03 23:51:38 +00:00
|
|
|
APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
|
2006-04-02 02:53:43 +00:00
|
|
|
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
|
2008-03-03 23:51:38 +00:00
|
|
|
if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
|
- When DAG combiner is folding a bit convert into a BUILD_VECTOR, it should check if it's essentially a SCALAR_TO_VECTOR. Avoid turning (v8i16) <10, u, u, u> to <10, 0, u, u, u, u, u, u>. Instead, simply convert it to a SCALAR_TO_VECTOR of the proper type.
- X86 now normalize SCALAR_TO_VECTOR to (BIT_CONVERT (v4i32 SCALAR_TO_VECTOR)). Get rid of X86ISD::S2VEC.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47290 91177308-0d34-0410-b5e6-96231b3b80d8
2008-02-18 23:04:32 +00:00
|
|
|
// Simply turn this into a SCALAR_TO_VECTOR of the new type.
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
|
|
|
|
Ops[0]);
|
2008-03-03 23:51:38 +00:00
|
|
|
// Discard the low DstBitSize bits just emitted so the next iteration
// truncates out the next-higher piece.
OpVal = OpVal.lshr(DstBitSize);
|
2006-04-02 02:53:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// For big endian targets, swap the order of the pieces of each element.
|
2008-02-11 10:37:04 +00:00
|
|
|
if (TLI.isBigEndian())
|
2006-04-02 02:53:43 +00:00
|
|
|
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
|
|
|
|
}
|
|
|
|
|
2009-02-25 22:49:59 +00:00
|
|
|
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
|
|
|
|
&Ops[0], Ops.size());
|
2009-01-30 22:53:48 +00:00
|
|
|
}
|
2006-04-02 02:53:43 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFADD - Try the FADD simplifications below in order and return the
/// replacement for the first one that applies, or an empty SDValue when none
/// does.
SDValue DAGCombiner::visitFADD(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2005-10-18 00:28:13 +00:00
|
|
|
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
|
|
|
|
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// fold vector ops
|
2008-06-06 12:08:01 +00:00
|
|
|
if (VT.isVector()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue FoldedVOp = SimplifyVBinOp(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (FoldedVOp.getNode()) return FoldedVOp;
|
2007-07-13 20:03:40 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 22:53:48 +00:00
|
|
|
// fold (fadd c1, c2) -> c1+c2
// This rebuilds the node from the two constant operands; presumably
// DAG.getNode performs the actual constant folding -- confirm. ppcf128 is
// explicitly excluded.
|
2009-08-11 20:47:22 +00:00
|
|
|
if (N0CFP && N1CFP && VT != MVT::ppcf128)
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
|
2005-10-18 00:28:13 +00:00
|
|
|
// canonicalize constant to RHS
|
|
|
|
if (N0CFP && !N1CFP)
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
|
|
|
|
// fold (fadd A, 0) -> A
|
2009-01-22 21:58:43 +00:00
|
|
|
if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
|
|
|
|
return N0;
|
2009-01-30 22:53:48 +00:00
|
|
|
// fold (fadd A, (fneg B)) -> (fsub A, B)
|
2008-11-24 14:53:14 +00:00
|
|
|
// NOTE(review): only a result of exactly 2 is accepted here; presumably 2
// means the negated form is cheaper -- confirm against isNegatibleForFree.
if (isNegatibleForFree(N1, LegalOperations) == 2)
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
|
2008-11-24 14:53:14 +00:00
|
|
|
GetNegatedExpression(N1, DAG, LegalOperations));
|
2009-01-30 22:53:48 +00:00
|
|
|
// fold (fadd (fneg A), B) -> (fsub B, A)
|
2008-11-24 14:53:14 +00:00
|
|
|
if (isNegatibleForFree(N0, LegalOperations) == 2)
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
|
2008-11-24 14:53:14 +00:00
|
|
|
GetNegatedExpression(N0, DAG, LegalOperations));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-01-08 23:04:05 +00:00
|
|
|
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
|
|
|
|
// Only done under UnsafeFPMath and when the inner FADD has a single use.
if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
|
2008-08-28 21:40:38 +00:00
|
|
|
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
|
2009-02-01 11:19:36 +00:00
|
|
|
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
|
|
|
|
N0.getOperand(1), N1));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-28 22:28:18 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFSUB - Try the FSUB simplifications below in order and return the
/// replacement for the first one that applies, or an empty SDValue when none
/// does.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2005-10-18 00:28:13 +00:00
|
|
|
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
|
|
|
|
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// fold vector ops
|
2008-06-06 12:08:01 +00:00
|
|
|
if (VT.isVector()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue FoldedVOp = SimplifyVBinOp(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (FoldedVOp.getNode()) return FoldedVOp;
|
2007-07-13 20:03:40 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-18 00:28:13 +00:00
|
|
|
// fold (fsub c1, c2) -> c1-c2
|
2009-08-11 20:47:22 +00:00
|
|
|
// ppcf128 is explicitly excluded from this fold.
if (N0CFP && N1CFP && VT != MVT::ppcf128)
|
2009-02-01 11:19:36 +00:00
|
|
|
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
|
2009-01-30 22:53:48 +00:00
|
|
|
// fold (fsub A, 0) -> A
|
2009-01-23 19:10:37 +00:00
|
|
|
if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
|
|
|
|
return N0;
|
2009-01-30 22:53:48 +00:00
|
|
|
// fold (fsub 0, B) -> -B
|
2007-08-31 23:34:27 +00:00
|
|
|
if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
|
2008-11-24 14:53:14 +00:00
|
|
|
// Prefer a free negation of B; otherwise emit an explicit FNEG when that
// is permitted. If neither applies, fall through to the folds below.
if (isNegatibleForFree(N1, LegalOperations))
|
|
|
|
return GetNegatedExpression(N1, DAG, LegalOperations);
|
2009-01-22 21:58:43 +00:00
|
|
|
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
|
2007-07-02 15:48:56 +00:00
|
|
|
}
|
2009-01-30 22:53:48 +00:00
|
|
|
// fold (fsub A, (fneg B)) -> (fadd A, B)
|
2008-11-24 14:53:14 +00:00
|
|
|
// Any nonzero result from isNegatibleForFree is accepted here.
if (isNegatibleForFree(N1, LegalOperations))
|
2009-01-30 22:53:48 +00:00
|
|
|
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
|
2008-11-24 14:53:14 +00:00
|
|
|
GetNegatedExpression(N1, DAG, LegalOperations));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-28 22:28:18 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFMUL - Try the FMUL simplifications below in order and return the
/// replacement for the first one that applies, or an empty SDValue when none
/// does.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2005-10-17 20:40:11 +00:00
|
|
|
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
|
|
|
|
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2005-09-28 22:28:18 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// fold vector ops
|
2008-06-06 12:08:01 +00:00
|
|
|
if (VT.isVector()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue FoldedVOp = SimplifyVBinOp(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (FoldedVOp.getNode()) return FoldedVOp;
|
2007-07-13 20:03:40 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-17 20:40:11 +00:00
|
|
|
// fold (fmul c1, c2) -> c1*c2
|
2009-08-11 20:47:22 +00:00
|
|
|
// ppcf128 is explicitly excluded from this fold.
if (N0CFP && N1CFP && VT != MVT::ppcf128)
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
|
2005-10-17 20:40:11 +00:00
|
|
|
// canonicalize constant to RHS
|
2005-10-18 00:28:13 +00:00
|
|
|
if (N0CFP && !N1CFP)
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
|
|
|
|
// fold (fmul A, 0) -> 0
|
2009-01-22 21:58:43 +00:00
|
|
|
if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
|
|
|
|
return N1;
|
2009-06-04 17:12:12 +00:00
|
|
|
// fold (fmul A, 0) -> 0, vector edition.
|
|
|
|
if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
|
|
|
|
return N1;
|
2005-10-17 20:40:11 +00:00
|
|
|
// fold (fmul X, 2.0) -> (fadd X, X)
|
|
|
|
if (N1CFP && N1CFP->isExactlyValue(+2.0))
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
|
2009-08-10 16:50:32 +00:00
|
|
|
// fold (fmul X, -1.0) -> (fneg X)
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
if (N1CFP && N1CFP->isExactlyValue(-1.0))
|
2009-01-22 21:58:43 +00:00
|
|
|
// The FNEG is only created when FNEG is permitted for VT.
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 22:57:07 +00:00
|
|
|
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
|
2008-11-24 14:53:14 +00:00
|
|
|
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
|
|
|
|
if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
// Both can be negated for free, check to see if at least one is cheaper
|
|
|
|
// negated.
|
|
|
|
if (LHSNeg == 2 || RHSNeg == 2)
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
|
2008-11-24 14:53:14 +00:00
|
|
|
GetNegatedExpression(N0, DAG, LegalOperations),
|
|
|
|
GetNegatedExpression(N1, DAG, LegalOperations));
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-01-08 23:04:05 +00:00
|
|
|
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
|
|
|
|
// Only done under UnsafeFPMath and when the inner FMUL has a single use.
if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
|
2008-08-28 21:40:38 +00:00
|
|
|
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
|
2009-02-17 22:15:04 +00:00
|
|
|
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
|
2009-02-06 21:50:26 +00:00
|
|
|
N0.getOperand(1), N1));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-28 22:28:18 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFDIV - Try the FDIV simplifications below in order and return the
/// replacement for the first one that applies, or an empty SDValue when none
/// does.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2006-01-18 22:35:16 +00:00
|
|
|
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
|
|
|
|
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2005-09-28 22:28:18 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// fold vector ops
|
2008-06-06 12:08:01 +00:00
|
|
|
if (VT.isVector()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue FoldedVOp = SimplifyVBinOp(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (FoldedVOp.getNode()) return FoldedVOp;
|
2007-07-13 20:03:40 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-01-18 22:35:16 +00:00
|
|
|
// fold (fdiv c1, c2) -> c1/c2
|
2009-08-11 20:47:22 +00:00
|
|
|
// ppcf128 is explicitly excluded from this fold.
if (N0CFP && N1CFP && VT != MVT::ppcf128)
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
|
|
|
|
2009-01-30 22:57:07 +00:00
|
|
|
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
|
2008-11-24 14:53:14 +00:00
|
|
|
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
|
|
|
|
if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
// Both can be negated for free, check to see if at least one is cheaper
|
|
|
|
// negated.
|
|
|
|
if (LHSNeg == 2 || RHSNeg == 2)
|
2009-02-17 22:15:04 +00:00
|
|
|
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
|
2008-11-24 14:53:14 +00:00
|
|
|
GetNegatedExpression(N0, DAG, LegalOperations),
|
|
|
|
GetNegatedExpression(N1, DAG, LegalOperations));
|
implement a simple fneg optimization/propagation thing. This compiles:
CodeGen/PowerPC/fneg.ll into:
_t4:
fmul f0, f3, f4
fmadd f1, f1, f2, f0
blr
instead of:
_t4:
fneg f0, f3
fmul f0, f0, f4
fmsub f1, f1, f2, f0
blr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37054 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-14 22:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-28 22:28:18 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFREM - The only simplification attempted for FREM is the
/// two-constant case; otherwise an empty SDValue is returned.
SDValue DAGCombiner::visitFREM(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2006-01-18 22:35:16 +00:00
|
|
|
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
|
|
|
|
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2005-09-28 22:28:18 +00:00
|
|
|
|
2006-01-18 22:35:16 +00:00
|
|
|
// fold (frem c1, c2) -> fmod(c1,c2)
|
2009-08-11 20:47:22 +00:00
|
|
|
// ppcf128 is explicitly excluded from this fold.
if (N0CFP && N1CFP && VT != MVT::ppcf128)
|
2009-01-30 22:57:07 +00:00
|
|
|
return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
|
2007-06-25 16:23:39 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-28 22:28:18 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFCOPYSIGN - Try the FCOPYSIGN simplifications below in order and
/// return the replacement for the first one that applies, or an empty SDValue
/// when none does. N0 is the magnitude operand and N1 supplies the sign.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
|
|
|
SDValue N1 = N->getOperand(1);
|
2006-03-05 05:30:57 +00:00
|
|
|
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
|
|
|
|
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2006-03-05 05:30:57 +00:00
|
|
|
|
2009-08-11 20:47:22 +00:00
|
|
|
if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
|
2009-02-01 11:19:36 +00:00
|
|
|
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-03-05 05:30:57 +00:00
|
|
|
if (N1CFP) {
|
2007-08-26 01:18:27 +00:00
|
|
|
const APFloat& V = N1CFP->getValueAPF();
|
2006-03-05 05:30:57 +00:00
|
|
|
// copysign(x, c1) -> fabs(x) iff ispos(c1)
|
|
|
|
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
|
2009-01-22 21:58:43 +00:00
|
|
|
// If the legality check in the chosen branch fails, control falls through
// to the remaining folds below.
if (!V.isNegative()) {
|
|
|
|
if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
|
2009-01-22 21:58:43 +00:00
|
|
|
} else {
|
|
|
|
// NOTE(review): only FNEG legality is checked here although an FABS
// node is created as well -- confirm this is intended.
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
|
2009-01-31 03:12:48 +00:00
|
|
|
DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
|
2009-01-22 21:58:43 +00:00
|
|
|
}
|
2006-03-05 05:30:57 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-03-05 05:30:57 +00:00
|
|
|
// copysign(fabs(x), y) -> copysign(x, y)
|
|
|
|
// copysign(fneg(x), y) -> copysign(x, y)
|
|
|
|
// copysign(copysign(x,z), y) -> copysign(x, y)
|
|
|
|
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
|
|
|
|
N0.getOpcode() == ISD::FCOPYSIGN)
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
|
|
|
|
N0.getOperand(0), N1);
|
2006-03-05 05:30:57 +00:00
|
|
|
|
|
|
|
// copysign(x, abs(y)) -> abs(x)
|
|
|
|
if (N1.getOpcode() == ISD::FABS)
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-03-05 05:30:57 +00:00
|
|
|
// copysign(x, copysign(y,z)) -> copysign(x, z)
|
|
|
|
if (N1.getOpcode() == ISD::FCOPYSIGN)
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
|
|
|
|
N0, N1.getOperand(1));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-03-05 05:30:57 +00:00
|
|
|
// copysign(x, fp_extend(y)) -> copysign(x, y)
|
|
|
|
// copysign(x, fp_round(y)) -> copysign(x, y)
|
|
|
|
if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
|
|
|
|
N0, N1.getOperand(0));
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-03-05 05:30:57 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
|
|
|
|
SDValue N0 = N->getOperand(0);
|
2005-09-02 21:18:40 +00:00
|
|
|
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
|
|
|
EVT OpVT = N0.getValueType();
|
when we know the signbit of an input to uint_to_fp is zero,
change it to sint_to_fp on targets where that is cheaper (and
vice versa of course). This allows us to compile uint_to_fp to:
_test:
movl 4(%esp), %eax
shrl $23, %eax
cvtsi2ss %eax, %xmm0
movl 8(%esp), %eax
movss %xmm0, (%eax)
ret
instead of:
.align 3
LCPI1_0: ## double
.long 0 ## double least significant word 4.5036e+15
.long 1127219200 ## double most significant word 4.5036e+15
.text
.align 4,0x90
.globl _test
_test:
subl $12, %esp
movl 16(%esp), %eax
shrl $23, %eax
movl %eax, (%esp)
movl $1127219200, 4(%esp)
movsd (%esp), %xmm0
subsd LCPI1_0, %xmm0
cvtsd2ss %xmm0, %xmm0
movl 20(%esp), %eax
movss %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52747 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-26 00:16:49 +00:00
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
// fold (sint_to_fp c1) -> c1fp
|
2009-08-11 20:47:22 +00:00
|
|
|
if (N0C && OpVT != MVT::ppcf128)
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
when we know the signbit of an input to uint_to_fp is zero,
change it to sint_to_fp on targets where that is cheaper (and
visaversa of course). This allows us to compile uint_to_fp to:
_test:
movl 4(%esp), %eax
shrl $23, %eax
cvtsi2ss %eax, %xmm0
movl 8(%esp), %eax
movss %xmm0, (%eax)
ret
instead of:
.align 3
LCPI1_0: ## double
.long 0 ## double least significant word 4.5036e+15
.long 1127219200 ## double most significant word 4.5036e+15
.text
.align 4,0x90
.globl _test
_test:
subl $12, %esp
movl 16(%esp), %eax
shrl $23, %eax
movl %eax, (%esp)
movl $1127219200, 4(%esp)
movsd (%esp), %xmm0
subsd LCPI1_0, %xmm0
cvtsd2ss %xmm0, %xmm0
movl 20(%esp), %eax
movss %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52747 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-26 00:16:49 +00:00
|
|
|
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
|
|
|
|
// but UINT_TO_FP is legal on this target, try to convert.
|
2009-01-28 17:46:25 +00:00
|
|
|
if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
|
|
|
|
TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
|
2009-02-17 22:15:04 +00:00
|
|
|
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
|
when we know the signbit of an input to uint_to_fp is zero,
change it to sint_to_fp on targets where that is cheaper (and
visaversa of course). This allows us to compile uint_to_fp to:
_test:
movl 4(%esp), %eax
shrl $23, %eax
cvtsi2ss %eax, %xmm0
movl 8(%esp), %eax
movss %xmm0, (%eax)
ret
instead of:
.align 3
LCPI1_0: ## double
.long 0 ## double least significant word 4.5036e+15
.long 1127219200 ## double most significant word 4.5036e+15
.text
.align 4,0x90
.globl _test
_test:
subl $12, %esp
movl 16(%esp), %eax
shrl $23, %eax
movl %eax, (%esp)
movl $1127219200, 4(%esp)
movsd (%esp), %xmm0
subsd LCPI1_0, %xmm0
cvtsd2ss %xmm0, %xmm0
movl 20(%esp), %eax
movss %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52747 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-26 00:16:49 +00:00
|
|
|
if (DAG.SignBitIsZero(N0))
|
2009-01-30 23:15:49 +00:00
|
|
|
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
|
when we know the signbit of an input to uint_to_fp is zero,
change it to sint_to_fp on targets where that is cheaper (and
visaversa of course). This allows us to compile uint_to_fp to:
_test:
movl 4(%esp), %eax
shrl $23, %eax
cvtsi2ss %eax, %xmm0
movl 8(%esp), %eax
movss %xmm0, (%eax)
ret
instead of:
.align 3
LCPI1_0: ## double
.long 0 ## double least significant word 4.5036e+15
.long 1127219200 ## double most significant word 4.5036e+15
.text
.align 4,0x90
.globl _test
_test:
subl $12, %esp
movl 16(%esp), %eax
shrl $23, %eax
movl %eax, (%esp)
movl $1127219200, 4(%esp)
movsd (%esp), %xmm0
subsd LCPI1_0, %xmm0
cvtsd2ss %xmm0, %xmm0
movl 20(%esp), %eax
movss %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52747 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-26 00:16:49 +00:00
|
|
|
}
|
2009-01-30 23:15:49 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitUINT_TO_FP - Try to simplify a UINT_TO_FP node.  Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT ResultVT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  // Re-requesting the node from the DAG gives it the chance to fold the
  // constant operand.
  if (isa<ConstantSDNode>(Src) && SrcVT != MVT::ppcf128)
    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), ResultVT, Src);

  // When the target cannot do UINT_TO_FP on this operand type but can do
  // SINT_TO_FP, the signed form is an exact substitute as long as the sign
  // bit of the input is known to be zero.
  bool OnlySignedAvailable =
    !TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, SrcVT) &&
    TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT);
  if (OnlySignedAvailable && DAG.SignBitIsZero(Src))
    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), ResultVT, Src);

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFP_TO_SINT - Try to simplify an FP_TO_SINT node.  Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT ResultVT = N->getValueType(0);

  // Only a floating-point constant operand is handled here.
  if (!isa<ConstantFPSDNode>(Src))
    return SDValue();

  // fold (fp_to_sint c1fp) -> c1
  return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), ResultVT, Src);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFP_TO_UINT - Try to simplify an FP_TO_UINT node.  Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT ResultVT = N->getValueType(0);

  // Only a floating-point constant operand is handled here.
  // NOTE(review): the ppcf128 test is applied to the result type of the
  // conversion rather than to the operand type -- confirm this is intended.
  if (!isa<ConstantFPSDNode>(Src) || ResultVT == MVT::ppcf128)
    return SDValue();

  // fold (fp_to_uint c1fp) -> c1
  return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), ResultVT, Src);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFP_ROUND - Try to simplify an FP_ROUND node.  Operand 0 is the value
/// being rounded and operand 1 is the constant flag that the folds below
/// compare against 1 to detect a value-preserving round.  Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue Src = N->getOperand(0);
  SDValue TruncFlag = N->getOperand(1);
  EVT ResultVT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (isa<ConstantFPSDNode>(Src) && Src.getValueType() != MVT::ppcf128)
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), ResultVT, Src,
                       TruncFlag);

  switch (Src.getOpcode()) {
  default:
    break;

  case ISD::FP_EXTEND:
    // fold (fp_round (fp_extend x)) -> x
    // Only valid when rounding lands back on the type x started with.
    if (ResultVT == Src.getOperand(0).getValueType())
      return Src.getOperand(0);
    break;

  case ISD::FP_ROUND: {
    // fold (fp_round (fp_round x)) -> (fp_round x)
    // The merged round is value preserving only if both rounds were.
    bool BothPreserve = N->getConstantOperandVal(1) == 1 &&
                        Src.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), ResultVT,
                       Src.getOperand(0), DAG.getIntPtrConstant(BothPreserve));
  }

  case ISD::FCOPYSIGN:
    // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
    if (Src.getNode()->hasOneUse()) {
      SDValue RoundedMag = DAG.getNode(ISD::FP_ROUND, Src.getDebugLoc(),
                                       ResultVT, Src.getOperand(0), TruncFlag);
      AddToWorkList(RoundedMag.getNode());
      return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), ResultVT,
                         RoundedMag, Src.getOperand(1));
    }
    break;
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFP_ROUND_INREG - Try to simplify an FP_ROUND_INREG node.  Operand 0 is
/// the value and operand 1 is a VTSDNode carrying the type to round through.
/// Returns the replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  ConstantFPSDNode *SrcCFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
  EVT ResultVT = N->getValueType(0);
  EVT RoundVT = cast<VTSDNode>(N->getOperand(1))->getVT();

  // Only a floating-point constant operand is handled here.
  if (!SrcCFP)
    return SDValue();

  // Once types have been legalized, do not materialize a constant of an
  // illegal type.
  if (LegalTypes && !TLI.isTypeLegal(RoundVT))
    return SDValue();

  // fold (fp_round_inreg c1fp) -> c1fp
  // Build the constant in the narrower type, then extend it back to the
  // result type.
  SDValue Narrowed = DAG.getConstantFP(*SrcCFP->getConstantFPValue(), RoundVT);
  return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), ResultVT, Narrowed);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFP_EXTEND - Try to simplify an FP_EXTEND node.  Returns the
/// replacement value, SDValue(N, 0) when N was already replaced through
/// CombineTo, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT ResultVT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be
  // folded when the FP_ROUND user is visited.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isa<ConstantFPSDNode>(Src) && ResultVT != MVT::ppcf128)
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), ResultVT, Src);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (Src.getOpcode() == ISD::FP_ROUND &&
      Src.getNode()->getConstantOperandVal(1) == 1) {
    SDValue Inner = Src.getOperand(0);
    EVT InnerVT = Inner.getValueType();

    // Same type: the round/extend pair cancels out completely.
    if (InnerVT == ResultVT)
      return Inner;

    // Result narrower than X: a single round remains.
    if (ResultVT.bitsLT(InnerVT))
      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), ResultVT,
                         Inner, Src.getOperand(1));

    // Otherwise a single extend remains.
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), ResultVT, Inner);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  // Before operation legalization this is done for any non-volatile load;
  // otherwise only when the target reports the extending load as legal.
  if (ISD::isNON_EXTLoad(Src.getNode()) && Src.hasOneUse() &&
      ((!LegalOperations && !cast<LoadSDNode>(Src)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, Src.getValueType()))) {
    LoadSDNode *Load = cast<LoadSDNode>(Src);
    EVT MemVT = Src.getValueType();

    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), ResultVT,
                                     Load->getChain(), Load->getBasePtr(),
                                     Load->getSrcValue(),
                                     Load->getSrcValueOffset(), MemVT,
                                     Load->isVolatile(), Load->getAlignment());

    // The extending load stands in for N itself.
    CombineTo(N, ExtLoad);

    // The original load is replaced by a round of the extended value (with
    // the flag operand set to 1), and its chain result by the new load's
    // chain.
    SDValue Rounded = DAG.getNode(ISD::FP_ROUND, Src.getDebugLoc(), MemVT,
                                  ExtLoad, DAG.getIntPtrConstant(1));
    CombineTo(Src.getNode(), Rounded, ExtLoad.getValue(1));

    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFNEG - Try to simplify an FNEG node.  Returns the replacement value,
/// or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT ResultVT = N->getValueType(0);

  // If the operand can be negated for free, use its negated form directly.
  if (isNegatibleForFree(Src, LegalOperations))
    return GetNegatedExpression(Src, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
  // constant pool values.  This is restricted to a scalar result and a
  // single-use bitconvert.
  if (Src.getOpcode() != ISD::BIT_CONVERT || ResultVT.isVector() ||
      !Src.getNode()->hasOneUse())
    return SDValue();

  // The value being bitconverted must itself be a scalar integer.
  SDValue IntSrc = Src.getOperand(0);
  EVT IntVT = IntSrc.getValueType();
  if (!IntVT.isInteger() || IntVT.isVector())
    return SDValue();

  // Flip the top bit of the integer, then convert the result back to the
  // floating-point type.
  SDValue Flipped =
    DAG.getNode(ISD::XOR, Src.getDebugLoc(), IntVT, IntSrc,
                DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()),
                                IntVT));
  AddToWorkList(Flipped.getNode());
  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), ResultVT, Flipped);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitFABS - Try to simplify an FABS node.  Returns the replacement value,
/// or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (N0CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N0;

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
  // constant pool values.
  //
  // The mask built below clears only the top bit of the whole integer.  That
  // is the sign bit of a scalar FP result, but for a vector FP result it
  // would leave the sign bits of the remaining elements untouched, so vector
  // results are excluded, matching the equivalent guard in visitFNEG.
  if (N0.getOpcode() == ISD::BIT_CONVERT &&
      !VT.isVector() &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
      AddToWorkList(Int.getNode());
      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Int);
    }
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitBRCOND - Try to simplify a BRCOND node.  Operand 0 is the chain,
/// operand 1 the condition and operand 2 the branch destination.  Returns the
/// replacement node, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Cond = N->getOperand(1);
  SDValue Dest = N->getOperand(2);

  // If the condition is a constant we could fold this into a fallthrough or
  // unconditional branch.  However that doesn't happen very often in normal
  // code, because Instcombine/SimplifyCFG should have handled the available
  // opportunities.  If we did this folding here, it would be necessary to
  // update the MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (Cond.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other))
    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
                       Chain, Cond.getOperand(2),
                       Cond.getOperand(0), Cond.getOperand(1), Dest);

  // Match this pattern so that we can generate simpler code:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc ne %b, 0
  //   brcond %c ...
  //
  // This applies only when the AND constant value has one bit set and the
  // SRL constant is equal to the log2 of the AND constant.  The back-end is
  // smart enough to convert the result into a TEST/JMP sequence.
  if (!Cond.hasOneUse() || Cond.getOpcode() != ISD::SRL)
    return SDValue();

  SDValue Masked = Cond.getOperand(0);
  SDValue ShiftAmt = Cond.getOperand(1);
  if (Masked.getOpcode() != ISD::AND || !Masked.hasOneUse() ||
      ShiftAmt.getOpcode() != ISD::Constant)
    return SDValue();

  SDValue MaskOp = Masked.getOperand(1);
  if (MaskOp.getOpcode() != ISD::Constant)
    return SDValue();

  const APInt &Mask = cast<ConstantSDNode>(MaskOp)->getAPIntValue();
  if (!Mask.isPowerOf2() ||
      !(cast<ConstantSDNode>(ShiftAmt)->getAPIntValue() == Mask.logBase2()))
    return SDValue();

  // The shifted value is nonzero exactly when the masked value is nonzero.
  EVT MaskedVT = Masked.getValueType();
  SDValue NewCond = DAG.getSetCC(N->getDebugLoc(),
                                 TLI.getSetCCResultType(MaskedVT),
                                 Masked, DAG.getConstant(0, MaskedVT),
                                 ISD::SETNE);

  // Replace the uses of SRL with SETCC, then drop the SRL node.
  DAG.ReplaceAllUsesOfValueWith(Cond, NewCond);
  removeFromWorkList(Cond.getNode());
  DAG.DeleteNode(Cond.getNode());

  return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), MVT::Other,
                     Chain, NewCond, Dest);
}
|
|
|
|
|
2005-10-05 06:47:48 +00:00
|
|
|
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
|
|
|
|
//
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
|
2005-10-05 06:47:48 +00:00
|
|
|
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-11-17 00:47:23 +00:00
|
|
|
// If N is a constant we could fold this into a fallthrough or unconditional
|
|
|
|
// branch. However that doesn't happen very often in normal code, because
|
|
|
|
// Instcombine/SimplifyCFG should have handled the available opportunities.
|
|
|
|
// If we did this folding here, it would be necessary to update the
|
|
|
|
// MachineBasicBlock CFG, which is awkward.
|
|
|
|
|
2008-06-09 11:32:28 +00:00
|
|
|
// Use SimplifySetCC to simplify SETCC's.
|
2009-01-01 15:52:00 +00:00
|
|
|
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
|
2009-02-03 00:47:48 +00:00
|
|
|
CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
|
|
|
|
false);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Simp.getNode()) AddToWorkList(Simp.getNode());
|
2006-10-14 03:52:46 +00:00
|
|
|
|
2005-10-05 21:43:42 +00:00
|
|
|
// fold to a simpler setcc
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
|
2009-08-11 20:47:22 +00:00
|
|
|
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
|
2009-01-30 23:27:35 +00:00
|
|
|
N->getOperand(0), Simp.getOperand(2),
|
|
|
|
Simp.getOperand(0), Simp.getOperand(1),
|
|
|
|
N->getOperand(4));
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-19 22:34:01 +00:00
|
|
|
}
|
|
|
|
|
2008-06-15 20:12:31 +00:00
|
|
|
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
|
|
|
|
/// pre-indexed load / store when the base pointer is an add or subtract
|
2006-11-11 00:39:41 +00:00
|
|
|
/// and it has other uses besides the load / store. After the
|
|
|
|
/// transformation, the new indexed load / store has effectively folded
|
|
|
|
/// the add / subtract in and all of its other uses are redirected to the
|
|
|
|
/// new load / store.
|
|
|
|
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
|
2008-11-24 14:53:14 +00:00
|
|
|
if (!LegalOperations)
|
2006-11-11 00:39:41 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
bool isLoad = true;
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Ptr;
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT;
|
2006-11-11 00:39:41 +00:00
|
|
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
2008-01-17 19:59:44 +00:00
|
|
|
if (LD->isIndexed())
|
2006-12-16 06:25:23 +00:00
|
|
|
return false;
|
2008-01-30 00:15:11 +00:00
|
|
|
VT = LD->getMemoryVT();
|
2007-03-07 08:07:03 +00:00
|
|
|
if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
|
2006-11-11 00:39:41 +00:00
|
|
|
!TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
|
|
|
|
return false;
|
|
|
|
Ptr = LD->getBasePtr();
|
|
|
|
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
|
2008-01-17 19:59:44 +00:00
|
|
|
if (ST->isIndexed())
|
2006-12-16 06:25:23 +00:00
|
|
|
return false;
|
2008-01-30 00:15:11 +00:00
|
|
|
VT = ST->getMemoryVT();
|
2006-11-11 00:39:41 +00:00
|
|
|
if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
|
|
|
|
!TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
|
|
|
|
return false;
|
|
|
|
Ptr = ST->getBasePtr();
|
|
|
|
isLoad = false;
|
2009-01-30 23:27:35 +00:00
|
|
|
} else {
|
2006-11-11 00:39:41 +00:00
|
|
|
return false;
|
2009-01-30 23:27:35 +00:00
|
|
|
}
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
|
|
|
|
// out. There is no reason to make this a preinc/predec.
|
|
|
|
if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
|
2008-08-28 21:40:38 +00:00
|
|
|
Ptr.getNode()->hasOneUse())
|
2006-11-11 00:56:29 +00:00
|
|
|
return false;
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
// Ask the target to do addressing mode selection.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue BasePtr;
|
|
|
|
SDValue Offset;
|
2006-11-11 00:56:29 +00:00
|
|
|
ISD::MemIndexedMode AM = ISD::UNINDEXED;
|
|
|
|
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
|
|
|
|
return false;
|
2007-05-03 23:52:19 +00:00
|
|
|
// Don't create a indexed load / store with zero offset.
|
|
|
|
if (isa<ConstantSDNode>(Offset) &&
|
2008-03-13 22:13:53 +00:00
|
|
|
cast<ConstantSDNode>(Offset)->isNullValue())
|
2007-05-03 23:52:19 +00:00
|
|
|
return false;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-11-11 01:00:15 +00:00
|
|
|
// Try turning it into a pre-indexed load / store except when:
|
2007-05-24 02:35:39 +00:00
|
|
|
// 1) The new base ptr is a frame index.
|
|
|
|
// 2) If N is a store and the new base ptr is either the same as or is a
|
2006-11-11 00:56:29 +00:00
|
|
|
// predecessor of the value being stored.
|
2007-05-24 02:35:39 +00:00
|
|
|
// 3) Another use of old base ptr is a predecessor of N. If ptr is folded
|
2006-11-11 00:56:29 +00:00
|
|
|
// that would create a cycle.
|
2007-05-24 02:35:39 +00:00
|
|
|
// 4) All uses are load / store ops that use it as old base ptr.
|
2006-11-11 00:56:29 +00:00
|
|
|
|
2006-11-11 01:00:15 +00:00
|
|
|
// Check #1. Preinc'ing a frame index would require copying the stack pointer
|
|
|
|
// (plus the implicit offset) to a register to preinc anyway.
|
2009-05-06 18:25:01 +00:00
|
|
|
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
|
2006-11-11 01:00:15 +00:00
|
|
|
return false;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-11-11 01:00:15 +00:00
|
|
|
// Check #2.
|
2006-11-11 00:56:29 +00:00
|
|
|
if (!isLoad) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Val = cast<StoreSDNode>(N)->getValue();
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
|
2006-11-11 00:56:29 +00:00
|
|
|
return false;
|
|
|
|
}
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2007-05-24 02:35:39 +00:00
|
|
|
// Now check for #3 and #4.
|
2006-11-11 00:56:29 +00:00
|
|
|
bool RealUse = false;
|
2008-08-28 21:40:38 +00:00
|
|
|
for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
|
|
|
|
E = Ptr.getNode()->use_end(); I != E; ++I) {
|
2008-07-27 20:43:25 +00:00
|
|
|
SDNode *Use = *I;
|
2006-11-11 00:56:29 +00:00
|
|
|
if (Use == N)
|
|
|
|
continue;
|
2008-03-04 00:41:45 +00:00
|
|
|
if (Use->isPredecessorOf(N))
|
2006-11-11 00:56:29 +00:00
|
|
|
return false;
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
if (!((Use->getOpcode() == ISD::LOAD &&
|
|
|
|
cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
|
2008-02-20 11:10:28 +00:00
|
|
|
(Use->getOpcode() == ISD::STORE &&
|
|
|
|
cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
|
2006-11-11 00:56:29 +00:00
|
|
|
RealUse = true;
|
2006-11-11 00:39:41 +00:00
|
|
|
}
|
2009-01-30 23:27:35 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
if (!RealUse)
|
|
|
|
return false;
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Result;
|
2006-11-11 00:56:29 +00:00
|
|
|
if (isLoad)
|
2009-01-30 23:27:35 +00:00
|
|
|
Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
|
|
|
|
BasePtr, Offset, AM);
|
2006-11-11 00:56:29 +00:00
|
|
|
else
|
2009-01-30 23:27:35 +00:00
|
|
|
Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
|
|
|
|
BasePtr, Offset, AM);
|
2006-11-11 00:56:29 +00:00
|
|
|
++PreIndexedNodes;
|
|
|
|
++NodesCombined;
|
2009-08-23 06:35:02 +00:00
|
|
|
DEBUG(errs() << "\nReplacing.4 ";
|
|
|
|
N->dump(&DAG);
|
|
|
|
errs() << "\nWith: ";
|
|
|
|
Result.getNode()->dump(&DAG);
|
|
|
|
errs() << '\n');
|
2008-02-03 06:49:24 +00:00
|
|
|
WorkListRemover DeadNodes(*this);
|
2006-11-11 00:56:29 +00:00
|
|
|
if (isLoad) {
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2006-11-11 00:56:29 +00:00
|
|
|
} else {
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2006-11-11 00:56:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Finally, since the node is now dead, remove it from the graph.
|
|
|
|
DAG.DeleteNode(N);
|
|
|
|
|
|
|
|
// Replace the uses of Ptr with uses of the updated base value.
|
|
|
|
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2008-08-28 21:40:38 +00:00
|
|
|
removeFromWorkList(Ptr.getNode());
|
|
|
|
DAG.DeleteNode(Ptr.getNode());
|
2006-11-11 00:56:29 +00:00
|
|
|
|
|
|
|
return true;
|
2006-11-11 00:39:41 +00:00
|
|
|
}
|
|
|
|
|
2008-06-15 20:12:31 +00:00
|
|
|
/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
|
2006-11-11 00:39:41 +00:00
|
|
|
/// add / sub of the base pointer node into a post-indexed load / store.
|
|
|
|
/// The transformation folded the add / subtract into the new indexed
|
|
|
|
/// load / store effectively and all of its uses are redirected to the
|
|
|
|
/// new load / store.
|
|
|
|
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
|
2008-11-24 14:53:14 +00:00
|
|
|
if (!LegalOperations)
|
2006-11-11 00:39:41 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
bool isLoad = true;
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Ptr;
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT;
|
2006-11-11 00:39:41 +00:00
|
|
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
2008-01-17 19:59:44 +00:00
|
|
|
if (LD->isIndexed())
|
2006-12-16 06:25:23 +00:00
|
|
|
return false;
|
2008-01-30 00:15:11 +00:00
|
|
|
VT = LD->getMemoryVT();
|
2006-11-11 00:39:41 +00:00
|
|
|
if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
|
|
|
|
!TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
|
|
|
|
return false;
|
|
|
|
Ptr = LD->getBasePtr();
|
|
|
|
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
|
2008-01-17 19:59:44 +00:00
|
|
|
if (ST->isIndexed())
|
2006-12-16 06:25:23 +00:00
|
|
|
return false;
|
2008-01-30 00:15:11 +00:00
|
|
|
VT = ST->getMemoryVT();
|
2006-11-11 00:39:41 +00:00
|
|
|
if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
|
|
|
|
!TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
|
|
|
|
return false;
|
|
|
|
Ptr = ST->getBasePtr();
|
|
|
|
isLoad = false;
|
2009-01-30 23:27:35 +00:00
|
|
|
} else {
|
2006-11-11 00:39:41 +00:00
|
|
|
return false;
|
2009-01-30 23:27:35 +00:00
|
|
|
}
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Ptr.getNode()->hasOneUse())
|
2006-11-11 00:56:29 +00:00
|
|
|
return false;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
|
|
|
|
E = Ptr.getNode()->use_end(); I != E; ++I) {
|
2008-07-27 20:43:25 +00:00
|
|
|
SDNode *Op = *I;
|
2006-11-11 00:56:29 +00:00
|
|
|
if (Op == N ||
|
|
|
|
(Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
|
|
|
|
continue;
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue BasePtr;
|
|
|
|
SDValue Offset;
|
2006-11-11 00:56:29 +00:00
|
|
|
ISD::MemIndexedMode AM = ISD::UNINDEXED;
|
|
|
|
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
|
2009-09-10 22:09:31 +00:00
|
|
|
if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
|
2006-11-11 00:56:29 +00:00
|
|
|
std::swap(BasePtr, Offset);
|
|
|
|
if (Ptr != BasePtr)
|
2006-11-11 00:39:41 +00:00
|
|
|
continue;
|
2007-05-03 23:52:19 +00:00
|
|
|
// Don't create a indexed load / store with zero offset.
|
|
|
|
if (isa<ConstantSDNode>(Offset) &&
|
2008-03-13 22:13:53 +00:00
|
|
|
cast<ConstantSDNode>(Offset)->isNullValue())
|
2007-05-03 23:52:19 +00:00
|
|
|
continue;
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
// Try turning it into a post-indexed load / store except when
|
|
|
|
// 1) All uses are load / store ops that use it as base ptr.
|
|
|
|
// 2) Op must be independent of N, i.e. Op is neither a predecessor
|
|
|
|
// nor a successor of N. Otherwise, if Op is folded that would
|
|
|
|
// create a cycle.
|
|
|
|
|
2009-05-06 18:25:01 +00:00
|
|
|
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
|
|
|
|
continue;
|
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
// Check for #1.
|
|
|
|
bool TryNext = false;
|
2008-08-28 21:40:38 +00:00
|
|
|
for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
|
|
|
|
EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
|
2008-07-27 20:43:25 +00:00
|
|
|
SDNode *Use = *II;
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Use == Ptr.getNode())
|
2006-11-11 00:39:41 +00:00
|
|
|
continue;
|
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
// If all the uses are load / store addresses, then don't do the
|
|
|
|
// transformation.
|
|
|
|
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
|
|
|
|
bool RealUse = false;
|
|
|
|
for (SDNode::use_iterator III = Use->use_begin(),
|
|
|
|
EEE = Use->use_end(); III != EEE; ++III) {
|
2008-07-27 20:43:25 +00:00
|
|
|
SDNode *UseUse = *III;
|
2006-11-11 00:56:29 +00:00
|
|
|
if (!((UseUse->getOpcode() == ISD::LOAD &&
|
2008-08-28 21:40:38 +00:00
|
|
|
cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
|
2008-02-20 11:10:28 +00:00
|
|
|
(UseUse->getOpcode() == ISD::STORE &&
|
2008-08-28 21:40:38 +00:00
|
|
|
cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
|
2006-11-11 00:56:29 +00:00
|
|
|
RealUse = true;
|
2006-11-11 00:39:41 +00:00
|
|
|
}
|
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
if (!RealUse) {
|
|
|
|
TryNext = true;
|
|
|
|
break;
|
2006-11-11 00:39:41 +00:00
|
|
|
}
|
2006-11-11 00:56:29 +00:00
|
|
|
}
|
|
|
|
}
|
2009-01-30 23:27:35 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
if (TryNext)
|
|
|
|
continue;
|
2006-11-11 00:39:41 +00:00
|
|
|
|
2006-11-11 00:56:29 +00:00
|
|
|
// Check for #2
|
2008-03-04 00:41:45 +00:00
|
|
|
if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Result = isLoad
|
2009-01-30 23:27:35 +00:00
|
|
|
? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
|
|
|
|
BasePtr, Offset, AM)
|
|
|
|
: DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
|
|
|
|
BasePtr, Offset, AM);
|
2006-11-11 00:56:29 +00:00
|
|
|
++PostIndexedNodes;
|
|
|
|
++NodesCombined;
|
2009-08-23 06:35:02 +00:00
|
|
|
DEBUG(errs() << "\nReplacing.5 ";
|
|
|
|
N->dump(&DAG);
|
|
|
|
errs() << "\nWith: ";
|
|
|
|
Result.getNode()->dump(&DAG);
|
|
|
|
errs() << '\n');
|
2008-02-03 06:49:24 +00:00
|
|
|
WorkListRemover DeadNodes(*this);
|
2006-11-11 00:56:29 +00:00
|
|
|
if (isLoad) {
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2006-11-11 00:56:29 +00:00
|
|
|
} else {
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2006-11-11 00:39:41 +00:00
|
|
|
}
|
2006-11-11 00:56:29 +00:00
|
|
|
|
|
|
|
// Finally, since the node is now dead, remove it from the graph.
|
|
|
|
DAG.DeleteNode(N);
|
|
|
|
|
|
|
|
// Replace the uses of Use with uses of the updated base value.
|
2008-07-27 21:46:04 +00:00
|
|
|
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
|
2006-11-11 00:56:29 +00:00
|
|
|
Result.getValue(isLoad ? 1 : 0),
|
2008-02-03 06:49:24 +00:00
|
|
|
&DeadNodes);
|
2006-11-11 00:56:29 +00:00
|
|
|
removeFromWorkList(Op);
|
|
|
|
DAG.DeleteNode(Op);
|
|
|
|
return true;
|
2006-11-11 00:39:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-01-30 23:27:35 +00:00
|
|
|
|
2006-11-11 00:39:41 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitLOAD - Combine the load node N. In order, this tries to:
///  - re-emit the load with a larger alignment inferred from its pointer,
///  - delete a non-volatile load whose loaded value (and, for indexed loads,
///    updated pointer) is unused,
///  - forward the value of a store that immediately precedes the load on the
///    chain and writes the same address,
///  - when CombinerAA is enabled, hang the load off a better chain, and
///  - turn the load into a pre- or post-indexed load.
/// Returns the replacement value, SDValue(N, 0) when the DAG was updated in
/// place, or a null SDValue when nothing changed.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment())
        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getSrcValue(),
                              LD->getSrcValueOffset(), LD->getMemoryVT(),
                              LD->isVolatile(), Align);
    }
  }

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (N->hasNUsesOfValue(0, 0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(errs() << "\nReplacing.6 ";
              N->dump(&DAG);
              errs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              errs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);

        // Only delete N if the replacement above did not make it live again
        // (see the example above).
        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(errs() << "\nReplacing.6 ";
              N->dump(&DAG);
              errs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              errs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)),
                                      &DeadNodes);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  //
  // Both memory operations must be unindexed. An indexed load has three
  // results, so the two-value CombineTo below does not describe it, and for
  // an indexed load or store the base pointer is not necessarily the address
  // that is actually accessed, so comparing base pointers would not prove
  // that the load reads what the store wrote.
  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
      LD->isUnindexed() && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->isUnindexed() &&
          PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // The replacement built below is a plain two-result load, so this rewrite
  // is restricted to unindexed loads as well.
  if (CombinerAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid the dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                               BetterChain, Ptr,
                               LD->getSrcValue(), LD->getSrcValueOffset(),
                               LD->isVolatile(), LD->getAlignment());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getSrcValue(),
                                  LD->getSrcValueOffset(),
                                  LD->getMemoryVT(),
                                  LD->isVolatile(),
                                  LD->getAlignment());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}
|
|
|
|
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
|
|
|
|
/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
|
|
|
|
/// of the loaded bits, try narrowing the load and store if it would end up
|
|
|
|
/// being a win for performance or code size.
|
|
|
|
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
|
|
|
|
StoreSDNode *ST = cast<StoreSDNode>(N);
|
2009-05-28 18:41:02 +00:00
|
|
|
if (ST->isVolatile())
|
|
|
|
return SDValue();
|
|
|
|
|
2009-05-28 00:35:15 +00:00
|
|
|
SDValue Chain = ST->getChain();
|
|
|
|
SDValue Value = ST->getValue();
|
|
|
|
SDValue Ptr = ST->getBasePtr();
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = Value.getValueType();
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
|
2009-05-28 18:41:02 +00:00
|
|
|
return SDValue();
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
unsigned Opc = Value.getOpcode();
|
|
|
|
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
|
|
|
|
Value.getOperand(1).getOpcode() != ISD::Constant)
|
2009-05-28 18:41:02 +00:00
|
|
|
return SDValue();
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
SDValue N0 = Value.getOperand(0);
|
|
|
|
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
|
|
|
|
LoadSDNode *LD = cast<LoadSDNode>(N0);
|
2009-05-28 18:41:02 +00:00
|
|
|
if (LD->getBasePtr() != Ptr)
|
|
|
|
return SDValue();
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
// Find the type to narrow it the load / op / store to.
|
|
|
|
SDValue N1 = Value.getOperand(1);
|
|
|
|
unsigned BitWidth = N1.getValueSizeInBits();
|
|
|
|
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
|
|
|
|
if (Opc == ISD::AND)
|
|
|
|
Imm ^= APInt::getAllOnesValue(BitWidth);
|
2009-05-28 23:52:18 +00:00
|
|
|
if (Imm == 0 || Imm.isAllOnesValue())
|
|
|
|
return SDValue();
|
2009-05-28 00:35:15 +00:00
|
|
|
unsigned ShAmt = Imm.countTrailingZeros();
|
|
|
|
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
|
|
|
|
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
|
2009-08-12 00:36:31 +00:00
|
|
|
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
|
2009-05-28 00:35:15 +00:00
|
|
|
while (NewBW < BitWidth &&
|
2009-05-28 18:41:02 +00:00
|
|
|
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
|
2009-05-28 00:35:15 +00:00
|
|
|
TLI.isNarrowingProfitable(VT, NewVT))) {
|
|
|
|
NewBW = NextPowerOf2(NewBW);
|
2009-08-12 00:36:31 +00:00
|
|
|
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
|
2009-05-28 00:35:15 +00:00
|
|
|
}
|
2009-05-28 18:41:02 +00:00
|
|
|
if (NewBW >= BitWidth)
|
|
|
|
return SDValue();
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
// If the lsb changed does not start at the type bitwidth boundary,
|
|
|
|
// start at the previous one.
|
|
|
|
if (ShAmt % NewBW)
|
|
|
|
ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
|
|
|
|
APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
|
|
|
|
if ((Imm & Mask) == Imm) {
|
|
|
|
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
|
|
|
|
if (Opc == ISD::AND)
|
|
|
|
NewImm ^= APInt::getAllOnesValue(NewBW);
|
|
|
|
uint64_t PtrOff = ShAmt / 8;
|
|
|
|
// For big endian targets, we need to adjust the offset to the pointer to
|
|
|
|
// load the correct bytes.
|
|
|
|
if (TLI.isBigEndian())
|
2009-05-28 18:41:02 +00:00
|
|
|
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
|
2009-05-28 18:41:02 +00:00
|
|
|
if (NewAlign <
|
2009-08-12 00:36:31 +00:00
|
|
|
TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
|
2009-05-28 18:41:02 +00:00
|
|
|
return SDValue();
|
|
|
|
|
2009-05-28 00:35:15 +00:00
|
|
|
SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
|
|
|
|
Ptr.getValueType(), Ptr,
|
|
|
|
DAG.getConstant(PtrOff, Ptr.getValueType()));
|
|
|
|
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
|
|
|
|
LD->getChain(), NewPtr,
|
|
|
|
LD->getSrcValue(), LD->getSrcValueOffset(),
|
|
|
|
LD->isVolatile(), NewAlign);
|
|
|
|
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
|
|
|
|
DAG.getConstant(NewImm, NewVT));
|
|
|
|
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
|
|
|
|
NewVal, NewPtr,
|
|
|
|
ST->getSrcValue(), ST->getSrcValueOffset(),
|
2009-05-28 18:41:02 +00:00
|
|
|
false, NewAlign);
|
2009-05-28 00:35:15 +00:00
|
|
|
|
|
|
|
AddToWorkList(NewPtr.getNode());
|
|
|
|
AddToWorkList(NewLD.getNode());
|
|
|
|
AddToWorkList(NewVal.getNode());
|
|
|
|
WorkListRemover DeadNodes(*this);
|
|
|
|
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
|
|
|
|
&DeadNodes);
|
|
|
|
++OpsNarrowed;
|
|
|
|
return NewST;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-28 18:41:02 +00:00
|
|
|
return SDValue();
|
2009-05-28 00:35:15 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitSTORE(SDNode *N) {
|
2006-10-13 21:14:26 +00:00
|
|
|
StoreSDNode *ST = cast<StoreSDNode>(N);
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Chain = ST->getChain();
|
|
|
|
SDValue Value = ST->getValue();
|
|
|
|
SDValue Ptr = ST->getBasePtr();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-01-25 07:20:16 +00:00
|
|
|
// Try to infer better alignment information than the store already has.
|
2009-04-29 23:29:43 +00:00
|
|
|
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
|
2009-12-09 01:04:59 +00:00
|
|
|
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
|
2008-01-25 07:20:16 +00:00
|
|
|
if (Align > ST->getAlignment())
|
2009-01-30 23:36:47 +00:00
|
|
|
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
|
|
|
|
Ptr, ST->getSrcValue(),
|
2008-01-30 00:15:11 +00:00
|
|
|
ST->getSrcValueOffset(), ST->getMemoryVT(),
|
2008-01-25 07:20:16 +00:00
|
|
|
ST->isVolatile(), Align);
|
|
|
|
}
|
|
|
|
}
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
|
2007-05-07 21:27:48 +00:00
|
|
|
// If this is a store of a bit convert, store the input value if the
|
2007-05-09 21:49:47 +00:00
|
|
|
// resultant store does not need a higher alignment than the original.
|
2007-05-16 22:45:30 +00:00
|
|
|
if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
|
2008-01-17 19:59:44 +00:00
|
|
|
ST->isUnindexed()) {
|
2009-02-20 23:29:13 +00:00
|
|
|
unsigned OrigAlign = ST->getAlignment();
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT SVT = Value.getOperand(0).getValueType();
|
2009-02-20 23:29:13 +00:00
|
|
|
unsigned Align = TLI.getTargetData()->
|
2009-08-12 00:36:31 +00:00
|
|
|
getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
if (Align <= OrigAlign &&
|
2008-11-24 14:53:14 +00:00
|
|
|
((!LegalOperations && !ST->isVolatile()) ||
|
2009-01-28 17:46:25 +00:00
|
|
|
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
|
2009-01-30 23:36:47 +00:00
|
|
|
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
|
|
|
|
Ptr, ST->getSrcValue(),
|
2008-06-28 00:45:22 +00:00
|
|
|
ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
|
2006-09-25 16:29:54 +00:00
|
|
|
}
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
|
2006-12-11 02:23:46 +00:00
|
|
|
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
|
|
|
|
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
// NOTE: If the original store is volatile, this transform must not increase
|
|
|
|
// the number of stores. For example, on x86-32 an f64 can be stored in one
|
|
|
|
// processor operation but an i64 (which is not legal) requires two. So the
|
|
|
|
// transform should not be done in this case.
|
2006-12-11 17:25:19 +00:00
|
|
|
if (Value.getOpcode() != ISD::TargetConstantFP) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Tmp;
|
2009-08-11 20:47:22 +00:00
|
|
|
switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
|
2009-07-14 16:55:14 +00:00
|
|
|
default: llvm_unreachable("Unknown FP type");
|
2009-08-11 20:47:22 +00:00
|
|
|
case MVT::f80: // We don't do this for these yet.
|
|
|
|
case MVT::f128:
|
|
|
|
case MVT::ppcf128:
|
2007-09-18 18:36:59 +00:00
|
|
|
break;
|
2009-08-11 20:47:22 +00:00
|
|
|
case MVT::f32:
|
|
|
|
if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
|
2009-01-28 17:46:25 +00:00
|
|
|
!ST->isVolatile()) ||
|
2009-08-11 20:47:22 +00:00
|
|
|
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
|
2007-09-12 03:30:33 +00:00
|
|
|
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
|
2009-08-11 20:47:22 +00:00
|
|
|
bitcastToAPInt().getZExtValue(), MVT::i32);
|
2009-01-30 23:36:47 +00:00
|
|
|
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
|
|
|
|
Ptr, ST->getSrcValue(),
|
2007-07-09 22:18:38 +00:00
|
|
|
ST->getSrcValueOffset(), ST->isVolatile(),
|
|
|
|
ST->getAlignment());
|
2006-12-12 04:16:14 +00:00
|
|
|
}
|
|
|
|
break;
|
2009-08-11 20:47:22 +00:00
|
|
|
case MVT::f64:
|
|
|
|
if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
|
2009-01-28 17:46:25 +00:00
|
|
|
!ST->isVolatile()) ||
|
2009-08-11 20:47:22 +00:00
|
|
|
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
|
2008-10-09 18:53:47 +00:00
|
|
|
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
|
2009-08-11 20:47:22 +00:00
|
|
|
getZExtValue(), MVT::i64);
|
2009-01-30 23:36:47 +00:00
|
|
|
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
|
|
|
|
Ptr, ST->getSrcValue(),
|
2007-07-09 22:18:38 +00:00
|
|
|
ST->getSrcValueOffset(), ST->isVolatile(),
|
|
|
|
ST->getAlignment());
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
} else if (!ST->isVolatile() &&
|
2009-08-11 20:47:22 +00:00
|
|
|
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
|
2007-10-28 12:59:45 +00:00
|
|
|
// Many FP stores are not made apparent until after legalize, e.g. for
|
2006-12-12 04:16:14 +00:00
|
|
|
// argument passing. Since this is so common, custom legalize the
|
|
|
|
// 64-bit integer store into two 32-bit stores.
|
2008-10-09 18:53:47 +00:00
|
|
|
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
|
2009-08-11 20:47:22 +00:00
|
|
|
SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
|
|
|
|
SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
|
2008-02-11 10:37:04 +00:00
|
|
|
if (TLI.isBigEndian()) std::swap(Lo, Hi);
|
2006-12-12 04:16:14 +00:00
|
|
|
|
2007-07-09 22:18:38 +00:00
|
|
|
int SVOffset = ST->getSrcValueOffset();
|
|
|
|
unsigned Alignment = ST->getAlignment();
|
|
|
|
bool isVolatile = ST->isVolatile();
|
|
|
|
|
2009-01-30 23:36:47 +00:00
|
|
|
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
|
|
|
|
Ptr, ST->getSrcValue(),
|
|
|
|
ST->getSrcValueOffset(),
|
|
|
|
isVolatile, ST->getAlignment());
|
|
|
|
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
|
2006-12-12 04:16:14 +00:00
|
|
|
DAG.getConstant(4, Ptr.getValueType()));
|
2007-07-09 22:18:38 +00:00
|
|
|
SVOffset += 4;
|
2007-10-28 12:59:45 +00:00
|
|
|
Alignment = MinAlign(Alignment, 4U);
|
2009-01-30 23:36:47 +00:00
|
|
|
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
|
|
|
|
Ptr, ST->getSrcValue(),
|
|
|
|
SVOffset, isVolatile, Alignment);
|
2009-08-11 20:47:22 +00:00
|
|
|
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
|
2009-01-30 23:36:47 +00:00
|
|
|
St0, St1);
|
2006-12-12 04:16:14 +00:00
|
|
|
}
|
2009-01-30 23:36:47 +00:00
|
|
|
|
2006-12-12 04:16:14 +00:00
|
|
|
break;
|
2006-12-11 17:25:19 +00:00
|
|
|
}
|
2006-12-11 02:23:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-02-17 22:15:04 +00:00
|
|
|
if (CombinerAA) {
|
2006-09-25 16:29:54 +00:00
|
|
|
// Walk up chain skipping non-aliasing memory nodes.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue BetterChain = FindBetterChain(N, Chain);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
// If there is a better chain.
|
2006-09-25 16:29:54 +00:00
|
|
|
if (Chain != BetterChain) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue ReplStore;
|
2009-09-15 00:18:30 +00:00
|
|
|
|
|
|
|
// Replace the chain to avoid dependency.
|
2006-10-14 12:14:27 +00:00
|
|
|
if (ST->isTruncatingStore()) {
|
2009-01-30 23:36:47 +00:00
|
|
|
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
|
2008-01-17 07:20:38 +00:00
|
|
|
ST->getSrcValue(),ST->getSrcValueOffset(),
|
2008-01-30 00:15:11 +00:00
|
|
|
ST->getMemoryVT(),
|
2008-01-17 07:20:38 +00:00
|
|
|
ST->isVolatile(), ST->getAlignment());
|
2006-10-14 12:14:27 +00:00
|
|
|
} else {
|
2009-01-30 23:36:47 +00:00
|
|
|
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
|
2008-01-17 07:20:38 +00:00
|
|
|
ST->getSrcValue(), ST->getSrcValueOffset(),
|
|
|
|
ST->isVolatile(), ST->getAlignment());
|
2006-10-14 12:14:27 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-25 16:29:54 +00:00
|
|
|
// Create token to keep both nodes around.
|
2009-01-30 23:36:47 +00:00
|
|
|
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
|
2009-08-11 20:47:22 +00:00
|
|
|
MVT::Other, Chain, ReplStore);
|
2009-01-30 23:36:47 +00:00
|
|
|
|
2009-09-15 00:18:30 +00:00
|
|
|
// Make sure the new and old chains are cleaned up.
|
|
|
|
AddToWorkList(Token.getNode());
|
|
|
|
|
2006-10-13 23:32:28 +00:00
|
|
|
// Don't add users to work list.
|
|
|
|
return CombineTo(N, Token, false);
|
2006-09-25 16:29:54 +00:00
|
|
|
}
|
2006-09-21 16:28:59 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-11-05 09:31:14 +00:00
|
|
|
// Try transforming N to an indexed store.
|
2006-11-07 09:03:05 +00:00
|
|
|
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0);
|
2006-11-05 09:31:14 +00:00
|
|
|
|
2007-12-29 06:26:16 +00:00
|
|
|
// FIXME: is there such a thing as a truncating indexed store?
|
2008-01-17 19:59:44 +00:00
|
|
|
if (ST->isTruncatingStore() && ST->isUnindexed() &&
|
2008-06-06 12:08:01 +00:00
|
|
|
Value.getValueType().isInteger()) {
|
2007-10-13 06:35:54 +00:00
|
|
|
// See if we can simplify the input to this truncstore with knowledge that
|
|
|
|
// only the low bits are being used. For example:
|
|
|
|
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue Shorter =
|
2008-02-25 21:11:39 +00:00
|
|
|
GetDemandedBits(Value,
|
2009-01-30 23:36:47 +00:00
|
|
|
APInt::getLowBitsSet(Value.getValueSizeInBits(),
|
|
|
|
ST->getMemoryVT().getSizeInBits()));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Value.getNode());
|
|
|
|
if (Shorter.getNode())
|
2009-01-30 23:36:47 +00:00
|
|
|
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
|
|
|
|
Ptr, ST->getSrcValue(),
|
2008-01-30 00:15:11 +00:00
|
|
|
ST->getSrcValueOffset(), ST->getMemoryVT(),
|
2007-10-13 06:35:54 +00:00
|
|
|
ST->isVolatile(), ST->getAlignment());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Enhance the truncstore optimization code to handle shifted
values and propagate demanded bits through them in simple cases.
This allows this code:
void foo(char *P) {
strcpy(P, "abc");
}
to compile to:
_foo:
ldrb r3, [r1]
ldrb r2, [r1, #+1]
ldrb r12, [r1, #+2]!
ldrb r1, [r1, #+1]
strb r1, [r0, #+3]
strb r2, [r0, #+1]
strb r12, [r0, #+2]
strb r3, [r0]
bx lr
instead of:
_foo:
ldrb r3, [r1, #+3]
ldrb r2, [r1, #+2]
orr r3, r2, r3, lsl #8
ldrb r2, [r1, #+1]
ldrb r1, [r1]
orr r2, r1, r2, lsl #8
orr r3, r2, r3, lsl #16
strb r3, [r0]
mov r2, r3, lsr #24
strb r2, [r0, #+3]
mov r2, r3, lsr #16
strb r2, [r0, #+2]
mov r3, r3, lsr #8
strb r3, [r0, #+1]
bx lr
testcase here: test/CodeGen/ARM/truncstore-dag-combine.ll
This also helps occasionally for X86 and other cases not involving
unaligned load/stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42954 91177308-0d34-0410-b5e6-96231b3b80d8
2007-10-13 06:58:48 +00:00
|
|
|
// Otherwise, see if we can simplify the operation with
|
|
|
|
// SimplifyDemandedBits, which only works if the value has a single use.
|
2008-02-27 00:25:32 +00:00
|
|
|
if (SimplifyDemandedBits(Value,
|
|
|
|
APInt::getLowBitsSet(
|
2009-12-14 23:40:38 +00:00
|
|
|
Value.getValueType().getScalarType().getSizeInBits(),
|
2008-06-06 12:08:01 +00:00
|
|
|
ST->getMemoryVT().getSizeInBits())))
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue(N, 0);
|
2007-10-13 06:35:54 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-12-29 06:26:16 +00:00
|
|
|
// If this is a load followed by a store to the same location, then the store
|
|
|
|
// is dead/noop.
|
|
|
|
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
|
2008-01-30 00:15:11 +00:00
|
|
|
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
|
2008-01-17 19:59:44 +00:00
|
|
|
ST->isUnindexed() && !ST->isVolatile() &&
|
2008-01-08 23:08:06 +00:00
|
|
|
// There can't be any side effects between the load and store, such as
|
|
|
|
// a call or store.
|
2008-07-27 21:46:04 +00:00
|
|
|
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
|
2007-12-29 06:26:16 +00:00
|
|
|
// The store is dead, remove it.
|
|
|
|
return Chain;
|
|
|
|
}
|
|
|
|
}
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
|
2008-01-17 19:59:44 +00:00
|
|
|
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
|
|
|
|
// truncating store. We can do this even if this is already a truncstore.
|
|
|
|
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
|
2008-08-28 21:40:38 +00:00
|
|
|
&& Value.getNode()->hasOneUse() && ST->isUnindexed() &&
|
2008-01-17 19:59:44 +00:00
|
|
|
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
|
2008-01-30 00:15:11 +00:00
|
|
|
ST->getMemoryVT())) {
|
2009-01-30 23:36:47 +00:00
|
|
|
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
|
|
|
|
Ptr, ST->getSrcValue(),
|
2008-01-30 00:15:11 +00:00
|
|
|
ST->getSrcValueOffset(), ST->getMemoryVT(),
|
2008-01-17 19:59:44 +00:00
|
|
|
ST->isVolatile(), ST->getAlignment());
|
|
|
|
}
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
|
2009-05-28 00:35:15 +00:00
|
|
|
return ReduceLoadOpStoreWidth(N);
|
2005-10-10 22:31:19 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// Combine an INSERT_VECTOR_ELT node.
///
/// Operand 0 is the vector being inserted into, operand 1 the value being
/// inserted and operand 2 the element index.  Two folds are attempted, both of
/// which need the index to be a ConstantSDNode:
///   * the input vector is a BUILD_VECTOR: rebuild it with the selected
///     operand replaced by the inserted value;
///   * the input vector is UNDEF (only tried while LegalOperations is false):
///     build a BUILD_VECTOR of undef elements with the inserted value placed
///     at the selected position.
/// In both folds an index that is not smaller than the operand count leaves
/// the operand list unchanged.
///
/// Returns the replacement BUILD_VECTOR, or an empty SDValue when neither
/// fold applies.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);

  // Neither fold below can fire without a constant element index.
  ConstantSDNode *IdxC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IdxC)
    return SDValue();

  const unsigned VecOpc = InVec.getOpcode();

  // insert_vector_elt (build_vector ...), Val, C
  //   -> build_vector with operand C replaced by Val.
  if (VecOpc == ISD::BUILD_VECTOR) {
    unsigned Idx = IdxC->getZExtValue();
    SDNode *BV = InVec.getNode();
    SmallVector<SDValue, 8> NewOps(BV->op_begin(), BV->op_end());
    if (Idx < NewOps.size())
      NewOps[Idx] = InVal;
    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                       InVec.getValueType(), &NewOps[0], NewOps.size());
  }

  // insert_vector_elt undef, Val, C
  //   -> build_vector of undefs with Val in position C.
  if (VecOpc == ISD::UNDEF && !LegalOperations) {
    EVT VecVT = InVec.getValueType();
    EVT ScalarVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    SmallVector<SDValue, 8> NewOps(NumElts, DAG.getUNDEF(ScalarVT));

    unsigned Idx = IdxC->getZExtValue();
    if (Idx < NewOps.size())
      NewOps[Idx] = InVal;
    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                       InVec.getValueType(), &NewOps[0], NewOps.size());
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitEXTRACT_VECTOR_ELT - Simplify an EXTRACT_VECTOR_ELT node.
///
/// Before operation legalization only the SCALAR_TO_VECTOR fold is performed.
/// After legalization, an extract with a constant element number from a
/// (possibly bit-converted / scalar_to_vector'd / shuffled) single-use,
/// non-volatile normal load is narrowed to a scalar load of just that element.
/// Returns a null SDValue when no fold applies.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val), 0) -> val
  SDValue InVec = N->getOperand(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // If the operand is wider than the vector element type then it is
    // implicitly truncated.  Make that explicit here.
    EVT EltVT = InVec.getValueType().getVectorElementType();
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != EltVT)
      return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
    return InOp;
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  SDValue EltNo = N->getOperand(1);

  if (isa<ConstantSDNode>(EltNo)) {
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    bool NewLoad = false;
    bool BCNumEltsChanged = false;
    EVT VT = InVec.getValueType();
    EVT ExtVT = VT.getVectorElementType();
    // LVT is the type of the scalar load that will be created; it stays the
    // element type of the original (pre-bit_convert) vector.
    EVT LVT = ExtVT;

    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
      EVT BCVT = InVec.getOperand(0).getValueType();
      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
        return SDValue();
      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
        BCNumEltsChanged = true;
      InVec = InVec.getOperand(0);
      ExtVT = BCVT.getVectorElementType();
      NewLoad = true;
    }

    LoadSDNode *LN0 = NULL;
    const ShuffleVectorSDNode *SVN = NULL;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Guard against an out of range extract: valid element numbers are
      // 0 .. NumElems-1, so Elt == NumElems must be rejected as well before
      // the shuffle mask is indexed.
      unsigned NumElems = VT.getVectorNumElements();
      if (Elt >= NumElems)
        return SDValue();

      // A negative mask entry selects no input element at all.  Using it as
      // an element number would wrap around to a huge unsigned value and
      // produce a bogus load address, so give up instead.
      int Idx = SVN->getMaskElt(Elt);
      if (Idx < 0)
        return SDValue();

      // Select the shuffle input the mask entry refers to.
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BIT_CONVERT)
        InVec = InVec.getOperand(0);
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Translate the mask entry into an element number within the
        // selected input vector.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems;
      }
    }

    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
      return SDValue();

    unsigned Align = LN0->getAlignment();
    if (NewLoad) {
      // Check the resultant load doesn't need a higher alignment than the
      // original load.
      unsigned NewAlign =
        TLI.getTargetData()->getABITypeAlignment(
            LVT.getTypeForEVT(*DAG.getContext()));

      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
        return SDValue();

      Align = NewAlign;
    }

    // Compute the address of the extracted element relative to the base
    // pointer of the original load.
    SDValue NewPtr = LN0->getBasePtr();
    if (Elt) {
      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
      EVT PtrType = NewPtr.getValueType();
      if (TLI.isBigEndian())
        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
                           DAG.getConstant(PtrOff, PtrType));
    }

    return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
                       LN0->isVolatile(), Align);
  }

  return SDValue();
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
|
2007-06-25 16:23:39 +00:00
|
|
|
unsigned NumInScalars = N->getNumOperands();
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
|
|
|
|
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
|
|
|
|
// at most two distinct vectors, turn this into a shuffle node.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue VecIn1, VecIn2;
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
for (unsigned i = 0; i != NumInScalars; ++i) {
|
|
|
|
// Ignore undef inputs.
|
|
|
|
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// If this input is something other than a EXTRACT_VECTOR_ELT with a
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
// constant index, bail out.
|
2007-06-25 16:23:39 +00:00
|
|
|
if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
!isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
|
2008-07-27 21:46:04 +00:00
|
|
|
VecIn1 = VecIn2 = SDValue(0, 0);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
break;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// If the input vector type disagrees with the result of the build_vector,
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
// we can't make a shuffle.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
|
2007-06-25 16:23:39 +00:00
|
|
|
if (ExtractedFromVec.getValueType() != VT) {
|
2008-07-27 21:46:04 +00:00
|
|
|
VecIn1 = VecIn2 = SDValue(0, 0);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
break;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
// Otherwise, remember this. We allow up to two distinct input vectors.
|
|
|
|
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
|
|
|
|
continue;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
if (VecIn1.getNode() == 0) {
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
VecIn1 = ExtractedFromVec;
|
2008-08-28 21:40:38 +00:00
|
|
|
} else if (VecIn2.getNode() == 0) {
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
VecIn2 = ExtractedFromVec;
|
|
|
|
} else {
|
|
|
|
// Too many inputs.
|
2008-07-27 21:46:04 +00:00
|
|
|
VecIn1 = VecIn2 = SDValue(0, 0);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
// If everything is good, we can make a shuffle operation.
|
2008-08-28 21:40:38 +00:00
|
|
|
if (VecIn1.getNode()) {
|
2009-04-27 18:41:29 +00:00
|
|
|
SmallVector<int, 8> Mask;
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
for (unsigned i = 0; i != NumInScalars; ++i) {
|
|
|
|
if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
|
2009-04-27 18:41:29 +00:00
|
|
|
Mask.push_back(-1);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
continue;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-04-24 12:40:33 +00:00
|
|
|
// If extracting from the first vector, just use the index directly.
|
2009-04-27 18:41:29 +00:00
|
|
|
SDValue Extract = N->getOperand(i);
|
2009-03-17 06:33:10 +00:00
|
|
|
SDValue ExtVal = Extract.getOperand(1);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
if (Extract.getOperand(0) == VecIn1) {
|
2009-04-29 05:20:52 +00:00
|
|
|
unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
|
|
|
|
if (ExtIndex > VT.getVectorNumElements())
|
|
|
|
return SDValue();
|
|
|
|
|
|
|
|
Mask.push_back(ExtIndex);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, use InIdx + VecSize
|
2009-03-17 06:33:10 +00:00
|
|
|
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
|
2009-04-27 18:41:29 +00:00
|
|
|
Mask.push_back(Idx+NumInScalars);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
// Add count and size info.
|
2009-04-27 18:41:29 +00:00
|
|
|
if (!TLI.isTypeLegal(VT) && LegalTypes)
|
2008-11-24 14:53:14 +00:00
|
|
|
return SDValue();
|
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// Return the new VECTOR_SHUFFLE node.
|
2009-04-27 18:41:29 +00:00
|
|
|
SDValue Ops[2];
|
2006-08-08 02:23:42 +00:00
|
|
|
Ops[0] = VecIn1;
|
2009-04-27 18:41:29 +00:00
|
|
|
Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
|
|
|
|
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
Turn a series of extract_element's feeding a build_vector into a
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27233 91177308-0d34-0410-b5e6-96231b3b80d8
2006-03-28 20:28:38 +00:00
|
|
|
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitCONCAT_VECTORS - Combine a CONCAT_VECTORS node.  The only fold done
/// here is the degenerate single-operand case, which simply forwards that
/// operand; any other node is left alone (an empty SDValue is returned).
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // TODO: Recognize a CONCAT_VECTORS whose operands are all EXTRACT_SUBVECTOR
  // operations reading from at most two distinct source vectors, and turn it
  // into a shuffle node.

  // With anything other than exactly one input there is nothing to fold.
  if (N->getNumOperands() != 1)
    return SDValue();

  // Concatenating a single vector yields that vector unchanged.
  return N->getOperand(0);
}
|
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
/// visitVECTOR_SHUFFLE - Combine a VECTOR_SHUFFLE node.
///
/// NOTE: this combine is currently switched off.  The function returns an
/// empty SDValue as its very first statement, so none of the splat
/// simplification below is ever executed.  The logic is retained so that it
/// can be revived once the early return is removed.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  return SDValue();

  // ---- Everything below this line is unreachable. ----

  SDValue Src = N->getOperand(0);
  EVT ResultVT = N->getValueType(0);
  unsigned NumElts = ResultVT.getVectorNumElements();

  assert(Src.getValueType().getVectorNumElements() == NumElts &&
         "Vector shuffle must be normalized in DAG");

  // FIXME: implement canonicalizations from DAG.getVectorShuffle()

  // Only splat shuffles are handled: a splat of a build_vector whose scalar
  // elements are all identical is just that build_vector.
  ShuffleVectorSDNode *Shuf = cast<ShuffleVectorSDNode>(N);
  if (!Shuf->isSplat())
    return SDValue();

  // Look through a bit convert that changes the element type but keeps the
  // element count.  Conversions that change the count (e.g. v4f32 to v2f64)
  // must not be looked through.
  SDNode *Input = Src.getNode();
  if (Input->getOpcode() == ISD::BIT_CONVERT) {
    SDValue Unconverted = Input->getOperand(0);
    EVT UnconvertedVT = Unconverted.getValueType();
    if (UnconvertedVT.isVector() &&
        UnconvertedVT.getVectorNumElements() == NumElts)
      Input = Unconverted.getNode();
  }

  if (Input->getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  unsigned NumOps = Input->getNumOperands();
  unsigned SplatIdx = Shuf->getSplatIndex();
  if (NumOps <= SplatIdx)
    return SDValue();

  // Locate the first operand that is not UNDEF.
  unsigned FirstDefIdx = 0;
  while (FirstDefIdx != NumOps &&
         Input->getOperand(FirstDefIdx).getOpcode() == ISD::UNDEF)
    ++FirstDefIdx;

  // Splat of <u, u, u, u>, return <u, u, u, u>
  if (FirstDefIdx == NumOps)
    return Src;

  // Every operand (UNDEF ones included) has to be the very same value;
  // otherwise splatting one lane is not equivalent to the input vector.
  SDValue FirstDefined = Input->getOperand(FirstDefIdx);
  for (unsigned Idx = 0; Idx != NumOps; ++Idx)
    if (Input->getOperand(Idx) != FirstDefined)
      return SDValue();

  // Splat of <x, x, x, x>, return <x, x, x, x>
  return Src;
}
|
|
|
|
|
2006-04-20 08:56:16 +00:00
|
|
|
/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
|
2007-06-25 16:23:39 +00:00
|
|
|
/// an AND to a vector_shuffle with the destination vector and a zero vector.
|
|
|
|
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
|
2006-04-20 08:56:16 +00:00
|
|
|
/// vector_shuffle V, Zero, <0, 4, 2, 4>
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2009-04-27 18:41:29 +00:00
|
|
|
DebugLoc dl = N->getDebugLoc();
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue LHS = N->getOperand(0);
|
|
|
|
SDValue RHS = N->getOperand(1);
|
2007-06-25 16:23:39 +00:00
|
|
|
if (N->getOpcode() == ISD::AND) {
|
|
|
|
if (RHS.getOpcode() == ISD::BIT_CONVERT)
|
2006-04-20 08:56:16 +00:00
|
|
|
RHS = RHS.getOperand(0);
|
2007-06-25 16:23:39 +00:00
|
|
|
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
|
2009-04-27 18:41:29 +00:00
|
|
|
SmallVector<int, 8> Indices;
|
|
|
|
unsigned NumElts = RHS.getNumOperands();
|
2006-04-20 08:56:16 +00:00
|
|
|
for (unsigned i = 0; i != NumElts; ++i) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Elt = RHS.getOperand(i);
|
2006-04-20 08:56:16 +00:00
|
|
|
if (!isa<ConstantSDNode>(Elt))
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-04-20 08:56:16 +00:00
|
|
|
else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
|
2009-04-27 18:41:29 +00:00
|
|
|
Indices.push_back(i);
|
2006-04-20 08:56:16 +00:00
|
|
|
else if (cast<ConstantSDNode>(Elt)->isNullValue())
|
2009-04-27 18:41:29 +00:00
|
|
|
Indices.push_back(NumElts);
|
2006-04-20 08:56:16 +00:00
|
|
|
else
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-04-20 08:56:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Let's see if the target supports this vector_shuffle.
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT RVT = RHS.getValueType();
|
2009-04-27 18:41:29 +00:00
|
|
|
if (!TLI.isVectorClearMaskLegal(Indices, RVT))
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-04-20 08:56:16 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// Return the new VECTOR_SHUFFLE node.
|
2009-09-23 21:02:20 +00:00
|
|
|
EVT EltVT = RVT.getVectorElementType();
|
2009-04-27 18:41:29 +00:00
|
|
|
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
|
2009-09-23 21:02:20 +00:00
|
|
|
DAG.getConstant(0, EltVT));
|
2009-04-27 18:41:29 +00:00
|
|
|
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
|
|
|
RVT, &ZeroOps[0], ZeroOps.size());
|
|
|
|
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
|
|
|
|
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
|
|
|
|
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
|
2006-04-20 08:56:16 +00:00
|
|
|
}
|
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2006-04-20 08:56:16 +00:00
|
|
|
}
|
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
|
2007-06-25 16:23:39 +00:00
|
|
|
// After legalize, the target may be depending on adds and other
|
|
|
|
// binary ops to provide legal ways to construct constants or other
|
|
|
|
// things. Simplifying them may result in a loss of legality.
|
2008-11-24 14:53:14 +00:00
|
|
|
if (LegalOperations) return SDValue();
|
2007-06-25 16:23:39 +00:00
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N->getValueType(0);
|
2008-06-06 12:08:01 +00:00
|
|
|
assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
|
2007-06-25 16:23:39 +00:00
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT EltType = VT.getVectorElementType();
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue LHS = N->getOperand(0);
|
|
|
|
SDValue RHS = N->getOperand(1);
|
|
|
|
SDValue Shuffle = XformToShuffleWithZero(N);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Shuffle.getNode()) return Shuffle;
|
2006-04-20 08:56:16 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
|
Constant fold all of the vector binops. This allows us to compile this:
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27343 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 03:25:57 +00:00
|
|
|
// this operation.
|
2009-02-17 22:15:04 +00:00
|
|
|
if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
|
2007-06-25 16:23:39 +00:00
|
|
|
RHS.getOpcode() == ISD::BUILD_VECTOR) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SmallVector<SDValue, 8> Ops;
|
2007-06-25 16:23:39 +00:00
|
|
|
for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue LHSOp = LHS.getOperand(i);
|
|
|
|
SDValue RHSOp = RHS.getOperand(i);
|
Constant fold all of the vector binops. This allows us to compile this:
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27343 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 03:25:57 +00:00
|
|
|
// If these two elements can't be folded, bail out.
|
|
|
|
if ((LHSOp.getOpcode() != ISD::UNDEF &&
|
|
|
|
LHSOp.getOpcode() != ISD::Constant &&
|
|
|
|
LHSOp.getOpcode() != ISD::ConstantFP) ||
|
|
|
|
(RHSOp.getOpcode() != ISD::UNDEF &&
|
|
|
|
RHSOp.getOpcode() != ISD::Constant &&
|
|
|
|
RHSOp.getOpcode() != ISD::ConstantFP))
|
|
|
|
break;
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2006-05-31 06:08:35 +00:00
|
|
|
// Can't fold divide by zero.
|
2007-06-25 16:23:39 +00:00
|
|
|
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
|
|
|
|
N->getOpcode() == ISD::FDIV) {
|
2006-05-31 06:08:35 +00:00
|
|
|
if ((RHSOp.getOpcode() == ISD::Constant &&
|
2008-08-28 21:40:38 +00:00
|
|
|
cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
|
2006-05-31 06:08:35 +00:00
|
|
|
(RHSOp.getOpcode() == ISD::ConstantFP &&
|
2008-08-28 21:40:38 +00:00
|
|
|
cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
|
2006-05-31 06:08:35 +00:00
|
|
|
break;
|
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2009-01-31 03:12:48 +00:00
|
|
|
Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
|
2009-01-30 23:59:18 +00:00
|
|
|
EltType, LHSOp, RHSOp));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Ops.back().getNode());
|
Constant fold all of the vector binops. This allows us to compile this:
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27343 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 03:25:57 +00:00
|
|
|
assert((Ops.back().getOpcode() == ISD::UNDEF ||
|
|
|
|
Ops.back().getOpcode() == ISD::Constant ||
|
|
|
|
Ops.back().getOpcode() == ISD::ConstantFP) &&
|
|
|
|
"Scalar binop didn't fold!");
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-06-25 16:23:39 +00:00
|
|
|
if (Ops.size() == LHS.getNumOperands()) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = LHS.getValueType();
|
2009-02-25 22:49:59 +00:00
|
|
|
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
|
|
|
|
&Ops[0], Ops.size());
|
2006-04-03 17:21:50 +00:00
|
|
|
}
|
Constant fold all of the vector binops. This allows us to compile this:
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27343 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 03:25:57 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
Constant fold all of the vector binops. This allows us to compile this:
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27343 91177308-0d34-0410-b5e6-96231b3b80d8
2006-04-02 03:25:57 +00:00
|
|
|
}
|
|
|
|
|
2009-01-30 23:59:18 +00:00
|
|
|
/// SimplifySelect - Try to simplify a SELECT whose condition N0 is a SETCC
/// node and whose selected values are N1 and N2.  The compare operands, the
/// two selected values and the condition code are handed to
/// SimplifySelectCC.
///
/// If that yields a SELECT_CC, it is broken back down into a new SETCC plus
/// a new SELECT (the caller started from a SELECT) and the SELECT is
/// returned.  Any other non-empty result is returned as is.  An empty
/// SDValue means no simplification was found.
SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // Nothing simplified.
  if (!SCC.getNode())
    return SDValue();

  // Check to see if we got a select_cc back (to turn into setcc/select).
  // Otherwise, just return whatever node we got back, like fabs.
  if (SCC.getOpcode() != ISD::SELECT_CC)
    return SCC;

  // SELECT_CC operands are (LHS, RHS, TrueVal, FalseVal, CondCode): rebuild
  // the comparison from operands 0, 1 and 4.
  SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
                              N0.getValueType(),
                              SCC.getOperand(0), SCC.getOperand(1),
                              SCC.getOperand(4));
  AddToWorkList(SETCC.getNode());

  // SELECT takes its condition as the FIRST operand, followed by the true
  // and false values.  The condition used to be passed last, which produced
  // a malformed SELECT node.
  return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
                     SETCC, SCC.getOperand(2), SCC.getOperand(3));
}
|
|
|
|
|
Fold (select C, load A, load B) -> load (select C, A, B). This happens quite
a lot throughout many programs. In particular, specfp triggers it a bunch for
constant FP nodes when you have code like cond ? 1.0 : -1.0.
If the PPC ISel exposed the loads implicit in pic references to external globals,
we would be able to eliminate a load in cases like this as well:
%X = external global int
%Y = external global int
int* %test4(bool %C) {
%G = select bool %C, int* %X, int* %Y
ret int* %G
}
Note that this breaks things that use SrcValue's (see the fixme), but since nothing
uses them yet, this is ok.
Also, simplify some code to use hasOneUse() on an SDOperand instead of hasNUsesOfValue directly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23781 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-18 06:04:22 +00:00
|
|
|
/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
|
|
|
|
/// are the two values being selected between, see if we can simplify the
|
2006-05-27 00:43:02 +00:00
|
|
|
/// select. Callers of this should assume that TheSelect is deleted if this
|
|
|
|
/// returns true. As such, they should return the appropriate thing (e.g. the
|
|
|
|
/// node) back to the top-level of the DAG combiner loop to avoid it being
|
|
|
|
/// looked at.
|
2009-02-17 22:15:04 +00:00
|
|
|
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue RHS) {
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Fold (select C, load A, load B) -> load (select C, A, B). This happens quite
a lot throughout many programs. In particular, specfp triggers it a bunch for
constant FP nodes when you have code like cond ? 1.0 : -1.0.
If the PPC ISel exposed the loads implicit in pic references to external globals,
we would be able to eliminate a load in cases like this as well:
%X = external global int
%Y = external global int
int* %test4(bool %C) {
%G = select bool %C, int* %X, int* %Y
ret int* %G
}
Note that this breaks things that use SrcValue's (see the fixme), but since nothing
uses them yet, this is ok.
Also, simplify some code to use hasOneUse() on an SDOperand instead of hasNUsesOfValue directly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23781 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-18 06:04:22 +00:00
|
|
|
// If this is a select from two identical things, try to pull the operation
|
|
|
|
// through the select.
|
|
|
|
if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
|
|
|
|
// If this is a load and the token chain is identical, replace the select
|
|
|
|
// of two loads with a load through a select of the address to load from.
|
|
|
|
// This triggers in things like "select bool X, 10.0, 123.0" after the FP
|
|
|
|
// constants have been dropped into the constant pool.
|
2006-10-09 20:57:25 +00:00
|
|
|
if (LHS.getOpcode() == ISD::LOAD &&
|
Disable some DAG combiner optimizations that may be
wrong for volatile loads and stores. In fact this
is almost all of them! There are three types of
problems: (1) it is wrong to change the width of
a volatile memory access. These may be used to
do memory mapped i/o, in which case a load can have
an effect even if the result is not used. Consider
loading an i32 but only using the lower 8 bits. It
is wrong to change this into a load of an i8, because
you are no longer tickling the other three bytes. It
is also unwise to make a load/store wider. For
example, changing an i16 load into an i32 load is
wrong no matter how aligned things are, since the
fact of loading an additional 2 bytes can have
i/o side-effects. (2) it is wrong to change the
number of volatile load/stores: they may be counted
by the hardware. (3) it is wrong to change a volatile
load/store that requires one memory access into one
that requires several. For example on x86-32, you
can store a double in one processor operation, but to
store an i64 requires two (two i32 stores). In a
multi-threaded program you may want to bitcast an i64
to a double and store as a double because that will
occur atomically, and be indivisible to other threads.
So it would be wrong to convert the store-of-double
into a store of an i64, because this will become two
i32 stores - no longer atomic. My policy here is
to say that the number of processor operations for
an illegal operation is undefined. So it is alright
to change a store of an i64 (requires at least two
stores; but could be validly lowered to memcpy for
example) into a store of double (one processor op).
In short, if the new store is legal and has the same
size then I say that the transform is ok. It would
also be possible to say that transforms are always
ok if before they were illegal, whether after they
are illegal or not, but that's more awkward to do
and I doubt it buys us anything much.
However this exposed an interesting thing - on x86-32
a store of i64 is considered legal! That is because
operations are marked legal by default, regardless of
whether the type is legal or not. In some ways this
is clever: before type legalization this means that
operations on illegal types are considered legal;
after type legalization there are no illegal types
so now operations are only legal if they really are.
But I consider this to be too cunning for mere mortals.
Better to do things explicitly by testing AfterLegalize.
So I have changed things so that operations with illegal
types are considered illegal - indeed they can never
map to a machine operation. However this means that
the DAG combiner is more conservative because before
it was "accidentally" performing transforms where the
type was illegal because the operation was nonetheless
marked legal. So in a few such places I added a check
on AfterLegalize, which I suppose was actually just
forgotten before. This causes the DAG combiner to do
slightly more than it used to, which resulted in the X86
backend blowing up because it got a slightly surprising
node it wasn't expecting, so I tweaked it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52254 91177308-0d34-0410-b5e6-96231b3b80d8
2008-06-13 19:07:40 +00:00
|
|
|
// Do not let this transformation reduce the number of volatile loads.
|
|
|
|
!cast<LoadSDNode>(LHS)->isVolatile() &&
|
|
|
|
!cast<LoadSDNode>(RHS)->isVolatile() &&
|
Fold (select C, load A, load B) -> load (select C, A, B). This happens quite
a lot throughout many programs. In particular, specfp triggers it a bunch for
constant FP nodes when you have code like cond ? 1.0 : -1.0.
If the PPC ISel exposed the loads implicit in pic references to external globals,
we would be able to eliminate a load in cases like this as well:
%X = external global int
%Y = external global int
int* %test4(bool %C) {
%G = select bool %C, int* %X, int* %Y
ret int* %G
}
Note that this breaks things that use SrcValue's (see the fixme), but since nothing
uses them yet, this is ok.
Also, simplify some code to use hasOneUse() on an SDOperand instead of hasNUsesOfValue directly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23781 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-18 06:04:22 +00:00
|
|
|
// Token chains must be identical.
|
2006-10-09 20:57:25 +00:00
|
|
|
LHS.getOperand(0) == RHS.getOperand(0)) {
|
|
|
|
LoadSDNode *LLD = cast<LoadSDNode>(LHS);
|
|
|
|
LoadSDNode *RLD = cast<LoadSDNode>(RHS);
|
|
|
|
|
|
|
|
// If this is an EXTLOAD, the VT's must match.
|
2008-01-30 00:15:11 +00:00
|
|
|
if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
|
2009-10-31 14:14:04 +00:00
|
|
|
// FIXME: this discards src value information. This is
|
|
|
|
// over-conservative. It would be beneficial to be able to remember
|
|
|
|
// both potential memory locations.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Addr;
|
2007-01-16 05:59:59 +00:00
|
|
|
if (TheSelect->getOpcode() == ISD::SELECT) {
|
|
|
|
// Check that the condition doesn't reach either load. If so, folding
|
|
|
|
// this will induce a cycle into the DAG.
|
2009-10-28 15:28:02 +00:00
|
|
|
if ((!LLD->hasAnyUseOfValue(1) ||
|
|
|
|
!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
|
|
|
|
(!RLD->hasAnyUseOfValue(1) ||
|
|
|
|
!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
|
2009-01-30 23:59:18 +00:00
|
|
|
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
|
|
|
|
LLD->getBasePtr().getValueType(),
|
2007-01-16 05:59:59 +00:00
|
|
|
TheSelect->getOperand(0), LLD->getBasePtr(),
|
|
|
|
RLD->getBasePtr());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Check that the condition doesn't reach either load. If so, folding
|
|
|
|
// this will induce a cycle into the DAG.
|
2009-10-28 15:28:02 +00:00
|
|
|
if ((!LLD->hasAnyUseOfValue(1) ||
|
|
|
|
(!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
|
|
|
|
!LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
|
|
|
|
(!RLD->hasAnyUseOfValue(1) ||
|
|
|
|
(!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
|
|
|
|
!RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
|
2009-01-30 23:59:18 +00:00
|
|
|
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
|
|
|
|
LLD->getBasePtr().getValueType(),
|
|
|
|
TheSelect->getOperand(0),
|
2009-02-17 22:15:04 +00:00
|
|
|
TheSelect->getOperand(1),
|
2009-01-30 23:59:18 +00:00
|
|
|
LLD->getBasePtr(), RLD->getBasePtr(),
|
|
|
|
TheSelect->getOperand(4));
|
2007-01-16 05:59:59 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
if (Addr.getNode()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Load;
|
2009-01-30 23:59:18 +00:00
|
|
|
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
|
|
|
|
Load = DAG.getLoad(TheSelect->getValueType(0),
|
|
|
|
TheSelect->getDebugLoc(),
|
|
|
|
LLD->getChain(),
|
2009-10-31 14:14:04 +00:00
|
|
|
Addr, 0, 0,
|
2009-02-17 22:15:04 +00:00
|
|
|
LLD->isVolatile(),
|
2007-04-22 23:15:30 +00:00
|
|
|
LLD->getAlignment());
|
2009-01-30 23:59:18 +00:00
|
|
|
} else {
|
2007-01-16 05:59:59 +00:00
|
|
|
Load = DAG.getExtLoad(LLD->getExtensionType(),
|
2009-01-30 23:59:18 +00:00
|
|
|
TheSelect->getDebugLoc(),
|
2007-01-16 05:59:59 +00:00
|
|
|
TheSelect->getValueType(0),
|
2009-10-31 14:14:04 +00:00
|
|
|
LLD->getChain(), Addr, 0, 0,
|
2008-01-30 00:15:11 +00:00
|
|
|
LLD->getMemoryVT(),
|
2009-02-17 22:15:04 +00:00
|
|
|
LLD->isVolatile(),
|
2007-04-22 23:15:30 +00:00
|
|
|
LLD->getAlignment());
|
2007-01-16 05:59:59 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2007-01-16 05:59:59 +00:00
|
|
|
// Users of the select now use the result of the load.
|
|
|
|
CombineTo(TheSelect, Load);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-01-16 05:59:59 +00:00
|
|
|
// Users of the old loads now use the new load's chain. We know the
|
|
|
|
// old-load value is dead now.
|
2008-08-28 21:40:38 +00:00
|
|
|
CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
|
|
|
|
CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
|
2007-01-16 05:59:59 +00:00
|
|
|
return true;
|
2006-10-09 20:57:25 +00:00
|
|
|
}
|
|
|
|
}
|
Fold (select C, load A, load B) -> load (select C, A, B). This happens quite
a lot throughout many programs. In particular, specfp triggers it a bunch for
constant FP nodes when you have code like cond ? 1.0 : -1.0.
If the PPC ISel exposed the loads implicit in pic references to external globals,
we would be able to eliminate a load in cases like this as well:
%X = external global int
%Y = external global int
int* %test4(bool %C) {
%G = select bool %C, int* %X, int* %Y
ret int* %G
}
Note that this breaks things that use SrcValue's (see the fixme), but since nothing
uses them yet, this is ok.
Also, simplify some code to use hasOneUse() on an SDOperand instead of hasNUsesOfValue directly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23781 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-18 06:04:22 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
Fold (select C, load A, load B) -> load (select C, A, B). This happens quite
a lot throughout many programs. In particular, specfp triggers it a bunch for
constant FP nodes when you have code like cond ? 1.0 : -1.0.
If the PPC ISel exposed the loads implicit in pic references to external globals,
we would be able to eliminate a load in cases like this as well:
%X = external global int
%Y = external global int
int* %test4(bool %C) {
%G = select bool %C, int* %X, int* %Y
ret int* %G
}
Note that this breaks things that use SrcValue's (see the fixme), but since nothing
uses them yet, this is ok.
Also, simplify some code to use hasOneUse() on an SDOperand instead of hasNUsesOfValue directly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@23781 91177308-0d34-0410-b5e6-96231b3b80d8
2005-10-18 06:04:22 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-03-11 05:08:08 +00:00
|
|
|
/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
|
|
|
|
/// where 'cond' is the comparison specified by CC.
|
2009-02-17 22:15:04 +00:00
|
|
|
SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue N2, SDValue N3,
|
|
|
|
ISD::CondCode CC, bool NotExtCompare) {
|
2009-03-11 05:08:08 +00:00
|
|
|
// (x ? y : y) -> y.
|
|
|
|
if (N2 == N3) return N2;
|
|
|
|
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT VT = N2.getValueType();
|
2008-08-28 21:40:38 +00:00
|
|
|
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
|
|
|
|
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
|
|
|
|
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
|
2005-10-08 00:29:44 +00:00
|
|
|
|
|
|
|
// Determine if the condition we're dealing with is constant
|
2009-01-01 15:52:00 +00:00
|
|
|
SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
|
2009-02-03 00:47:48 +00:00
|
|
|
N0, N1, CC, DL, false);
|
2008-08-28 21:40:38 +00:00
|
|
|
if (SCC.getNode()) AddToWorkList(SCC.getNode());
|
|
|
|
ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
|
2005-10-08 00:29:44 +00:00
|
|
|
|
|
|
|
// fold select_cc true, x, y -> x
|
2008-03-13 22:13:53 +00:00
|
|
|
if (SCCC && !SCCC->isNullValue())
|
2005-10-08 00:29:44 +00:00
|
|
|
return N2;
|
|
|
|
// fold select_cc false, x, y -> y
|
2008-03-13 22:13:53 +00:00
|
|
|
if (SCCC && SCCC->isNullValue())
|
2005-10-08 00:29:44 +00:00
|
|
|
return N3;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-08 00:29:44 +00:00
|
|
|
// Check to see if we can simplify the select into an fabs node
|
|
|
|
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
|
|
|
|
// Allow either -0.0 or 0.0
|
2007-08-25 22:10:57 +00:00
|
|
|
if (CFP->getValueAPF().isZero()) {
|
2005-10-08 00:29:44 +00:00
|
|
|
// select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
|
|
|
|
if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
|
|
|
|
N0 == N2 && N3.getOpcode() == ISD::FNEG &&
|
|
|
|
N2 == N3.getOperand(0))
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::FABS, DL, VT, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-08 00:29:44 +00:00
|
|
|
// select (setl[te] X, +/-0.0), fneg(X), X -> fabs
|
|
|
|
if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
|
|
|
|
N0 == N3 && N2.getOpcode() == ISD::FNEG &&
|
|
|
|
N2.getOperand(0) == N3)
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::FABS, DL, VT, N3);
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
|
|
|
}
|
2009-03-11 05:08:08 +00:00
|
|
|
|
|
|
|
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
|
|
|
|
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
|
|
|
|
// in it. This is a win when the constant is not otherwise available because
|
|
|
|
// it replaces two constant pool loads with one. We only do this if the FP
|
|
|
|
// type is known to be legal, because if it isn't, then we are before legalize
|
|
|
|
// types an we want the other legalization to happen first (e.g. to avoid
|
2009-03-14 00:25:19 +00:00
|
|
|
// messing with soft float) and if the ConstantFP is not legal, because if
|
|
|
|
// it is legal, we may not need to store the FP constant in a constant pool.
|
2009-03-11 05:08:08 +00:00
|
|
|
if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
|
|
|
|
if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
|
|
|
|
if (TLI.isTypeLegal(N2.getValueType()) &&
|
2009-03-14 00:25:19 +00:00
|
|
|
(TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
|
|
|
|
TargetLowering::Legal) &&
|
2009-03-11 05:08:08 +00:00
|
|
|
// If both constants have multiple uses, then we won't need to do an
|
|
|
|
// extra load, they are likely around in registers for other users.
|
|
|
|
(TV->hasOneUse() || FV->hasOneUse())) {
|
|
|
|
Constant *Elts[] = {
|
|
|
|
const_cast<ConstantFP*>(FV->getConstantFPValue()),
|
|
|
|
const_cast<ConstantFP*>(TV->getConstantFPValue())
|
|
|
|
};
|
|
|
|
const Type *FPTy = Elts[0]->getType();
|
|
|
|
const TargetData &TD = *TLI.getTargetData();
|
|
|
|
|
|
|
|
// Create a ConstantArray of the two constants.
|
2009-07-29 22:17:13 +00:00
|
|
|
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
|
2009-03-11 05:08:08 +00:00
|
|
|
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
|
|
|
|
TD.getPrefTypeAlignment(FPTy));
|
2009-03-13 07:51:59 +00:00
|
|
|
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
|
2009-03-11 05:08:08 +00:00
|
|
|
|
|
|
|
// Get the offsets to the 0 and 1 element of the array so that we can
|
|
|
|
// select between them.
|
|
|
|
SDValue Zero = DAG.getIntPtrConstant(0);
|
2009-05-09 07:06:46 +00:00
|
|
|
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
|
2009-03-11 05:08:08 +00:00
|
|
|
SDValue One = DAG.getIntPtrConstant(EltSize);
|
|
|
|
|
|
|
|
SDValue Cond = DAG.getSetCC(DL,
|
|
|
|
TLI.getSetCCResultType(N0.getValueType()),
|
|
|
|
N0, N1, CC);
|
|
|
|
SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
|
|
|
|
Cond, One, Zero);
|
|
|
|
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
|
|
|
|
CstOffset);
|
|
|
|
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
|
|
|
|
PseudoSourceValue::getConstantPool(), 0, false,
|
|
|
|
Alignment);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-08 00:29:44 +00:00
|
|
|
// Check to see if we can perform the "gzip trick", transforming
|
2009-01-30 23:59:18 +00:00
|
|
|
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
|
Compile:
int test3(int a, int b) { return (a < 0) ? a : 0; }
to:
_test3:
srawi r2, r3, 31
and r3, r2, r3
blr
instead of:
_test3:
cmpwi cr0, r3, 1
li r2, 0
blt cr0, LBB2_2 ;entry
LBB2_1: ;entry
mr r3, r2
LBB2_2: ;entry
blr
This implements: PowerPC/select_lt0.ll:seli32_a_a
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30517 91177308-0d34-0410-b5e6-96231b3b80d8
2006-09-20 06:41:35 +00:00
|
|
|
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
|
2008-06-06 12:08:01 +00:00
|
|
|
N0.getValueType().isInteger() &&
|
|
|
|
N2.getValueType().isInteger() &&
|
2008-03-13 22:13:53 +00:00
|
|
|
(N1C->isNullValue() || // (a < 0) ? b : 0
|
|
|
|
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT XType = N0.getValueType();
|
|
|
|
EVT AType = N2.getValueType();
|
2008-06-08 20:54:56 +00:00
|
|
|
if (XType.bitsGE(AType)) {
|
2005-10-08 00:29:44 +00:00
|
|
|
// and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
|
2005-10-10 21:26:48 +00:00
|
|
|
// single-bit constant.
|
2008-03-13 22:13:53 +00:00
|
|
|
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
|
|
|
|
unsigned ShCtV = N2C->getAPIntValue().logBase2();
|
2008-06-06 12:08:01 +00:00
|
|
|
ShCtV = XType.getSizeInBits()-ShCtV-1;
|
2009-01-31 15:50:11 +00:00
|
|
|
SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
|
2009-01-30 23:59:18 +00:00
|
|
|
XType, N0, ShCt);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Shift.getNode());
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2008-06-08 20:54:56 +00:00
|
|
|
if (XType.bitsGT(AType)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Shift.getNode());
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
|
|
|
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
|
2009-01-30 23:59:18 +00:00
|
|
|
XType, N0,
|
|
|
|
DAG.getConstant(XType.getSizeInBits()-1,
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Shift.getNode());
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2008-06-08 20:54:56 +00:00
|
|
|
if (XType.bitsGT(AType)) {
|
2009-01-31 03:12:48 +00:00
|
|
|
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Shift.getNode());
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
|
|
|
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-10 21:26:48 +00:00
|
|
|
// fold select C, 16, 0 -> shl C, 4
|
2008-03-13 22:13:53 +00:00
|
|
|
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
|
2008-11-23 15:47:28 +00:00
|
|
|
TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2007-04-11 06:50:51 +00:00
|
|
|
// If the caller doesn't want us to simplify this into a zext of a compare,
|
|
|
|
// don't do it.
|
2008-03-13 22:13:53 +00:00
|
|
|
if (NotExtCompare && N2C->getAPIntValue() == 1)
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-10 21:26:48 +00:00
|
|
|
// Get a SetCC of the condition
|
|
|
|
// FIXME: Should probably make sure that setcc is legal if we ever have a
|
|
|
|
// target where it isn't.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Temp, SCC;
|
2005-10-10 21:26:48 +00:00
|
|
|
// cast from setcc result type to select result type
|
2008-11-24 14:53:14 +00:00
|
|
|
if (LegalTypes) {
|
2009-01-31 03:12:48 +00:00
|
|
|
SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
|
2009-01-01 15:52:00 +00:00
|
|
|
N0, N1, CC);
|
2008-06-08 20:54:56 +00:00
|
|
|
if (N2.getValueType().bitsLT(SCC.getValueType()))
|
2009-01-31 03:12:48 +00:00
|
|
|
Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
|
2006-12-07 22:36:47 +00:00
|
|
|
else
|
2009-01-31 03:12:48 +00:00
|
|
|
Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
|
2009-01-30 23:59:18 +00:00
|
|
|
N2.getValueType(), SCC);
|
2006-02-18 02:40:58 +00:00
|
|
|
} else {
|
2009-08-11 20:47:22 +00:00
|
|
|
SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
|
2009-01-31 03:12:48 +00:00
|
|
|
Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
|
2009-01-30 23:59:18 +00:00
|
|
|
N2.getValueType(), SCC);
|
2006-02-18 02:40:58 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(SCC.getNode());
|
|
|
|
AddToWorkList(Temp.getNode());
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-03-13 22:13:53 +00:00
|
|
|
if (N2C->getAPIntValue() == 1)
|
2007-04-11 06:43:25 +00:00
|
|
|
return Temp;
|
2009-01-30 23:59:18 +00:00
|
|
|
|
2005-10-10 21:26:48 +00:00
|
|
|
// shl setcc result by log2 n2c
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
|
2008-03-13 22:13:53 +00:00
|
|
|
DAG.getConstant(N2C->getAPIntValue().logBase2(),
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2005-10-10 21:26:48 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-08 00:29:44 +00:00
|
|
|
// Check to see if this is the equivalent of setcc
|
|
|
|
// FIXME: Turn all of these into setcc if setcc if setcc is legal
|
|
|
|
// otherwise, go ahead with the folds.
|
2008-03-13 22:13:53 +00:00
|
|
|
if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT XType = N0.getValueType();
|
2008-11-24 14:53:14 +00:00
|
|
|
if (!LegalOperations ||
|
2009-01-01 15:52:00 +00:00
|
|
|
TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
|
2009-01-30 23:59:18 +00:00
|
|
|
SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
|
2005-10-08 00:29:44 +00:00
|
|
|
if (Res.getValueType() != VT)
|
2009-01-30 23:59:18 +00:00
|
|
|
Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
|
2005-10-08 00:29:44 +00:00
|
|
|
return Res;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-01-30 23:59:18 +00:00
|
|
|
// fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
|
2009-02-17 22:15:04 +00:00
|
|
|
if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
|
2008-11-24 14:53:14 +00:00
|
|
|
(!LegalOperations ||
|
2008-06-14 17:48:34 +00:00
|
|
|
TLI.isOperationLegal(ISD::CTLZ, XType))) {
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
|
2009-02-17 22:15:04 +00:00
|
|
|
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
|
2008-06-06 12:08:01 +00:00
|
|
|
DAG.getConstant(Log2_32(XType.getSizeInBits()),
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
|
2009-02-17 22:15:04 +00:00
|
|
|
if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
|
2009-01-30 23:59:18 +00:00
|
|
|
SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
|
|
|
|
XType, DAG.getConstant(0, XType), N0);
|
2009-01-30 23:03:19 +00:00
|
|
|
SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::SRL, DL, XType,
|
2009-02-01 11:19:36 +00:00
|
|
|
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
|
2008-06-06 12:08:01 +00:00
|
|
|
DAG.getConstant(XType.getSizeInBits()-1,
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
2009-01-30 23:59:18 +00:00
|
|
|
// fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
|
2005-10-08 00:29:44 +00:00
|
|
|
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
|
2009-01-30 23:59:18 +00:00
|
|
|
DAG.getConstant(XType.getSizeInBits()-1,
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2005-10-08 00:29:44 +00:00
|
|
|
// Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
|
|
|
|
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
|
|
|
|
if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
|
Codegen integer abs more efficiently using the trick from the PPC CWG. This
improves codegen on many architectures. Tests committed as CodeGen/*/iabs.ll
X86 Old: X86 New:
_test: _test:
movl 4(%esp), %ecx movl 4(%esp), %eax
movl %ecx, %eax movl %eax, %ecx
negl %eax sarl $31, %ecx
testl %ecx, %ecx addl %ecx, %eax
cmovns %ecx, %eax xorl %ecx, %eax
ret ret
PPC Old: PPC New:
_test: _test:
cmpwi cr0, r3, -1 srawi r2, r3, 31
neg r2, r3 add r3, r3, r2
bgt cr0, LBB1_2 ; xor r3, r3, r2
LBB1_1: ; blr
mr r3, r2
LBB1_2: ;
blr
ARM Old: ARM New:
_test: _test:
rsb r3, r0, #0 add r3, r0, r0, asr #31
cmp r0, #0 eor r0, r3, r0, asr #31
movge r3, r0 bx lr
mov r0, r3
bx lr
Thumb Old: Thumb New:
_test: _test:
neg r2, r0 asr r2, r0, #31
cmp r0, #0 add r0, r0, r2
bge LBB1_2 eor r0, r2
LBB1_1: @ bx lr
cpy r0, r2
LBB1_2: @
bx lr
Sparc Old: Sparc New:
test: test:
save -96, %o6, %o6 save -96, %o6, %o6
sethi 0, %l0 sra %i0, 31, %l0
sub %l0, %i0, %l0 add %i0, %l0, %l1
subcc %i0, -1, %l1 xor %l1, %l0, %i0
bg .BB1_2 restore %g0, %g0, %g0
nop retl
.BB1_1: nop
or %g0, %l0, %i0
.BB1_2:
restore %g0, %g0, %g0
retl
nop
It also helps alpha/ia64 :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35881 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:11:38 +00:00
|
|
|
N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
|
2008-06-06 12:08:01 +00:00
|
|
|
N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT XType = N0.getValueType();
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
|
2009-01-30 23:59:18 +00:00
|
|
|
DAG.getConstant(XType.getSizeInBits()-1,
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
|
2009-01-30 23:59:18 +00:00
|
|
|
N0, Shift);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Shift.getNode());
|
|
|
|
AddToWorkList(Add.getNode());
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
|
Codegen integer abs more efficiently using the trick from the PPC CWG. This
improves codegen on many architectures. Tests committed as CodeGen/*/iabs.ll
X86 Old: X86 New:
_test: _test:
movl 4(%esp), %ecx movl 4(%esp), %eax
movl %ecx, %eax movl %eax, %ecx
negl %eax sarl $31, %ecx
testl %ecx, %ecx addl %ecx, %eax
cmovns %ecx, %eax xorl %ecx, %eax
ret ret
PPC Old: PPC New:
_test: _test:
cmpwi cr0, r3, -1 srawi r2, r3, 31
neg r2, r3 add r3, r3, r2
bgt cr0, LBB1_2 ; xor r3, r3, r2
LBB1_1: ; blr
mr r3, r2
LBB1_2: ;
blr
ARM Old: ARM New:
_test: _test:
rsb r3, r0, #0 add r3, r0, r0, asr #31
cmp r0, #0 eor r0, r3, r0, asr #31
movge r3, r0 bx lr
mov r0, r3
bx lr
Thumb Old: Thumb New:
_test: _test:
neg r2, r0 asr r2, r0, #31
cmp r0, #0 add r0, r0, r2
bge LBB1_2 eor r0, r2
LBB1_1: @ bx lr
cpy r0, r2
LBB1_2: @
bx lr
Sparc Old: Sparc New:
test: test:
save -96, %o6, %o6 save -96, %o6, %o6
sethi 0, %l0 sra %i0, 31, %l0
sub %l0, %i0, %l0 add %i0, %l0, %l1
subcc %i0, -1, %l1 xor %l1, %l0, %i0
bg .BB1_2 restore %g0, %g0, %g0
nop retl
.BB1_1: nop
or %g0, %l0, %i0
.BB1_2:
restore %g0, %g0, %g0
retl
nop
It also helps alpha/ia64 :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35881 91177308-0d34-0410-b5e6-96231b3b80d8
2007-04-11 05:11:38 +00:00
|
|
|
}
|
|
|
|
// Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
|
|
|
|
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
|
|
|
|
if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
|
|
|
|
N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
|
|
|
|
if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
|
2009-08-10 22:56:29 +00:00
|
|
|
EVT XType = N0.getValueType();
|
2008-06-06 12:08:01 +00:00
|
|
|
if (SubC->isNullValue() && XType.isInteger()) {
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
|
2009-01-30 23:59:18 +00:00
|
|
|
N0,
|
|
|
|
DAG.getConstant(XType.getSizeInBits()-1,
|
2009-01-31 15:50:11 +00:00
|
|
|
getShiftAmountTy()));
|
2009-01-31 03:12:48 +00:00
|
|
|
SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
|
2009-01-30 23:59:18 +00:00
|
|
|
XType, N0, Shift);
|
2008-08-28 21:40:38 +00:00
|
|
|
AddToWorkList(Shift.getNode());
|
|
|
|
AddToWorkList(Add.getNode());
|
2009-01-30 23:59:18 +00:00
|
|
|
return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
|
2005-10-08 00:29:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2008-07-27 21:46:04 +00:00
|
|
|
return SDValue();
|
2005-09-19 22:34:01 +00:00
|
|
|
}
|
|
|
|
|
2007-02-08 22:13:59 +00:00
|
|
|
/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
|
2009-08-10 22:56:29 +00:00
|
|
|
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue N1, ISD::CondCode Cond,
|
2009-02-03 00:47:48 +00:00
|
|
|
DebugLoc DL, bool foldBooleans) {
|
2009-02-17 22:15:04 +00:00
|
|
|
TargetLowering::DAGCombinerInfo
|
2009-07-24 18:22:59 +00:00
|
|
|
DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
|
2009-02-03 00:47:48 +00:00
|
|
|
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
|
2005-09-16 00:54:12 +00:00
|
|
|
}
|
|
|
|
|
2005-10-20 02:15:44 +00:00
|
|
|
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
|
|
|
|
/// return a DAG expression to select that will generate the same value by
|
|
|
|
/// multiplying by a magic number. See:
|
|
|
|
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
|
2006-06-12 16:07:18 +00:00
|
|
|
std::vector<SDNode*> Built;
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue S = TLI.BuildSDIV(N, DAG, &Built);
|
2006-05-16 17:42:15 +00:00
|
|
|
|
2006-06-12 16:07:18 +00:00
|
|
|
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
|
2006-05-16 17:42:15 +00:00
|
|
|
ii != ee; ++ii)
|
|
|
|
AddToWorkList(*ii);
|
|
|
|
return S;
|
2005-10-20 02:15:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
|
|
|
|
/// return a DAG expression to select that will generate the same value by
|
|
|
|
/// multiplying by a magic number. See:
|
|
|
|
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
|
2006-06-12 16:07:18 +00:00
|
|
|
std::vector<SDNode*> Built;
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue S = TLI.BuildUDIV(N, DAG, &Built);
|
2006-05-16 17:42:15 +00:00
|
|
|
|
2006-06-12 16:07:18 +00:00
|
|
|
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
|
2006-05-16 17:42:15 +00:00
|
|
|
ii != ee; ++ii)
|
|
|
|
AddToWorkList(*ii);
|
|
|
|
return S;
|
2005-10-20 02:15:44 +00:00
|
|
|
}
|
|
|
|
|
2009-09-25 06:05:26 +00:00
|
|
|
/// FindBaseOffset - Return true if base is a frame index, which is known not
|
|
|
|
// to alias with anything but itself. Provides base object and offset as results.
|
|
|
|
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
|
|
|
|
GlobalValue *&GV, void *&CV) {
|
2006-10-07 23:37:56 +00:00
|
|
|
// Assume it is a primitive operation.
|
2009-09-25 06:05:26 +00:00
|
|
|
Base = Ptr; Offset = 0; GV = 0; CV = 0;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-07 23:37:56 +00:00
|
|
|
// If it's an adding a simple constant then integrate the offset.
|
|
|
|
if (Base.getOpcode() == ISD::ADD) {
|
|
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
|
|
|
|
Base = Base.getOperand(0);
|
2008-09-12 16:56:44 +00:00
|
|
|
Offset += C->getZExtValue();
|
2006-10-07 23:37:56 +00:00
|
|
|
}
|
|
|
|
}
|
2009-09-25 06:05:26 +00:00
|
|
|
|
|
|
|
// Return the underlying GlobalValue, and update the Offset. Return false
|
|
|
|
// for GlobalAddressSDNode since the same GlobalAddress may be represented
|
|
|
|
// by multiple nodes with different offsets.
|
|
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
|
|
|
|
GV = G->getGlobal();
|
|
|
|
Offset += G->getOffset();
|
|
|
|
return false;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-09-25 06:05:26 +00:00
|
|
|
// Return the underlying Constant value, and update the Offset. Return false
|
|
|
|
// for ConstantSDNodes since the same constant pool entry may be represented
|
|
|
|
// by multiple nodes with different offsets.
|
|
|
|
if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
|
|
|
|
CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
|
|
|
|
: (void *)C->getConstVal();
|
|
|
|
Offset += C->getOffset();
|
|
|
|
return false;
|
|
|
|
}
|
2006-10-07 23:37:56 +00:00
|
|
|
// If it's any of the following then it can't alias with anything but itself.
|
2009-09-25 06:05:26 +00:00
|
|
|
return isa<FrameIndexSDNode>(Base);
|
2006-10-07 23:37:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// isAlias - Return true if there is any possibility that the two addresses
|
|
|
|
/// overlap.
|
2008-07-27 21:46:04 +00:00
|
|
|
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
|
2006-10-18 12:29:57 +00:00
|
|
|
const Value *SrcValue1, int SrcValueOffset1,
|
2009-09-15 00:18:30 +00:00
|
|
|
unsigned SrcValueAlign1,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Ptr2, int64_t Size2,
|
2009-09-15 00:18:30 +00:00
|
|
|
const Value *SrcValue2, int SrcValueOffset2,
|
|
|
|
unsigned SrcValueAlign2) const {
|
2006-10-07 23:37:56 +00:00
|
|
|
// If they are the same then they must be aliases.
|
|
|
|
if (Ptr1 == Ptr2) return true;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-07 23:37:56 +00:00
|
|
|
// Gather base node and offset information.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Base1, Base2;
|
2006-10-07 23:37:56 +00:00
|
|
|
int64_t Offset1, Offset2;
|
2009-09-25 06:05:26 +00:00
|
|
|
GlobalValue *GV1, *GV2;
|
|
|
|
void *CV1, *CV2;
|
|
|
|
bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
|
|
|
|
bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-09-25 06:05:26 +00:00
|
|
|
// If they have a same base address then check to see if they overlap.
|
|
|
|
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
|
2009-01-30 23:59:18 +00:00
|
|
|
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-09-25 06:05:26 +00:00
|
|
|
// If we know what the bases are, and they aren't identical, then we know they
|
|
|
|
// cannot alias.
|
|
|
|
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
|
|
|
|
return false;
|
2006-10-18 12:29:57 +00:00
|
|
|
|
2009-09-15 00:18:30 +00:00
|
|
|
// If we know required SrcValue1 and SrcValue2 have relatively large alignment
|
|
|
|
// compared to the size and offset of the access, we may be able to prove they
|
|
|
|
// do not alias. This check is conservative for now to catch cases created by
|
|
|
|
// splitting vector types.
|
|
|
|
if ((SrcValueAlign1 == SrcValueAlign2) &&
|
|
|
|
(SrcValueOffset1 != SrcValueOffset2) &&
|
|
|
|
(Size1 == Size2) && (SrcValueAlign1 > Size1)) {
|
|
|
|
int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
|
|
|
|
int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
|
|
|
|
|
|
|
|
// There is no overlap between these relatively aligned accesses of similar
|
|
|
|
// size, return no alias.
|
|
|
|
if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-10-18 19:08:31 +00:00
|
|
|
if (CombinerGlobalAA) {
|
|
|
|
// Use alias analysis information.
|
2007-08-27 16:32:11 +00:00
|
|
|
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
|
|
|
|
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
|
|
|
|
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
|
2009-02-17 22:15:04 +00:00
|
|
|
AliasAnalysis::AliasResult AAResult =
|
2006-10-18 12:29:57 +00:00
|
|
|
AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
|
2006-10-18 19:08:31 +00:00
|
|
|
if (AAResult == AliasAnalysis::NoAlias)
|
|
|
|
return false;
|
|
|
|
}
|
2006-10-18 12:29:57 +00:00
|
|
|
|
|
|
|
// Otherwise we have to assume they alias.
|
|
|
|
return true;
|
2006-10-07 23:37:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// FindAliasInfo - Extracts the relevant alias information from the memory
|
|
|
|
/// node. Returns true if the operand was a load.
|
2006-10-11 13:47:09 +00:00
|
|
|
bool DAGCombiner::FindAliasInfo(SDNode *N,
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue &Ptr, int64_t &Size,
|
2009-09-15 00:18:30 +00:00
|
|
|
const Value *&SrcValue,
|
|
|
|
int &SrcValueOffset,
|
|
|
|
unsigned &SrcValueAlign) const {
|
2006-10-11 13:47:09 +00:00
|
|
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
|
|
|
Ptr = LD->getBasePtr();
|
2008-06-06 12:08:01 +00:00
|
|
|
Size = LD->getMemoryVT().getSizeInBits() >> 3;
|
2006-10-11 13:47:09 +00:00
|
|
|
SrcValue = LD->getSrcValue();
|
2006-10-18 12:29:57 +00:00
|
|
|
SrcValueOffset = LD->getSrcValueOffset();
|
2009-09-15 00:18:30 +00:00
|
|
|
SrcValueAlign = LD->getOriginalAlignment();
|
2006-10-07 23:37:56 +00:00
|
|
|
return true;
|
2006-10-11 13:47:09 +00:00
|
|
|
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
|
|
|
|
Ptr = ST->getBasePtr();
|
2008-06-06 12:08:01 +00:00
|
|
|
Size = ST->getMemoryVT().getSizeInBits() >> 3;
|
2006-10-11 13:47:09 +00:00
|
|
|
SrcValue = ST->getSrcValue();
|
2006-10-18 12:29:57 +00:00
|
|
|
SrcValueOffset = ST->getSrcValueOffset();
|
2009-09-15 00:18:30 +00:00
|
|
|
SrcValueAlign = ST->getOriginalAlignment();
|
2006-10-11 13:47:09 +00:00
|
|
|
} else {
|
2009-07-14 16:55:14 +00:00
|
|
|
llvm_unreachable("FindAliasInfo expected a memory operand");
|
2006-10-07 23:37:56 +00:00
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-07 23:37:56 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
|
|
|
|
/// looking for aliasing nodes and adding them to the Aliases vector.
|
2008-07-27 21:46:04 +00:00
|
|
|
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
|
|
|
|
SmallVector<SDValue, 8> &Aliases) {
|
|
|
|
SmallVector<SDValue, 8> Chains; // List of chains to visit.
|
2009-09-15 00:18:30 +00:00
|
|
|
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-09-25 16:29:54 +00:00
|
|
|
// Get alias information for node.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Ptr;
|
2009-09-15 00:18:30 +00:00
|
|
|
int64_t Size;
|
|
|
|
const Value *SrcValue;
|
|
|
|
int SrcValueOffset;
|
|
|
|
unsigned SrcValueAlign;
|
|
|
|
bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
|
|
|
|
SrcValueAlign);
|
2006-09-25 16:29:54 +00:00
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
// Starting off.
|
2006-10-05 15:07:25 +00:00
|
|
|
Chains.push_back(OriginalChain);
|
2009-10-12 05:53:58 +00:00
|
|
|
unsigned Depth = 0;
|
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
// Look at each chain and determine if it is an alias. If so, add it to the
|
|
|
|
// aliases list. If not, then continue up the chain looking for the next
|
2009-02-17 22:15:04 +00:00
|
|
|
// candidate.
|
2006-10-05 15:07:25 +00:00
|
|
|
while (!Chains.empty()) {
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue Chain = Chains.back();
|
2006-10-05 15:07:25 +00:00
|
|
|
Chains.pop_back();
|
2009-10-12 05:53:58 +00:00
|
|
|
|
|
|
|
// For TokenFactor nodes, look at each operand and only continue up the
|
|
|
|
// chain until we find two aliases. If we've seen two aliases, assume we'll
|
|
|
|
// find more and revert to original chain since the xform is unlikely to be
|
|
|
|
// profitable.
|
|
|
|
//
|
|
|
|
// FIXME: The depth check could be made to return the last non-aliasing
|
|
|
|
// chain we found before we hit a tokenfactor rather than the original
|
|
|
|
// chain.
|
|
|
|
if (Depth > 6 || Aliases.size() == 2) {
|
|
|
|
Aliases.clear();
|
|
|
|
Aliases.push_back(OriginalChain);
|
|
|
|
break;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2009-09-15 00:18:30 +00:00
|
|
|
// Don't bother if we've been before.
|
|
|
|
if (!Visited.insert(Chain.getNode()))
|
|
|
|
continue;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
switch (Chain.getOpcode()) {
|
|
|
|
case ISD::EntryToken:
|
|
|
|
// Entry token is ideal chain operand, but handled in FindBetterChain.
|
|
|
|
break;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
case ISD::LOAD:
|
|
|
|
case ISD::STORE: {
|
|
|
|
// Get alias information for Chain.
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue OpPtr;
|
2009-09-15 00:18:30 +00:00
|
|
|
int64_t OpSize;
|
|
|
|
const Value *OpSrcValue;
|
|
|
|
int OpSrcValueOffset;
|
|
|
|
unsigned OpSrcValueAlign;
|
2008-08-28 21:40:38 +00:00
|
|
|
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
|
2009-09-15 00:18:30 +00:00
|
|
|
OpSrcValue, OpSrcValueOffset,
|
|
|
|
OpSrcValueAlign);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
// If chain is alias then stop here.
|
|
|
|
if (!(IsLoad && IsOpLoad) &&
|
2009-09-15 00:18:30 +00:00
|
|
|
isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
|
|
|
|
OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
|
|
|
|
OpSrcValueAlign)) {
|
2006-10-05 15:07:25 +00:00
|
|
|
Aliases.push_back(Chain);
|
|
|
|
} else {
|
|
|
|
// Look further up the chain.
|
2009-02-17 22:15:04 +00:00
|
|
|
Chains.push_back(Chain.getOperand(0));
|
2009-10-12 05:53:58 +00:00
|
|
|
++Depth;
|
2006-09-25 16:29:54 +00:00
|
|
|
}
|
2006-10-05 15:07:25 +00:00
|
|
|
break;
|
|
|
|
}
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
case ISD::TokenFactor:
|
2009-09-15 00:18:30 +00:00
|
|
|
// We have to check each of the operands of the token factor for "small"
|
|
|
|
// token factors, so we queue them up. Adding the operands to the queue
|
|
|
|
// (stack) in reverse order maintains the original order and increases the
|
|
|
|
// likelihood that getNode will find a matching token factor (CSE.)
|
|
|
|
if (Chain.getNumOperands() > 16) {
|
|
|
|
Aliases.push_back(Chain);
|
|
|
|
break;
|
|
|
|
}
|
2006-10-05 15:07:25 +00:00
|
|
|
for (unsigned n = Chain.getNumOperands(); n;)
|
|
|
|
Chains.push_back(Chain.getOperand(--n));
|
2009-10-12 05:53:58 +00:00
|
|
|
++Depth;
|
2006-10-05 15:07:25 +00:00
|
|
|
break;
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-05 15:07:25 +00:00
|
|
|
default:
|
|
|
|
// For all other instructions we will just have to take what we can get.
|
|
|
|
Aliases.push_back(Chain);
|
|
|
|
break;
|
2006-09-25 16:29:54 +00:00
|
|
|
}
|
|
|
|
}
|
2006-10-04 16:53:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
|
|
|
|
/// for a better chain (aliasing node.)
|
2008-07-27 21:46:04 +00:00
|
|
|
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
|
|
|
|
SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
// Accumulate all the aliases to this node.
|
|
|
|
GatherAllAliases(N, OldChain, Aliases);
|
2009-02-17 22:15:04 +00:00
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
if (Aliases.size() == 0) {
|
|
|
|
// If no operands then chain to entry token.
|
|
|
|
return DAG.getEntryNode();
|
|
|
|
} else if (Aliases.size() == 1) {
|
|
|
|
// If a single operand then chain to it. We don't need to revisit it.
|
|
|
|
return Aliases[0];
|
|
|
|
}
|
2009-10-12 05:53:58 +00:00
|
|
|
|
2006-10-04 16:53:27 +00:00
|
|
|
// Construct a custom tailored token factor.
|
2009-09-15 00:18:30 +00:00
|
|
|
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
|
|
|
|
&Aliases[0], Aliases.size());
|
2006-09-25 16:29:54 +00:00
|
|
|
}
|
|
|
|
|
2005-09-01 00:19:25 +00:00
|
|
|
// SelectionDAG::Combine - This is the entry point for the file.
|
|
|
|
//
|
2009-04-29 00:15:41 +00:00
|
|
|
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
|
2009-04-29 23:29:43 +00:00
|
|
|
CodeGenOpt::Level OptLevel) {
|
2005-09-01 00:19:25 +00:00
|
|
|
/// run - This is the main entry point to this class.
|
|
|
|
///
|
2009-04-29 00:15:41 +00:00
|
|
|
DAGCombiner(*this, AA, OptLevel).Run(Level);
|
2005-09-01 00:19:25 +00:00
|
|
|
}
|