mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 02:07:56 +00:00
[DAGCombine] Improve alias analysis for chain of independent stores.
FindBetterNeighborChains simultaneously improves the chain dependencies of a chain of related stores, avoiding the generation of extra token factors. For chains longer than the GatherAllAliasDepths, stores further down in the chain will necessarily fail, which is a potentially significant waste and prevents otherwise trivial parallelization. This patch directly parallelizes the chains of stores before improving each store. This generally improves DAG-level parallelism. Reviewers: courbet, spatel, RKSimon, bogner, efriedma, craig.topper, rnk Subscribers: sdardis, javed.absar, hiraditya, jrtc27, atanasyan, llvm-commits Differential Revision: https://reviews.llvm.org/D53552 llvm-svn: 346432
This commit is contained in:
parent
526688e4bb
commit
8e3e378c03
@ -20,6 +20,7 @@
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/IntervalMap.h"
|
||||
#include "llvm/ADT/None.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
@ -490,6 +491,10 @@ namespace {
|
||||
/// returns false.
|
||||
bool findBetterNeighborChains(StoreSDNode *St);
|
||||
|
||||
// Helper for findBetterNeighborChains. Walk up the store chain and add
|
||||
// additional chained stores that do not overlap and can be parallelized.
|
||||
bool parallelizeChainedStores(StoreSDNode *St);
|
||||
|
||||
/// Holds a pointer to an LSBaseSDNode as well as information on where it
|
||||
/// is located in a sequence of memory operations connected by a chain.
|
||||
struct MemOpLink {
|
||||
@ -18905,6 +18910,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
|
||||
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
|
||||
}
|
||||
|
||||
// TODO: Replace with std::monostate when we move to C++17.
|
||||
struct UnitT { } Unit;
|
||||
bool operator==(const UnitT &, const UnitT &) { return true; }
|
||||
bool operator!=(const UnitT &, const UnitT &) { return false; }
|
||||
|
||||
// This function tries to collect a bunch of potentially interesting
|
||||
// nodes to improve the chains of, all at once. This might seem
|
||||
// redundant, as this function gets called when visiting every store
|
||||
@ -18917,13 +18927,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
|
||||
// the nodes that will eventually be candidates, and then not be able
|
||||
// to go from a partially-merged state to the desired final
|
||||
// fully-merged state.
|
||||
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
|
||||
if (OptLevel == CodeGenOpt::None)
|
||||
return false;
|
||||
|
||||
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
|
||||
SmallVector<StoreSDNode *, 8> ChainedStores;
|
||||
StoreSDNode *STChain = St;
|
||||
// Intervals records which offsets from BaseIndex have been covered. In
|
||||
// the common case, every store writes to the immediately previous address
|
||||
// space and is thus merged with the previous interval at insertion time.
|
||||
|
||||
using IMap =
|
||||
llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
|
||||
IMap::Allocator A;
|
||||
IMap Intervals(A);
|
||||
|
||||
// This holds the base pointer, index, and the offset in bytes from the base
|
||||
// pointer.
|
||||
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
|
||||
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
|
||||
|
||||
// We must have a base and an offset.
|
||||
if (!BasePtr.getBase().getNode())
|
||||
@ -18933,76 +18952,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
|
||||
if (BasePtr.getBase().isUndef())
|
||||
return false;
|
||||
|
||||
SmallVector<StoreSDNode *, 8> ChainedStores;
|
||||
ChainedStores.push_back(St);
|
||||
// Add ST's interval.
|
||||
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
|
||||
|
||||
// Walk up the chain and look for nodes with offsets from the same
|
||||
// base pointer. Stop when reaching an instruction with a different kind
|
||||
// or instruction which has a different base pointer.
|
||||
StoreSDNode *Index = St;
|
||||
while (Index) {
|
||||
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
|
||||
// If the chain has more than one use, then we can't reorder the mem ops.
|
||||
if (Index != St && !SDValue(Index, 0)->hasOneUse())
|
||||
if (!SDValue(Chain, 0)->hasOneUse())
|
||||
break;
|
||||
|
||||
if (Index->isVolatile() || Index->isIndexed())
|
||||
if (Chain->isVolatile() || Chain->isIndexed())
|
||||
break;
|
||||
|
||||
// Find the base pointer and offset for this memory node.
|
||||
BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
|
||||
|
||||
const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
|
||||
// Check that the base pointer is the same as the original one.
|
||||
if (!BasePtr.equalBaseIndex(Ptr, DAG))
|
||||
int64_t Offset;
|
||||
if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
|
||||
break;
|
||||
int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
|
||||
// Make sure we don't overlap with other intervals by checking the ones to
|
||||
// the left or right before inserting.
|
||||
auto I = Intervals.find(Offset);
|
||||
// If there's a next interval, we should end before it.
|
||||
if (I != Intervals.end() && I.start() < (Offset + Length))
|
||||
break;
|
||||
// If there's a previous interval, we should start after it.
|
||||
if (I != Intervals.begin() && (--I).stop() <= Offset)
|
||||
break;
|
||||
Intervals.insert(Offset, Offset + Length, Unit);
|
||||
|
||||
// Walk up the chain to find the next store node, ignoring any
|
||||
// intermediate loads. Any other kind of node will halt the loop.
|
||||
SDNode *NextInChain = Index->getChain().getNode();
|
||||
while (true) {
|
||||
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
|
||||
// We found a store node. Use it for the next iteration.
|
||||
if (STn->isVolatile() || STn->isIndexed()) {
|
||||
Index = nullptr;
|
||||
break;
|
||||
}
|
||||
ChainedStores.push_back(STn);
|
||||
Index = STn;
|
||||
break;
|
||||
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
|
||||
NextInChain = Ldn->getChain().getNode();
|
||||
continue;
|
||||
} else {
|
||||
Index = nullptr;
|
||||
break;
|
||||
}
|
||||
}// end while
|
||||
ChainedStores.push_back(Chain);
|
||||
STChain = Chain;
|
||||
}
|
||||
|
||||
// At this point, ChainedStores lists all of the Store nodes
|
||||
// reachable by iterating up through chain nodes matching the above
|
||||
// conditions. For each such store identified, try to find an
|
||||
// earlier chain to attach the store to which won't violate the
|
||||
// required ordering.
|
||||
bool MadeChangeToSt = false;
|
||||
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
|
||||
// If we didn't find a chained store, exit.
|
||||
if (ChainedStores.size() == 0)
|
||||
return false;
|
||||
|
||||
for (StoreSDNode *ChainedStore : ChainedStores) {
|
||||
SDValue Chain = ChainedStore->getChain();
|
||||
SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
|
||||
|
||||
if (Chain != BetterChain) {
|
||||
if (ChainedStore == St)
|
||||
MadeChangeToSt = true;
|
||||
BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
|
||||
}
|
||||
// Improve all chained stores (St and ChainedStores members) starting from
|
||||
// where the store chain ended and return single TokenFactor.
|
||||
SDValue NewChain = STChain->getChain();
|
||||
SmallVector<SDValue, 8> TFOps;
|
||||
for (unsigned I = ChainedStores.size(); I;) {
|
||||
StoreSDNode *S = ChainedStores[--I];
|
||||
SDValue BetterChain = FindBetterChain(S, NewChain);
|
||||
S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
|
||||
S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
|
||||
TFOps.push_back(SDValue(S, 0));
|
||||
ChainedStores[I] = S;
|
||||
}
|
||||
|
||||
// Do all replacements after finding the replacements to make to avoid making
|
||||
// the chains more complicated by introducing new TokenFactors.
|
||||
for (auto Replacement : BetterChains)
|
||||
replaceStoreChain(Replacement.first, Replacement.second);
|
||||
// Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
|
||||
SDValue BetterChain = FindBetterChain(St, NewChain);
|
||||
SDValue NewST;
|
||||
if (St->isTruncatingStore())
|
||||
NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
|
||||
St->getBasePtr(), St->getMemoryVT(),
|
||||
St->getMemOperand());
|
||||
else
|
||||
NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
|
||||
St->getBasePtr(), St->getMemOperand());
|
||||
|
||||
return MadeChangeToSt;
|
||||
TFOps.push_back(NewST);
|
||||
|
||||
// If we improved every element of TFOps, then we've lost the dependence on
|
||||
// NewChain to successors of St and we need to add it back to TFOps. Do so at
|
||||
// the beginning to keep relative order consistent with FindBetterChain.
|
||||
auto hasImprovedChain = [&](SDValue ST) -> bool {
|
||||
return ST->getOperand(0) != NewChain;
|
||||
};
|
||||
bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
|
||||
if (AddNewChain)
|
||||
TFOps.insert(TFOps.begin(), NewChain);
|
||||
|
||||
SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
|
||||
CombineTo(St, TF);
|
||||
|
||||
AddToWorklist(STChain);
|
||||
// Add TF operands to the worklist in reverse order.
|
||||
for (auto I = TF->getNumOperands(); I;)
|
||||
AddToWorklist(TF->getOperand(--I).getNode());
|
||||
AddToWorklist(TF.getNode());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
|
||||
if (OptLevel == CodeGenOpt::None)
|
||||
return false;
|
||||
|
||||
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
|
||||
|
||||
// We must have a base and an offset.
|
||||
if (!BasePtr.getBase().getNode())
|
||||
return false;
|
||||
|
||||
// Do not handle stores to undef base pointers.
|
||||
if (BasePtr.getBase().isUndef())
|
||||
return false;
|
||||
|
||||
// Directly improve a chain of disjoint stores starting at St.
|
||||
if (parallelizeChainedStores(St))
|
||||
return true;
|
||||
|
||||
// Improve St's chain.
|
||||
SDValue BetterChain = FindBetterChain(St, St->getChain());
|
||||
if (St->getChain() != BetterChain) {
|
||||
replaceStoreChain(St, BetterChain);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// This is the entry point for the file.
|
||||
|
@ -7,14 +7,13 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
|
||||
; CHECK-LABEL: fn9:
|
||||
; 9th fixed argument
|
||||
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
|
||||
; CHECK: add [[ARGS:x[0-9]+]], sp, #72
|
||||
; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
|
||||
; CHECK-DAG: add [[ARGS:x[0-9]+]], sp, #72
|
||||
; First vararg
|
||||
; CHECK: ldr {{w[0-9]+}}, [sp, #72]
|
||||
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72]
|
||||
; Second vararg
|
||||
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
|
||||
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #80]
|
||||
; Third vararg
|
||||
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
|
||||
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #88]
|
||||
%1 = alloca i32, align 4
|
||||
%2 = alloca i32, align 4
|
||||
%3 = alloca i32, align 4
|
||||
|
@ -1465,10 +1465,10 @@ entry:
|
||||
define void @merge_zr32_3vec(<3 x i32>* %p) {
|
||||
; CHECK-LABEL: merge_zr32_3vec:
|
||||
; CHECK: // %entry
|
||||
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
|
||||
; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
|
||||
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #4]
|
||||
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
store <3 x i32> zeroinitializer, <3 x i32>* %p
|
||||
@ -1480,8 +1480,8 @@ define void @merge_zr32_4vec(<4 x i32>* %p) {
|
||||
; CHECK-LABEL: merge_zr32_4vec:
|
||||
; CHECK: // %entry
|
||||
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
store <4 x i32> zeroinitializer, <4 x i32>* %p
|
||||
@ -1505,8 +1505,8 @@ define void @merge_zr32_4vecf(<4 x float>* %p) {
|
||||
; CHECK-LABEL: merge_zr32_4vecf:
|
||||
; CHECK: // %entry
|
||||
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
|
||||
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
store <4 x float> zeroinitializer, <4 x float>* %p
|
||||
@ -1589,8 +1589,8 @@ entry:
|
||||
define void @merge_zr64_3vec(<3 x i64>* %p) {
|
||||
; CHECK-LABEL: merge_zr64_3vec:
|
||||
; CHECK: // %entry
|
||||
; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
|
||||
; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #8]
|
||||
; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
store <3 x i64> zeroinitializer, <3 x i64>* %p
|
||||
|
@ -314,13 +314,12 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
|
||||
; CHECK-APPLE-DAG: strb [[ID]], [x0, #8]
|
||||
|
||||
; First vararg
|
||||
; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
|
||||
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
|
||||
; Second vararg
|
||||
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
|
||||
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
|
||||
; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16
|
||||
; Third vararg
|
||||
; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
|
||||
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
|
||||
|
||||
; CHECK-APPLE: mov x21, x0
|
||||
; CHECK-APPLE-NOT: x21
|
||||
|
@ -8,101 +8,95 @@ target triple = "thumbv7em-arm-none-eabi"
|
||||
define arm_aapcs_vfpcc void @test(i8* %v50) #0 {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: movw r1, #35722
|
||||
; CHECK-NEXT: movt r1, #36236
|
||||
; CHECK-NEXT: str.w r1, [r0, #394]
|
||||
; CHECK-NEXT: movw r1, #36750
|
||||
; CHECK-NEXT: movt r1, #37264
|
||||
; CHECK-NEXT: str.w r1, [r0, #398]
|
||||
; CHECK-NEXT: movw r1, #37778
|
||||
; CHECK-NEXT: movt r1, #38292
|
||||
; CHECK-NEXT: str.w r1, [r0, #402]
|
||||
; CHECK-NEXT: movw r1, #38806
|
||||
; CHECK-NEXT: movt r1, #39320
|
||||
; CHECK-NEXT: str.w r1, [r0, #406]
|
||||
; CHECK-NEXT: movw r1, #39834
|
||||
; CHECK-NEXT: strh.w r1, [r0, #410]
|
||||
; CHECK-NEXT: movw r1, #40348
|
||||
; CHECK-NEXT: movt r1, #40862
|
||||
; CHECK-NEXT: str.w r1, [r0, #412]
|
||||
; CHECK-NEXT: movw r1, #41376
|
||||
; CHECK-NEXT: movt r1, #41890
|
||||
; CHECK-NEXT: str.w r1, [r0, #416]
|
||||
; CHECK-NEXT: movw r1, #42404
|
||||
; CHECK-NEXT: movt r1, #42918
|
||||
; CHECK-NEXT: str.w r1, [r0, #420]
|
||||
; CHECK-NEXT: movw r1, #43432
|
||||
; CHECK-NEXT: movt r1, #43946
|
||||
; CHECK-NEXT: str.w r1, [r0, #424]
|
||||
; CHECK-NEXT: movw r1, #44460
|
||||
; CHECK-NEXT: movt r1, #44974
|
||||
; CHECK-NEXT: str.w r1, [r0, #428]
|
||||
; CHECK-NEXT: movw r1, #45488
|
||||
; CHECK-NEXT: strh.w r1, [r0, #432]
|
||||
; CHECK-NEXT: movw r1, #65534
|
||||
; CHECK-NEXT: strh.w r1, [r0, #510]
|
||||
; CHECK-NEXT: movw r1, #64506
|
||||
; CHECK-NEXT: movt r1, #65020
|
||||
; CHECK-NEXT: str.w r1, [r0, #506]
|
||||
; CHECK-NEXT: movw r1, #63478
|
||||
; CHECK-NEXT: movt r1, #63992
|
||||
; CHECK-NEXT: str.w r1, [r0, #502]
|
||||
; CHECK-NEXT: movw r1, #62450
|
||||
; CHECK-NEXT: movt r1, #62964
|
||||
; CHECK-NEXT: str.w r1, [r0, #498]
|
||||
; CHECK-NEXT: movw r1, #61422
|
||||
; CHECK-NEXT: movt r1, #61936
|
||||
; CHECK-NEXT: str.w r1, [r0, #494]
|
||||
; CHECK-NEXT: movw r1, #60394
|
||||
; CHECK-NEXT: movt r1, #60908
|
||||
; CHECK-NEXT: str.w r1, [r0, #490]
|
||||
; CHECK-NEXT: movw r1, #59366
|
||||
; CHECK-NEXT: movt r1, #59880
|
||||
; CHECK-NEXT: str.w r1, [r0, #486]
|
||||
; CHECK-NEXT: movw r1, #58338
|
||||
; CHECK-NEXT: movt r1, #58852
|
||||
; CHECK-NEXT: str.w r1, [r0, #482]
|
||||
; CHECK-NEXT: movw r1, #57310
|
||||
; CHECK-NEXT: movt r1, #57824
|
||||
; CHECK-NEXT: str.w r1, [r0, #478]
|
||||
; CHECK-NEXT: movw r1, #56282
|
||||
; CHECK-NEXT: movt r1, #56796
|
||||
; CHECK-NEXT: str.w r1, [r0, #474]
|
||||
; CHECK-NEXT: movw r1, #55254
|
||||
; CHECK-NEXT: movt r1, #55768
|
||||
; CHECK-NEXT: str.w r1, [r0, #470]
|
||||
; CHECK-NEXT: movw r1, #54226
|
||||
; CHECK-NEXT: movt r1, #54740
|
||||
; CHECK-NEXT: str.w r1, [r0, #466]
|
||||
; CHECK-NEXT: movw r1, #53198
|
||||
; CHECK-NEXT: movt r1, #53712
|
||||
; CHECK-NEXT: str.w r1, [r0, #462]
|
||||
; CHECK-NEXT: movw r1, #52170
|
||||
; CHECK-NEXT: movt r1, #52684
|
||||
; CHECK-NEXT: str.w r1, [r0, #458]
|
||||
; CHECK-NEXT: movw r1, #51142
|
||||
; CHECK-NEXT: movt r1, #51656
|
||||
; CHECK-NEXT: str.w r1, [r0, #454]
|
||||
; CHECK-NEXT: movw r1, #50114
|
||||
; CHECK-NEXT: movt r1, #50628
|
||||
; CHECK-NEXT: str.w r1, [r0, #450]
|
||||
; CHECK-NEXT: movw r1, #49086
|
||||
; CHECK-NEXT: movt r1, #49600
|
||||
; CHECK-NEXT: str.w r1, [r0, #446]
|
||||
; CHECK-NEXT: movw r1, #48058
|
||||
; CHECK-NEXT: movt r1, #48572
|
||||
; CHECK-NEXT: str.w r1, [r0, #442]
|
||||
; CHECK-NEXT: movw r1, #47030
|
||||
; CHECK-NEXT: movt r1, #47544
|
||||
; CHECK-NEXT: str.w r1, [r0, #438]
|
||||
; CHECK-NEXT: movw r1, #46002
|
||||
; CHECK-NEXT: movt r1, #46516
|
||||
; CHECK-NEXT: str.w r1, [r0, #434]
|
||||
; CHECK-NEXT: movw r1, #47030
|
||||
; CHECK-NEXT: strh.w r1, [r0, #438]
|
||||
; CHECK-NEXT: movw r1, #47544
|
||||
; CHECK-NEXT: movt r1, #48058
|
||||
; CHECK-NEXT: str.w r1, [r0, #440]
|
||||
; CHECK-NEXT: movw r1, #48572
|
||||
; CHECK-NEXT: movt r1, #49086
|
||||
; CHECK-NEXT: str.w r1, [r0, #444]
|
||||
; CHECK-NEXT: movw r1, #49600
|
||||
; CHECK-NEXT: strh.w r1, [r0, #448]
|
||||
; CHECK-NEXT: movs r1, #194
|
||||
; CHECK-NEXT: strb.w r1, [r0, #450]
|
||||
; CHECK-NEXT: movw r1, #50371
|
||||
; CHECK-NEXT: movt r1, #50885
|
||||
; CHECK-NEXT: str.w r1, [r0, #451]
|
||||
; CHECK-NEXT: movw r1, #51399
|
||||
; CHECK-NEXT: movt r1, #51913
|
||||
; CHECK-NEXT: str.w r1, [r0, #455]
|
||||
; CHECK-NEXT: movw r1, #52427
|
||||
; CHECK-NEXT: movt r1, #52941
|
||||
; CHECK-NEXT: str.w r1, [r0, #459]
|
||||
; CHECK-NEXT: movw r1, #53455
|
||||
; CHECK-NEXT: movt r1, #53969
|
||||
; CHECK-NEXT: str.w r1, [r0, #463]
|
||||
; CHECK-NEXT: movw r1, #54483
|
||||
; CHECK-NEXT: strh.w r1, [r0, #467]
|
||||
; CHECK-NEXT: movw r1, #54997
|
||||
; CHECK-NEXT: movt r1, #55511
|
||||
; CHECK-NEXT: str.w r1, [r0, #469]
|
||||
; CHECK-NEXT: movw r1, #56025
|
||||
; CHECK-NEXT: movt r1, #56539
|
||||
; CHECK-NEXT: str.w r1, [r0, #473]
|
||||
; CHECK-NEXT: movw r1, #57053
|
||||
; CHECK-NEXT: movt r1, #57567
|
||||
; CHECK-NEXT: str.w r1, [r0, #477]
|
||||
; CHECK-NEXT: movw r1, #58081
|
||||
; CHECK-NEXT: movt r1, #58595
|
||||
; CHECK-NEXT: str.w r1, [r0, #481]
|
||||
; CHECK-NEXT: movw r1, #59109
|
||||
; CHECK-NEXT: movt r1, #59623
|
||||
; CHECK-NEXT: str.w r1, [r0, #485]
|
||||
; CHECK-NEXT: movw r1, #60137
|
||||
; CHECK-NEXT: strh.w r1, [r0, #489]
|
||||
; CHECK-NEXT: movw r1, #60651
|
||||
; CHECK-NEXT: movt r1, #61165
|
||||
; CHECK-NEXT: str.w r1, [r0, #491]
|
||||
; CHECK-NEXT: movw r1, #61679
|
||||
; CHECK-NEXT: strh.w r1, [r0, #495]
|
||||
; CHECK-NEXT: movw r1, #62193
|
||||
; CHECK-NEXT: movt r1, #62707
|
||||
; CHECK-NEXT: str.w r1, [r0, #497]
|
||||
; CHECK-NEXT: movw r1, #63221
|
||||
; CHECK-NEXT: movt r1, #63735
|
||||
; CHECK-NEXT: str.w r1, [r0, #501]
|
||||
; CHECK-NEXT: movw r1, #64249
|
||||
; CHECK-NEXT: strh.w r1, [r0, #505]
|
||||
; CHECK-NEXT: movs r1, #251
|
||||
; CHECK-NEXT: strb.w r1, [r0, #507]
|
||||
; CHECK-NEXT: movw r1, #65020
|
||||
; CHECK-NEXT: movt r1, #65534
|
||||
; CHECK-NEXT: str.w r1, [r0, #508]
|
||||
; CHECK-NEXT: movw r1, #44974
|
||||
; CHECK-NEXT: movt r1, #45488
|
||||
; CHECK-NEXT: str.w r1, [r0, #430]
|
||||
; CHECK-NEXT: movw r1, #43946
|
||||
; CHECK-NEXT: movt r1, #44460
|
||||
; CHECK-NEXT: str.w r1, [r0, #426]
|
||||
; CHECK-NEXT: movw r1, #42918
|
||||
; CHECK-NEXT: movt r1, #43432
|
||||
; CHECK-NEXT: str.w r1, [r0, #422]
|
||||
; CHECK-NEXT: movw r1, #41890
|
||||
; CHECK-NEXT: movt r1, #42404
|
||||
; CHECK-NEXT: str.w r1, [r0, #418]
|
||||
; CHECK-NEXT: movw r1, #40862
|
||||
; CHECK-NEXT: movt r1, #41376
|
||||
; CHECK-NEXT: str.w r1, [r0, #414]
|
||||
; CHECK-NEXT: movw r1, #39834
|
||||
; CHECK-NEXT: movt r1, #40348
|
||||
; CHECK-NEXT: str.w r1, [r0, #410]
|
||||
; CHECK-NEXT: movw r1, #38806
|
||||
; CHECK-NEXT: movt r1, #39320
|
||||
; CHECK-NEXT: str.w r1, [r0, #406]
|
||||
; CHECK-NEXT: movw r1, #37778
|
||||
; CHECK-NEXT: movt r1, #38292
|
||||
; CHECK-NEXT: str.w r1, [r0, #402]
|
||||
; CHECK-NEXT: movw r1, #36750
|
||||
; CHECK-NEXT: movt r1, #37264
|
||||
; CHECK-NEXT: str.w r1, [r0, #398]
|
||||
; CHECK-NEXT: movw r1, #35722
|
||||
; CHECK-NEXT: movt r1, #36236
|
||||
; CHECK-NEXT: str.w r1, [r0, #394]
|
||||
; CHECK-NEXT: bx lr
|
||||
%v190 = getelementptr inbounds i8, i8* %v50, i32 394
|
||||
store i8 -118, i8* %v190, align 1
|
||||
|
@ -72,20 +72,27 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
|
||||
; CHECK-LABEL: aesea:
|
||||
; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
|
||||
|
||||
; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
|
||||
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
|
||||
; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
|
||||
|
||||
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
|
||||
|
||||
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
|
||||
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
|
||||
; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
|
||||
|
||||
; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
|
||||
|
||||
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]]
|
||||
@ -160,14 +167,14 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
|
||||
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
|
||||
; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
|
||||
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
|
||||
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
|
||||
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
|
||||
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
|
||||
; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
|
||||
; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
|
||||
|
@ -223,24 +223,24 @@ entry:
|
||||
define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
|
||||
entry:
|
||||
; CHECK-LABEL: callee1:
|
||||
; CHECK-DAG: swc1 $f0
|
||||
; CHECK-DAG: swc1 $f1
|
||||
; CHECK-DAG: swc1 $f2
|
||||
; CHECK-DAG: swc1 $f3
|
||||
; CHECK-DAG: swc1 $f4
|
||||
; CHECK-DAG: swc1 $f5
|
||||
; CHECK-DAG: swc1 $f6
|
||||
; CHECK-DAG: swc1 $f7
|
||||
; CHECK-DAG: swc1 $f8
|
||||
; CHECK-DAG: swc1 $f9
|
||||
; CHECK-DAG: swc1 $f10
|
||||
; CHECK-DAG: swc1 $f11
|
||||
; CHECK-DAG: swc1 $f12
|
||||
; CHECK-DAG: swc1 $f13
|
||||
; CHECK-DAG: swc1 $f14
|
||||
; CHECK-DAG: swc1 $f15
|
||||
; CHECK-DAG: swc1 $f16
|
||||
; CHECK-DAG: swc1 $f17
|
||||
; CHECK-DAG: swc1 $f16
|
||||
; CHECK-DAG: swc1 $f15
|
||||
; CHECK-DAG: swc1 $f14
|
||||
; CHECK-DAG: swc1 $f13
|
||||
; CHECK-DAG: swc1 $f12
|
||||
; CHECK-DAG: swc1 $f11
|
||||
; CHECK-DAG: swc1 $f10
|
||||
; CHECK-DAG: swc1 $f9
|
||||
; CHECK-DAG: swc1 $f8
|
||||
; CHECK-DAG: swc1 $f7
|
||||
; CHECK-DAG: swc1 $f6
|
||||
; CHECK-DAG: swc1 $f5
|
||||
; CHECK-DAG: swc1 $f4
|
||||
; CHECK-DAG: swc1 $f3
|
||||
; CHECK-DAG: swc1 $f2
|
||||
; CHECK-DAG: swc1 $f1
|
||||
; CHECK-DAG: swc1 $f0
|
||||
; CHECK-DAG: swc1 $f18
|
||||
; CHECK-DAG: swc1 $f19
|
||||
|
||||
@ -330,7 +330,7 @@ entry:
|
||||
; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]])
|
||||
; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]])
|
||||
|
||||
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp)
|
||||
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], {{[0-9]+}}($sp)
|
||||
; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]])
|
||||
|
||||
store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
|
||||
|
@ -15,54 +15,39 @@
|
||||
define void @main() local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: main:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
|
||||
; CHECK-NEXT: .cfi_offset %r12, -64
|
||||
; CHECK-NEXT: .cfi_offset %r13, -56
|
||||
; CHECK-NEXT: .cfi_offset %r14, -48
|
||||
; CHECK-NEXT: .cfi_offset %r15, -40
|
||||
; CHECK-NEXT: lhi %r0, 1
|
||||
; CHECK-NEXT: larl %r1, g_938
|
||||
; CHECK-NEXT: lhi %r2, 2
|
||||
; CHECK-NEXT: lhi %r3, 3
|
||||
; CHECK-NEXT: lhi %r4, 0
|
||||
; CHECK-NEXT: lhi %r5, 4
|
||||
; CHECK-NEXT: larl %r14, g_11
|
||||
; CHECK-NEXT: lhi %r2, 0
|
||||
; CHECK-NEXT: lhi %r3, 4
|
||||
; CHECK-NEXT: larl %r4, g_11
|
||||
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: strl %r0, g_73
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: strl %r0, g_69
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-DAG: lghi %r13, 24
|
||||
; CHECK-DAG: strl %r2, g_69
|
||||
; CHECK-DAG: ag %r13, 0(%r1)
|
||||
; CHECK-NEXT: lrl %r12, g_832
|
||||
; CHECK-NEXT: strl %r3, g_69
|
||||
; CHECK-NEXT: lrl %r12, g_832
|
||||
; CHECK-NEXT: strl %r4, g_69
|
||||
; CHECK-NEXT: lrl %r12, g_832
|
||||
; CHECK-NEXT: strl %r0, g_69
|
||||
; CHECK-NEXT: lrl %r12, g_832
|
||||
; CHECK-NEXT: strl %r2, g_69
|
||||
; CHECK-NEXT: lrl %r12, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: agsi 0(%r1), 24
|
||||
; CHECK-NEXT: lrl %r5, g_832
|
||||
; CHECK-NEXT: strl %r3, g_69
|
||||
; CHECK-NEXT: stgrl %r13, g_938
|
||||
; CHECK-NEXT: lrl %r13, g_832
|
||||
; CHECK-NEXT: strl %r5, g_69
|
||||
; CHECK-NEXT: mvi 0(%r14), 1
|
||||
; CHECK-NEXT: mvi 0(%r4), 1
|
||||
; CHECK-NEXT: j .LBB0_1
|
||||
br label %1
|
||||
|
||||
|
@ -13,9 +13,8 @@
|
||||
define void @redundant_stores_merging() {
|
||||
; CHECK-LABEL: redundant_stores_merging:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001
|
||||
; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001
|
||||
; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
|
||||
; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8
|
||||
; CHECK-NEXT: retq
|
||||
store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
|
||||
store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
|
||||
|
Loading…
Reference in New Issue
Block a user