[DAGCombine] Improve alias analysis for chain of independent stores.

FindBetterNeighborChains simultaneously improves the chain
dependencies of a chain of related stores, avoiding the generation of
extra token factors. For chains longer than the GatherAllAliasDepths,
stores further down in the chain will necessarily fail, which is a
potentially significant waste and prevents otherwise trivial parallelization.

This patch directly parallelizes the chains of stores before improving
each store. This generally improves DAG-level parallelism.

Reviewers: courbet, spatel, RKSimon, bogner, efriedma, craig.topper, rnk

Subscribers: sdardis, javed.absar, hiraditya, jrtc27, atanasyan, llvm-commits

Differential Revision: https://reviews.llvm.org/D53552

llvm-svn: 346432
This commit is contained in:
Nirav Dave 2018-11-08 19:14:20 +00:00
parent 526688e4bb
commit 8e3e378c03
9 changed files with 272 additions and 232 deletions

View File

@ -20,6 +20,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@ -490,6 +491,10 @@ namespace {
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
// Helper for findBetterNeighborChains. Walk up the store chain and add
// additional chained stores that do not overlap and can be parallelized.
bool parallelizeChainedStores(StoreSDNode *St);
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@ -18905,6 +18910,11 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
// TODO: Replace with std::monostate when we move to C++17.
//
// UnitT is an empty placeholder type: it carries no data, so every UnitT
// compares equal to every other. It serves as the mapped value of the
// IntervalMap used when parallelizing chained stores, where only the interval
// keys matter.
//
// The type, the `Unit` object and the comparison operators live in an
// anonymous namespace so that they have internal linkage and cannot collide
// with same-named entities defined in other translation units.
namespace {
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // end anonymous namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
@ -18917,13 +18927,22 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
SmallVector<StoreSDNode *, 8> ChainedStores;
StoreSDNode *STChain = St;
// Intervals records which offsets from BaseIndex have been covered. In
// the common case, every store writes to the immediately previous address
// space and is thus merged with the previous interval at insertion time.
using IMap =
llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
IMap::Allocator A;
IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@ -18933,76 +18952,114 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
SmallVector<StoreSDNode *, 8> ChainedStores;
ChainedStores.push_back(St);
// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
// Walk up the chain and look for nodes with offsets from the same
// base pointer. Stop when reaching an instruction with a different kind
// or instruction which has a different base pointer.
StoreSDNode *Index = St;
while (Index) {
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (Index != St && !SDValue(Index, 0)->hasOneUse())
if (!SDValue(Chain, 0)->hasOneUse())
break;
if (Index->isVolatile() || Index->isIndexed())
if (Chain->isVolatile() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))
int64_t Offset;
if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
break;
int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
// Make sure we don't overlap with other intervals by checking the ones to
// the left or right before inserting.
auto I = Intervals.find(Offset);
// If there's a next interval, we should end before it.
if (I != Intervals.end() && I.start() < (Offset + Length))
break;
// If there's a previous interval, we should start after it.
if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
Intervals.insert(Offset, Offset + Length, Unit);
// Walk up the chain to find the next store node, ignoring any
// intermediate loads. Any other kind of node will halt the loop.
SDNode *NextInChain = Index->getChain().getNode();
while (true) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
// We found a store node. Use it for the next iteration.
if (STn->isVolatile() || STn->isIndexed()) {
Index = nullptr;
break;
}
ChainedStores.push_back(STn);
Index = STn;
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
NextInChain = Ldn->getChain().getNode();
continue;
} else {
Index = nullptr;
break;
}
}// end while
ChainedStores.push_back(Chain);
STChain = Chain;
}
// At this point, ChainedStores lists all of the Store nodes
// reachable by iterating up through chain nodes matching the above
// conditions. For each such store identified, try to find an
// earlier chain to attach the store to which won't violate the
// required ordering.
bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
// If we didn't find a chained store, exit.
if (ChainedStores.size() == 0)
return false;
for (StoreSDNode *ChainedStore : ChainedStores) {
SDValue Chain = ChainedStore->getChain();
SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
if (Chain != BetterChain) {
if (ChainedStore == St)
MadeChangeToSt = true;
BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
}
// Improve all chained stores (St and ChainedStores members) starting from
// where the store chain ended and return single TokenFactor.
SDValue NewChain = STChain->getChain();
SmallVector<SDValue, 8> TFOps;
for (unsigned I = ChainedStores.size(); I;) {
StoreSDNode *S = ChainedStores[--I];
SDValue BetterChain = FindBetterChain(S, NewChain);
S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
TFOps.push_back(SDValue(S, 0));
ChainedStores[I] = S;
}
// Do all replacements after finding the replacements to make to avoid making
// the chains more complicated by introducing new TokenFactors.
for (auto Replacement : BetterChains)
replaceStoreChain(Replacement.first, Replacement.second);
// Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
SDValue BetterChain = FindBetterChain(St, NewChain);
SDValue NewST;
if (St->isTruncatingStore())
NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
St->getBasePtr(), St->getMemoryVT(),
St->getMemOperand());
else
NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
St->getBasePtr(), St->getMemOperand());
return MadeChangeToSt;
TFOps.push_back(NewST);
// If we improved every element of TFOps, then we've lost the dependence on
// NewChain to successors of St and we need to add it back to TFOps. Do so at
// the beginning to keep relative order consistent with FindBetterChain.
auto hasImprovedChain = [&](SDValue ST) -> bool {
return ST->getOperand(0) != NewChain;
};
bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
if (AddNewChain)
TFOps.insert(TFOps.begin(), NewChain);
SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
CombineTo(St, TF);
AddToWorklist(STChain);
// Add TF operands to the worklist in reverse order.
for (auto I = TF->getNumOperands(); I;)
AddToWorklist(TF->getOperand(--I).getNode());
AddToWorklist(TF.getNode());
return true;
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  // Nothing is attempted when optimization is disabled.
  if (OptLevel == CodeGenOpt::None)
    return false;

  // Decompose the store address; a usable (present and non-undef) base
  // pointer is required before any chain improvement is tried.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
    return false;

  // Preferred path: improve the whole run of disjoint stores that starts at
  // St in one step.
  if (parallelizeChainedStores(St))
    return true;

  // Fallback: improve only St's own chain, and report a change only when a
  // different chain was actually found.
  SDValue BetterChain = FindBetterChain(St, St->getChain());
  if (St->getChain() == BetterChain)
    return false;

  replaceStoreChain(St, BetterChain);
  return true;
}
/// This is the entry point for the file.

View File

@ -7,14 +7,13 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
; CHECK-LABEL: fn9:
; 9th fixed argument
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
; CHECK: add [[ARGS:x[0-9]+]], sp, #72
; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
; CHECK-DAG: add [[ARGS:x[0-9]+]], sp, #72
; First vararg
; CHECK: ldr {{w[0-9]+}}, [sp, #72]
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72]
; Second vararg
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #80]
; Third vararg
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #88]
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4

View File

@ -1465,10 +1465,10 @@ entry:
define void @merge_zr32_3vec(<3 x i32>* %p) {
; CHECK-LABEL: merge_zr32_3vec:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8]
; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #4]
; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <3 x i32> zeroinitializer, <3 x i32>* %p
@ -1480,8 +1480,8 @@ define void @merge_zr32_4vec(<4 x i32>* %p) {
; CHECK-LABEL: merge_zr32_4vec:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <4 x i32> zeroinitializer, <4 x i32>* %p
@ -1505,8 +1505,8 @@ define void @merge_zr32_4vecf(<4 x float>* %p) {
; CHECK-LABEL: merge_zr32_4vecf:
; CHECK: // %entry
; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8]
; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <4 x float> zeroinitializer, <4 x float>* %p
@ -1589,8 +1589,8 @@ entry:
define void @merge_zr64_3vec(<3 x i64>* %p) {
; CHECK-LABEL: merge_zr64_3vec:
; CHECK: // %entry
; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16]
; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #8]
; CHECK-NEXT: str xzr, [x{{[0-9]+}}]
; CHECK-NEXT: ret
entry:
store <3 x i64> zeroinitializer, <3 x i64>* %p

View File

@ -314,13 +314,12 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
; CHECK-APPLE-DAG: strb [[ID]], [x0, #8]
; First vararg
; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16]
; Second vararg
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16
; Third vararg
; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8
; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
; CHECK-APPLE: mov x21, x0
; CHECK-APPLE-NOT: x21

View File

@ -8,101 +8,95 @@ target triple = "thumbv7em-arm-none-eabi"
define arm_aapcs_vfpcc void @test(i8* %v50) #0 {
; CHECK-LABEL: test:
; CHECK: @ %bb.0:
; CHECK-NEXT: movw r1, #35722
; CHECK-NEXT: movt r1, #36236
; CHECK-NEXT: str.w r1, [r0, #394]
; CHECK-NEXT: movw r1, #36750
; CHECK-NEXT: movt r1, #37264
; CHECK-NEXT: str.w r1, [r0, #398]
; CHECK-NEXT: movw r1, #37778
; CHECK-NEXT: movt r1, #38292
; CHECK-NEXT: str.w r1, [r0, #402]
; CHECK-NEXT: movw r1, #38806
; CHECK-NEXT: movt r1, #39320
; CHECK-NEXT: str.w r1, [r0, #406]
; CHECK-NEXT: movw r1, #39834
; CHECK-NEXT: strh.w r1, [r0, #410]
; CHECK-NEXT: movw r1, #40348
; CHECK-NEXT: movt r1, #40862
; CHECK-NEXT: str.w r1, [r0, #412]
; CHECK-NEXT: movw r1, #41376
; CHECK-NEXT: movt r1, #41890
; CHECK-NEXT: str.w r1, [r0, #416]
; CHECK-NEXT: movw r1, #42404
; CHECK-NEXT: movt r1, #42918
; CHECK-NEXT: str.w r1, [r0, #420]
; CHECK-NEXT: movw r1, #43432
; CHECK-NEXT: movt r1, #43946
; CHECK-NEXT: str.w r1, [r0, #424]
; CHECK-NEXT: movw r1, #44460
; CHECK-NEXT: movt r1, #44974
; CHECK-NEXT: str.w r1, [r0, #428]
; CHECK-NEXT: movw r1, #45488
; CHECK-NEXT: strh.w r1, [r0, #432]
; CHECK-NEXT: movw r1, #65534
; CHECK-NEXT: strh.w r1, [r0, #510]
; CHECK-NEXT: movw r1, #64506
; CHECK-NEXT: movt r1, #65020
; CHECK-NEXT: str.w r1, [r0, #506]
; CHECK-NEXT: movw r1, #63478
; CHECK-NEXT: movt r1, #63992
; CHECK-NEXT: str.w r1, [r0, #502]
; CHECK-NEXT: movw r1, #62450
; CHECK-NEXT: movt r1, #62964
; CHECK-NEXT: str.w r1, [r0, #498]
; CHECK-NEXT: movw r1, #61422
; CHECK-NEXT: movt r1, #61936
; CHECK-NEXT: str.w r1, [r0, #494]
; CHECK-NEXT: movw r1, #60394
; CHECK-NEXT: movt r1, #60908
; CHECK-NEXT: str.w r1, [r0, #490]
; CHECK-NEXT: movw r1, #59366
; CHECK-NEXT: movt r1, #59880
; CHECK-NEXT: str.w r1, [r0, #486]
; CHECK-NEXT: movw r1, #58338
; CHECK-NEXT: movt r1, #58852
; CHECK-NEXT: str.w r1, [r0, #482]
; CHECK-NEXT: movw r1, #57310
; CHECK-NEXT: movt r1, #57824
; CHECK-NEXT: str.w r1, [r0, #478]
; CHECK-NEXT: movw r1, #56282
; CHECK-NEXT: movt r1, #56796
; CHECK-NEXT: str.w r1, [r0, #474]
; CHECK-NEXT: movw r1, #55254
; CHECK-NEXT: movt r1, #55768
; CHECK-NEXT: str.w r1, [r0, #470]
; CHECK-NEXT: movw r1, #54226
; CHECK-NEXT: movt r1, #54740
; CHECK-NEXT: str.w r1, [r0, #466]
; CHECK-NEXT: movw r1, #53198
; CHECK-NEXT: movt r1, #53712
; CHECK-NEXT: str.w r1, [r0, #462]
; CHECK-NEXT: movw r1, #52170
; CHECK-NEXT: movt r1, #52684
; CHECK-NEXT: str.w r1, [r0, #458]
; CHECK-NEXT: movw r1, #51142
; CHECK-NEXT: movt r1, #51656
; CHECK-NEXT: str.w r1, [r0, #454]
; CHECK-NEXT: movw r1, #50114
; CHECK-NEXT: movt r1, #50628
; CHECK-NEXT: str.w r1, [r0, #450]
; CHECK-NEXT: movw r1, #49086
; CHECK-NEXT: movt r1, #49600
; CHECK-NEXT: str.w r1, [r0, #446]
; CHECK-NEXT: movw r1, #48058
; CHECK-NEXT: movt r1, #48572
; CHECK-NEXT: str.w r1, [r0, #442]
; CHECK-NEXT: movw r1, #47030
; CHECK-NEXT: movt r1, #47544
; CHECK-NEXT: str.w r1, [r0, #438]
; CHECK-NEXT: movw r1, #46002
; CHECK-NEXT: movt r1, #46516
; CHECK-NEXT: str.w r1, [r0, #434]
; CHECK-NEXT: movw r1, #47030
; CHECK-NEXT: strh.w r1, [r0, #438]
; CHECK-NEXT: movw r1, #47544
; CHECK-NEXT: movt r1, #48058
; CHECK-NEXT: str.w r1, [r0, #440]
; CHECK-NEXT: movw r1, #48572
; CHECK-NEXT: movt r1, #49086
; CHECK-NEXT: str.w r1, [r0, #444]
; CHECK-NEXT: movw r1, #49600
; CHECK-NEXT: strh.w r1, [r0, #448]
; CHECK-NEXT: movs r1, #194
; CHECK-NEXT: strb.w r1, [r0, #450]
; CHECK-NEXT: movw r1, #50371
; CHECK-NEXT: movt r1, #50885
; CHECK-NEXT: str.w r1, [r0, #451]
; CHECK-NEXT: movw r1, #51399
; CHECK-NEXT: movt r1, #51913
; CHECK-NEXT: str.w r1, [r0, #455]
; CHECK-NEXT: movw r1, #52427
; CHECK-NEXT: movt r1, #52941
; CHECK-NEXT: str.w r1, [r0, #459]
; CHECK-NEXT: movw r1, #53455
; CHECK-NEXT: movt r1, #53969
; CHECK-NEXT: str.w r1, [r0, #463]
; CHECK-NEXT: movw r1, #54483
; CHECK-NEXT: strh.w r1, [r0, #467]
; CHECK-NEXT: movw r1, #54997
; CHECK-NEXT: movt r1, #55511
; CHECK-NEXT: str.w r1, [r0, #469]
; CHECK-NEXT: movw r1, #56025
; CHECK-NEXT: movt r1, #56539
; CHECK-NEXT: str.w r1, [r0, #473]
; CHECK-NEXT: movw r1, #57053
; CHECK-NEXT: movt r1, #57567
; CHECK-NEXT: str.w r1, [r0, #477]
; CHECK-NEXT: movw r1, #58081
; CHECK-NEXT: movt r1, #58595
; CHECK-NEXT: str.w r1, [r0, #481]
; CHECK-NEXT: movw r1, #59109
; CHECK-NEXT: movt r1, #59623
; CHECK-NEXT: str.w r1, [r0, #485]
; CHECK-NEXT: movw r1, #60137
; CHECK-NEXT: strh.w r1, [r0, #489]
; CHECK-NEXT: movw r1, #60651
; CHECK-NEXT: movt r1, #61165
; CHECK-NEXT: str.w r1, [r0, #491]
; CHECK-NEXT: movw r1, #61679
; CHECK-NEXT: strh.w r1, [r0, #495]
; CHECK-NEXT: movw r1, #62193
; CHECK-NEXT: movt r1, #62707
; CHECK-NEXT: str.w r1, [r0, #497]
; CHECK-NEXT: movw r1, #63221
; CHECK-NEXT: movt r1, #63735
; CHECK-NEXT: str.w r1, [r0, #501]
; CHECK-NEXT: movw r1, #64249
; CHECK-NEXT: strh.w r1, [r0, #505]
; CHECK-NEXT: movs r1, #251
; CHECK-NEXT: strb.w r1, [r0, #507]
; CHECK-NEXT: movw r1, #65020
; CHECK-NEXT: movt r1, #65534
; CHECK-NEXT: str.w r1, [r0, #508]
; CHECK-NEXT: movw r1, #44974
; CHECK-NEXT: movt r1, #45488
; CHECK-NEXT: str.w r1, [r0, #430]
; CHECK-NEXT: movw r1, #43946
; CHECK-NEXT: movt r1, #44460
; CHECK-NEXT: str.w r1, [r0, #426]
; CHECK-NEXT: movw r1, #42918
; CHECK-NEXT: movt r1, #43432
; CHECK-NEXT: str.w r1, [r0, #422]
; CHECK-NEXT: movw r1, #41890
; CHECK-NEXT: movt r1, #42404
; CHECK-NEXT: str.w r1, [r0, #418]
; CHECK-NEXT: movw r1, #40862
; CHECK-NEXT: movt r1, #41376
; CHECK-NEXT: str.w r1, [r0, #414]
; CHECK-NEXT: movw r1, #39834
; CHECK-NEXT: movt r1, #40348
; CHECK-NEXT: str.w r1, [r0, #410]
; CHECK-NEXT: movw r1, #38806
; CHECK-NEXT: movt r1, #39320
; CHECK-NEXT: str.w r1, [r0, #406]
; CHECK-NEXT: movw r1, #37778
; CHECK-NEXT: movt r1, #38292
; CHECK-NEXT: str.w r1, [r0, #402]
; CHECK-NEXT: movw r1, #36750
; CHECK-NEXT: movt r1, #37264
; CHECK-NEXT: str.w r1, [r0, #398]
; CHECK-NEXT: movw r1, #35722
; CHECK-NEXT: movt r1, #36236
; CHECK-NEXT: str.w r1, [r0, #394]
; CHECK-NEXT: bx lr
%v190 = getelementptr inbounds i8, i8* %v50, i32 394
store i8 -118, i8* %v190, align 1

View File

@ -72,20 +72,27 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-LABEL: aesea:
; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]]
@ -160,14 +167,14 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}

View File

@ -223,24 +223,24 @@ entry:
define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline {
entry:
; CHECK-LABEL: callee1:
; CHECK-DAG: swc1 $f0
; CHECK-DAG: swc1 $f1
; CHECK-DAG: swc1 $f2
; CHECK-DAG: swc1 $f3
; CHECK-DAG: swc1 $f4
; CHECK-DAG: swc1 $f5
; CHECK-DAG: swc1 $f6
; CHECK-DAG: swc1 $f7
; CHECK-DAG: swc1 $f8
; CHECK-DAG: swc1 $f9
; CHECK-DAG: swc1 $f10
; CHECK-DAG: swc1 $f11
; CHECK-DAG: swc1 $f12
; CHECK-DAG: swc1 $f13
; CHECK-DAG: swc1 $f14
; CHECK-DAG: swc1 $f15
; CHECK-DAG: swc1 $f16
; CHECK-DAG: swc1 $f17
; CHECK-DAG: swc1 $f16
; CHECK-DAG: swc1 $f15
; CHECK-DAG: swc1 $f14
; CHECK-DAG: swc1 $f13
; CHECK-DAG: swc1 $f12
; CHECK-DAG: swc1 $f11
; CHECK-DAG: swc1 $f10
; CHECK-DAG: swc1 $f9
; CHECK-DAG: swc1 $f8
; CHECK-DAG: swc1 $f7
; CHECK-DAG: swc1 $f6
; CHECK-DAG: swc1 $f5
; CHECK-DAG: swc1 $f4
; CHECK-DAG: swc1 $f3
; CHECK-DAG: swc1 $f2
; CHECK-DAG: swc1 $f1
; CHECK-DAG: swc1 $f0
; CHECK-DAG: swc1 $f18
; CHECK-DAG: swc1 $f19
@ -330,7 +330,7 @@ entry:
; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]])
; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]])
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp)
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], {{[0-9]+}}($sp)
; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]])
store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4

View File

@ -15,54 +15,39 @@
define void @main() local_unnamed_addr #0 {
; CHECK-LABEL: main:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
; CHECK-NEXT: .cfi_offset %r12, -64
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: larl %r1, g_938
; CHECK-NEXT: lhi %r2, 2
; CHECK-NEXT: lhi %r3, 3
; CHECK-NEXT: lhi %r4, 0
; CHECK-NEXT: lhi %r5, 4
; CHECK-NEXT: larl %r14, g_11
; CHECK-NEXT: lhi %r2, 0
; CHECK-NEXT: lhi %r3, 4
; CHECK-NEXT: larl %r4, g_11
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: strl %r0, g_73
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: strl %r0, g_69
; CHECK-NEXT: lrl %r13, g_832
; CHECK-DAG: lghi %r13, 24
; CHECK-DAG: strl %r2, g_69
; CHECK-DAG: ag %r13, 0(%r1)
; CHECK-NEXT: lrl %r12, g_832
; CHECK-NEXT: strl %r3, g_69
; CHECK-NEXT: lrl %r12, g_832
; CHECK-NEXT: strl %r4, g_69
; CHECK-NEXT: lrl %r12, g_832
; CHECK-NEXT: strl %r0, g_69
; CHECK-NEXT: lrl %r12, g_832
; CHECK-NEXT: strl %r2, g_69
; CHECK-NEXT: lrl %r12, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: agsi 0(%r1), 24
; CHECK-NEXT: lrl %r5, g_832
; CHECK-NEXT: strl %r3, g_69
; CHECK-NEXT: stgrl %r13, g_938
; CHECK-NEXT: lrl %r13, g_832
; CHECK-NEXT: strl %r5, g_69
; CHECK-NEXT: mvi 0(%r14), 1
; CHECK-NEXT: mvi 0(%r4), 1
; CHECK-NEXT: j .LBB0_1
br label %1

View File

@ -13,9 +13,8 @@
define void @redundant_stores_merging() {
; CHECK-LABEL: redundant_stores_merging:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001
; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001
; CHECK-NEXT: movq %rax, e+{{.*}}(%rip)
; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8
; CHECK-NEXT: retq
store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4