mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-03 09:14:30 +00:00
Implement global merge optimization for global variables.
This commit adds two command-line switches, -global-merge-on-external and -global-merge-aligned. Both are false by default, so this optimization remains disabled by default for all targets. For ARM64, some back-end behaviors still need to be tuned before this optimization can be enabled further. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208934 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
01a02d91dc
commit
d5db8765d6
@ -82,6 +82,10 @@ public:
|
||||
static inline bool classof(const Value *V) {
|
||||
return V->getValueID() == Value::GlobalAliasVal;
|
||||
}
|
||||
|
||||
// Return the constant offset accumulated from the getelementptr expressions
|
||||
// on the path from this alias to the global it ultimately refers to.
|
||||
uint64_t calculateOffset(const DataLayout &DL) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "llvm/IR/CallingConv.h"
|
||||
#include "llvm/IR/InlineAsm.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/Target/TargetCallingConv.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
@ -883,6 +884,12 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the alignment required by global merge on external symbols.
|
||||
/// By default, returns the natural alignment of merged data structure.
|
||||
virtual unsigned getGlobalMergeAlignment(StructType *MergedTy) const {
|
||||
return getDataLayout()->getABITypeAlignment(MergedTy);
|
||||
}
|
||||
|
||||
/// Returns true if a cast between SrcAS and DestAS is a noop.
|
||||
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
|
||||
return false;
|
||||
|
@ -946,8 +946,11 @@ bool AsmPrinter::doFinalization(Module &M) {
|
||||
EmitVisibility(Name, Alias.getVisibility());
|
||||
|
||||
// Emit the directives as assignments aka .set:
|
||||
OutStreamer.EmitAssignment(Name,
|
||||
MCSymbolRefExpr::Create(Target, OutContext));
|
||||
const MCExpr *Expr = MCSymbolRefExpr::Create(Target, OutContext);
|
||||
if (uint64_t Offset = Alias.calculateOffset(*TM.getDataLayout()))
|
||||
Expr = MCBinaryExpr::CreateAdd(Expr,
|
||||
MCConstantExpr::Create(Offset, OutContext), OutContext);
|
||||
OutStreamer.EmitAssignment(Name, Expr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/GlobalAlias.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
@ -282,3 +283,27 @@ GlobalObject *GlobalAlias::getAliasedGlobal() {
|
||||
return cast<GlobalObject>(GV);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t GlobalAlias::calculateOffset(const DataLayout &DL) const {
|
||||
uint64_t Offset = 0;
|
||||
const Constant *C = this;
|
||||
while (C) {
|
||||
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(C)) {
|
||||
C = GA->getAliasee();
|
||||
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
|
||||
if (CE->getOpcode() == Instruction::GetElementPtr) {
|
||||
std::vector<Value*> Args;
|
||||
for (unsigned I = 1; I < CE->getNumOperands(); ++I)
|
||||
Args.push_back(CE->getOperand(I));
|
||||
Offset += DL.getIndexedOffset(CE->getOperand(0)->getType(), Args);
|
||||
}
|
||||
C = CE->getOperand(0);
|
||||
} else if (isa<GlobalValue>(C)) {
|
||||
return Offset;
|
||||
} else {
|
||||
assert(0 && "Unexpected type in alias chain!");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return Offset;
|
||||
}
|
||||
|
@ -5560,3 +5560,17 @@ unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
|
||||
return 4095;
|
||||
}
|
||||
|
||||
/// getGlobalMergeAlignment - Set alignment to be the max size of merged
|
||||
/// global variable data structure, and make it aligned up to power of 2.
|
||||
/// This way, we could guarantee the merged global variable data structure
|
||||
/// doesn't cross page boundary, because usually OS always allocates page at
|
||||
/// 4096-byte aligned boundary.
|
||||
unsigned AArch64TargetLowering::getGlobalMergeAlignment(
|
||||
StructType *MergedTy) const {
|
||||
unsigned Align = getDataLayout()->getTypeAllocSize(MergedTy);
|
||||
if (Align & (Align - 1))
|
||||
Align = llvm::NextPowerOf2(Align);
|
||||
|
||||
return Align;
|
||||
}
|
||||
|
||||
|
@ -386,6 +386,10 @@ public:
|
||||
/// be used for loads / stores from the global.
|
||||
unsigned getMaximalGlobalOffset() const override;
|
||||
|
||||
/// getGlobalMergeAlignment - Set alignment to be the max size of merged
|
||||
/// global variable data structure, and make it aligned up to power of 2.
|
||||
unsigned getGlobalMergeAlignment(StructType *MergedTy) const override;
|
||||
|
||||
protected:
|
||||
std::pair<const TargetRegisterClass*, uint8_t>
|
||||
findRepresentativeClass(MVT VT) const override;
|
||||
|
@ -630,6 +630,20 @@ unsigned ARM64TargetLowering::getMaximalGlobalOffset() const {
|
||||
return 4095;
|
||||
}
|
||||
|
||||
/// getGlobalMergeAlignment - Set alignment to be the max size of merged
|
||||
/// global variable data structure, and make it aligned up to power of 2.
|
||||
/// This way, we could guarantee the merged global variable data structure
|
||||
/// doesn't cross page boundary, because usually OS always allocates page at
|
||||
/// 4096-byte aligned boundary.
|
||||
unsigned ARM64TargetLowering::getGlobalMergeAlignment(
|
||||
StructType *MergedTy) const {
|
||||
unsigned Align = getDataLayout()->getTypeAllocSize(MergedTy);
|
||||
if (Align & (Align - 1))
|
||||
Align = llvm::NextPowerOf2(Align);
|
||||
|
||||
return Align;
|
||||
}
|
||||
|
||||
FastISel *
|
||||
ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
|
||||
const TargetLibraryInfo *libInfo) const {
|
||||
|
@ -236,6 +236,10 @@ public:
|
||||
/// be used for loads / stores from the global.
|
||||
unsigned getMaximalGlobalOffset() const override;
|
||||
|
||||
/// getGlobalMergeAlignment - Set alignment to be the max size of merged
|
||||
/// global variable data structure, and make it aligned up to power of 2.
|
||||
unsigned getGlobalMergeAlignment(StructType *MergedTy) const override;
|
||||
|
||||
/// Returns true if a cast between SrcAS and DestAS is a noop.
|
||||
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
|
||||
// Addrspacecasts are always noops.
|
||||
|
@ -72,7 +72,7 @@ using namespace llvm;
|
||||
#define DEBUG_TYPE "global-merge"
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableGlobalMerge("global-merge", cl::Hidden,
|
||||
EnableGlobalMerge("enable-global-merge", cl::NotHidden,
|
||||
cl::desc("Enable global merge pass"),
|
||||
cl::init(true));
|
||||
|
||||
@ -81,6 +81,16 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
|
||||
cl::desc("Enable global merge pass on constants"),
|
||||
cl::init(false));
|
||||
|
||||
// -global-merge-on-external: hidden boolean switch, off by default, that
// opts globals with external linkage into the merge.
static cl::opt<bool> EnableGlobalMergeOnExternal(
    "global-merge-on-external",
    cl::desc("Enable global merge pass on external linkage"),
    cl::init(false),
    cl::Hidden);
|
||||
|
||||
// -global-merge-aligned: hidden boolean switch, off by default, that asks
// for a target specific alignment on the merged global.
static cl::opt<bool> EnableGlobalMergeAligned(
    "global-merge-aligned",
    cl::desc("Set target specific alignment for global merge pass"),
    cl::init(false),
    cl::Hidden);
|
||||
|
||||
STATISTIC(NumMerged , "Number of globals merged");
|
||||
namespace {
|
||||
class GlobalMerge : public FunctionPass {
|
||||
@ -129,9 +139,21 @@ namespace {
|
||||
} // end anonymous namespace
|
||||
|
||||
char GlobalMerge::ID = 0;
|
||||
INITIALIZE_PASS(GlobalMerge, "global-merge",
|
||||
"Global Merge", false, false)
|
||||
|
||||
/// Build the PassInfo record for GlobalMerge (both its default constructor
/// and its TargetMachine constructor are supplied), register it with
/// \p Registry, and return the record.
static void *initializeGlobalMergePassOnce(PassRegistry &Registry) {
  const PassInfo::NormalCtor_t DefaultCtor =
      PassInfo::NormalCtor_t(callDefaultCtor<GlobalMerge>);
  const PassInfo::TargetMachineCtor_t MachineCtor =
      PassInfo::TargetMachineCtor_t(callTargetMachineCtor<GlobalMerge>);

  PassInfo *Info =
      new PassInfo("Merge global variables", "global-merge", &GlobalMerge::ID,
                   DefaultCtor, false, false, MachineCtor);

  Registry.registerPass(*Info, true);
  return Info;
}
|
||||
|
||||
// Public hook that registers the GlobalMerge pass with Registry. All of the
// work is delegated to initializeGlobalMergePassOnce through the
// CALL_ONCE_INITIALIZATION macro (defined elsewhere; presumably it guards
// against repeated registration -- confirm against its definition).
void llvm::initializeGlobalMergePass(PassRegistry &Registry) {
|
||||
CALL_ONCE_INITIALIZATION(initializeGlobalMergePassOnce)
|
||||
}
|
||||
|
||||
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
|
||||
Module &M, bool isConst, unsigned AddrSpace) const {
|
||||
@ -154,11 +176,16 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
|
||||
|
||||
Type *Int32Ty = Type::getInt32Ty(M.getContext());
|
||||
|
||||
assert (Globals.size() > 1);
|
||||
|
||||
for (size_t i = 0, e = Globals.size(); i != e; ) {
|
||||
size_t j = 0;
|
||||
uint64_t MergedSize = 0;
|
||||
std::vector<Type*> Tys;
|
||||
std::vector<Constant*> Inits;
|
||||
|
||||
bool HasExternal = false;
|
||||
GlobalVariable *TheFirstExternal = 0;
|
||||
for (j = i; j != e; ++j) {
|
||||
Type *Ty = Globals[j]->getType()->getElementType();
|
||||
MergedSize += DL->getTypeAllocSize(Ty);
|
||||
@ -167,17 +194,45 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
|
||||
}
|
||||
Tys.push_back(Ty);
|
||||
Inits.push_back(Globals[j]->getInitializer());
|
||||
|
||||
if (Globals[j]->hasExternalLinkage() && !HasExternal) {
|
||||
HasExternal = true;
|
||||
TheFirstExternal = Globals[j];
|
||||
}
|
||||
}
|
||||
|
||||
// If the merged variables don't have external linkage, we needn't expose
|
||||
// the symbol after merging.
|
||||
GlobalValue::LinkageTypes Linkage = HasExternal ?
|
||||
GlobalValue::ExternalLinkage :
|
||||
GlobalValue::InternalLinkage ;
|
||||
|
||||
// If merged variables have external linkage, we use symbol name of the
|
||||
// first variable merged as the suffix of global symbol name. This would
|
||||
// be able to avoid link-time naming conflicts for global symbols.
|
||||
Twine MergedGVName = HasExternal ?
|
||||
"_MergedGlobals_" + TheFirstExternal->getName() :
|
||||
"_MergedGlobals" ;
|
||||
|
||||
StructType *MergedTy = StructType::get(M.getContext(), Tys);
|
||||
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
|
||||
|
||||
GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
|
||||
GlobalValue::InternalLinkage,
|
||||
MergedInit, "_MergedGlobals",
|
||||
nullptr,
|
||||
GlobalVariable::NotThreadLocal,
|
||||
AddrSpace);
|
||||
Linkage, MergedInit, MergedGVName,
|
||||
nullptr, GlobalVariable::NotThreadLocal,
|
||||
AddrSpace);
|
||||
|
||||
if (EnableGlobalMergeAligned) {
|
||||
unsigned Align = TLI->getGlobalMergeAlignment(MergedTy);
|
||||
assert(((Align % DL->getABITypeAlignment(MergedTy)) == 0) &&
|
||||
"Specified alignment doesn't meet natural alignment requirement.");
|
||||
MergedGV->setAlignment(Align);
|
||||
}
|
||||
|
||||
for (size_t k = i; k < j; ++k) {
|
||||
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
|
||||
std::string Name = Globals[k]->getName();
|
||||
|
||||
Constant *Idx[2] = {
|
||||
ConstantInt::get(Int32Ty, 0),
|
||||
ConstantInt::get(Int32Ty, k-i)
|
||||
@ -185,6 +240,12 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
|
||||
Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
|
||||
Globals[k]->replaceAllUsesWith(GEP);
|
||||
Globals[k]->eraseFromParent();
|
||||
|
||||
if (Linkage != GlobalValue::InternalLinkage) {
|
||||
// Generate a new alias...
|
||||
new GlobalAlias(GEP->getType(), Linkage, Name, GEP, &M);
|
||||
}
|
||||
|
||||
NumMerged++;
|
||||
}
|
||||
i = j;
|
||||
@ -245,8 +306,12 @@ bool GlobalMerge::doInitialization(Module &M) {
|
||||
// Grab all non-const globals.
|
||||
for (Module::global_iterator I = M.global_begin(),
|
||||
E = M.global_end(); I != E; ++I) {
|
||||
// Merge is safe for "normal" internal globals only
|
||||
if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
|
||||
// Merge is safe for "normal" internal or external globals only
|
||||
if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
|
||||
continue;
|
||||
|
||||
if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage())
|
||||
&& !I->hasInternalLinkage())
|
||||
continue;
|
||||
|
||||
PointerType *PT = dyn_cast<PointerType>(I->getType());
|
||||
|
@ -38,6 +38,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
||||
initializeDSEPass(Registry);
|
||||
initializeGVNPass(Registry);
|
||||
initializeEarlyCSEPass(Registry);
|
||||
initializeGlobalMergePass(Registry);
|
||||
initializeIndVarSimplifyPass(Registry);
|
||||
initializeJumpThreadingPass(Registry);
|
||||
initializeLICMPass(Registry);
|
||||
|
34
test/CodeGen/AArch64/global-merge.ll
Normal file
34
test/CodeGen/AArch64/global-merge.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true -global-merge-aligned=true | FileCheck --check-prefix=NO-MERGE %s
|
||||
|
||||
; RUN: llc < %s -mtriple=arm64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=arm64-apple-ios -O0 -global-merge-on-external=true -global-merge-aligned=false | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
|
||||
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 -global-merge-on-external=true -global-merge-aligned=true | FileCheck %s
|
||||
|
||||
; RUN: llc < %s -mtriple=arm64-apple-ios -O1 | FileCheck %s --check-prefix=CHECK-APPLE-IOS
|
||||
; RUN: llc < %s -mtriple=arm64-apple-ios -O1 -global-merge-on-external=true -global-merge-aligned=false | FileCheck %s --check-prefix=CHECK-APPLE-IOS
|
||||
|
||||
@m = internal global i32 0, align 4
|
||||
@n = internal global i32 0, align 4
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
|
||||
; CHECK-NOT: adrp
|
||||
|
||||
; CHECK-APPLE-IOS-LABEL: f1:
|
||||
; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals
|
||||
; CHECK-APPLE-IOS-NOT: adrp
|
||||
store i32 %a1, i32* @m, align 4
|
||||
store i32 %a2, i32* @n, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .local _MergedGlobals
|
||||
; CHECK: .comm _MergedGlobals,8,8
|
||||
; NO-MERGE-NOT: .local _MergedGlobals
|
||||
|
||||
; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3
|
||||
; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3
|
@ -1,17 +0,0 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
|
||||
|
||||
@m = internal global i32 0, align 4
|
||||
@n = internal global i32 0, align 4
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
|
||||
; CHECK-NOT: adrp
|
||||
store i32 %a1, i32* @m, align 4
|
||||
store i32 %a2, i32* @n, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .local _MergedGlobals
|
||||
; CHECK: .comm _MergedGlobals,8,8
|
||||
|
85
test/CodeGen/ARM/global-merge-1.ll
Normal file
85
test/CodeGen/ARM/global-merge-1.ll
Normal file
@ -0,0 +1,85 @@
|
||||
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
|
||||
; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
|
||||
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
|
||||
target triple = "thumbv7-apple-ios3.0.0"
|
||||
|
||||
@bar = internal global [5 x i32] zeroinitializer, align 4
|
||||
@baz = internal global [5 x i32] zeroinitializer, align 4
|
||||
@foo = internal global [5 x i32] zeroinitializer, align 4
|
||||
|
||||
; Function Attrs: nounwind ssp
|
||||
define internal void @initialize() #0 {
|
||||
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
|
||||
%2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
|
||||
%3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
|
||||
%4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
|
||||
%5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
|
||||
%6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
|
||||
%7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
|
||||
%8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
|
||||
%9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
|
||||
%10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @calc(...) #1
|
||||
|
||||
; Function Attrs: nounwind ssp
|
||||
define internal void @calculate() #0 {
|
||||
%1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
|
||||
%2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
|
||||
%3 = mul <4 x i32> %2, %1
|
||||
store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
|
||||
%4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
|
||||
%5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
|
||||
%6 = mul nsw i32 %5, %4
|
||||
store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone ssp
|
||||
define internal i32* @returnFoo() #2 {
|
||||
ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0)
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #3 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = metadata !{metadata !"LLVM version 3.4 "}
|
||||
!1 = metadata !{metadata !2, metadata !2, i64 0}
|
||||
!2 = metadata !{metadata !"int", metadata !3, i64 0}
|
||||
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
|
||||
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
|
88
test/CodeGen/ARM64/global-merge.ll
Normal file
88
test/CodeGen/ARM64/global-merge.ll
Normal file
@ -0,0 +1,88 @@
|
||||
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
|
||||
; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
|
||||
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
|
||||
target triple = "arm64-apple-ios7.0.0"
|
||||
|
||||
@bar = internal global [5 x i32] zeroinitializer, align 4
|
||||
@baz = internal global [5 x i32] zeroinitializer, align 4
|
||||
@foo = internal global [5 x i32] zeroinitializer, align 4
|
||||
|
||||
; Function Attrs: nounwind ssp
|
||||
define internal void @initialize() #0 {
|
||||
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
|
||||
%2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
|
||||
%3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
|
||||
%4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
|
||||
%5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
|
||||
%6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
|
||||
%7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
|
||||
%8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
|
||||
%9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
|
||||
%10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
|
||||
store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @calc(...)
|
||||
|
||||
; Function Attrs: nounwind ssp
|
||||
define internal void @calculate() #0 {
|
||||
%1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
|
||||
%2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
|
||||
%3 = mul nsw i32 %2, %1
|
||||
store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4
|
||||
%4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
|
||||
%5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
|
||||
%6 = mul nsw i32 %5, %4
|
||||
store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4
|
||||
%7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
|
||||
%8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
|
||||
%9 = mul nsw i32 %8, %7
|
||||
store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4
|
||||
%10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
|
||||
%11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
|
||||
%12 = mul nsw i32 %11, %10
|
||||
store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4
|
||||
%13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
|
||||
%14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
|
||||
%15 = mul nsw i32 %14, %13
|
||||
store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone ssp
|
||||
define internal i32* @returnFoo() #1 {
|
||||
ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0)
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind ssp }
|
||||
attributes #1 = { nounwind readnone ssp }
|
||||
attributes #2 = { nounwind }
|
22
test/Transforms/GlobalMerge/AArch64/global-merge-1.ll
Normal file
22
test/Transforms/GlobalMerge/AArch64/global-merge-1.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
|
||||
; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
|
||||
; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
|
||||
@m = internal global i32 0, align 4
|
||||
@n = internal global i32 0, align 4
|
||||
|
||||
; CHECK: @_MergedGlobals = internal global { i32, i32 } zeroinitializer
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: @f1
|
||||
; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 0)
|
||||
; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 1)
|
||||
store i32 %a1, i32* @m, align 4
|
||||
store i32 %a2, i32* @n, align 4
|
||||
ret void
|
||||
}
|
30
test/Transforms/GlobalMerge/AArch64/global-merge-2.ll
Normal file
30
test/Transforms/GlobalMerge/AArch64/global-merge-2.ll
Normal file
@ -0,0 +1,30 @@
|
||||
; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
|
||||
@x = global i32 0, align 4
|
||||
@y = global i32 0, align 4
|
||||
@z = global i32 0, align 4
|
||||
|
||||
; CHECK: @_MergedGlobals_x = global { i32, i32, i32 } zeroinitializer, align 16
|
||||
; CHECK: @x = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0)
|
||||
; CHECK: @y = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1)
|
||||
; CHECK: @z = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2)
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: @f1
|
||||
; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0)
|
||||
; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1)
|
||||
store i32 %a1, i32* @x, align 4
|
||||
store i32 %a2, i32* @y, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @g1(i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: @g1
|
||||
; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1)
|
||||
; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2)
|
||||
store i32 %a1, i32* @y, align 4
|
||||
store i32 %a2, i32* @z, align 4
|
||||
ret void
|
||||
}
|
27
test/Transforms/GlobalMerge/AArch64/global-merge-3.ll
Normal file
27
test/Transforms/GlobalMerge/AArch64/global-merge-3.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -global-merge-aligned -S -o - | FileCheck %s
|
||||
|
||||
@x = global [1000 x i32] zeroinitializer, align 1
|
||||
@y = global [1000 x i32] zeroinitializer, align 1
|
||||
@z = internal global i32 1, align 4
|
||||
|
||||
; CHECK: @_MergedGlobals_x = global { i32, [1000 x i32] } { i32 1, [1000 x i32] zeroinitializer }, align 4096
|
||||
; CHECK: @_MergedGlobals_y = global { [1000 x i32] } zeroinitializer, align 4096
|
||||
|
||||
; CHECK: @x = alias getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1)
|
||||
; CHECK: @y = alias getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, i32 0)
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2, i32 %a3) {
|
||||
; CHECK-LABEL: @f1
|
||||
; CHECK: %x3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1), i32 0, i64 3
|
||||
; CHECK: %y3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, i32 0), i32 0, i64 3
|
||||
; CHECK: store i32 %a3, i32* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 0), align 4
|
||||
|
||||
%x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3
|
||||
%y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3
|
||||
store i32 %a1, i32* %x3, align 4
|
||||
store i32 %a2, i32* %y3, align 4
|
||||
store i32 %a3, i32* @z, align 4
|
||||
ret void
|
||||
}
|
4
test/Transforms/GlobalMerge/AArch64/lit.local.cfg
Normal file
4
test/Transforms/GlobalMerge/AArch64/lit.local.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
# Mark every test in this directory as unsupported unless 'AArch64' is among
# the targets listed in config.root.targets_to_build.
targets = set(config.root.targets_to_build.split())
if 'AArch64' not in targets:
    config.unsupported = True
|
||||
|
@ -1,23 +1,4 @@
|
||||
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
|
||||
; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
|
||||
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; RUN: opt %s -mtriple=arm-linux-gnuabi -global-merge -S -o - | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
|
||||
target triple = "thumbv7-apple-ios3.0.0"
|
||||
@ -26,6 +7,8 @@ target triple = "thumbv7-apple-ios3.0.0"
|
||||
@baz = internal global [5 x i32] zeroinitializer, align 4
|
||||
@foo = internal global [5 x i32] zeroinitializer, align 4
|
||||
|
||||
; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer
|
||||
|
||||
; Function Attrs: nounwind ssp
|
||||
define internal void @initialize() #0 {
|
||||
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
||||
|
@ -1,23 +1,6 @@
|
||||
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
|
||||
; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
||||
; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s
|
||||
; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -S -o - | FileCheck %s
|
||||
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
||||
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
||||
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
|
||||
; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
|
||||
; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
|
||||
; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
|
||||
target triple = "arm64-apple-ios7.0.0"
|
||||
|
Loading…
x
Reference in New Issue
Block a user