mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-27 06:54:30 +00:00
Introducing llvm.invariant.group.barrier intrinsic
For more information on why it was introduced, see: http://lists.llvm.org/pipermail/cfe-dev/2015-July/044227.html invariant.group.barrier: http://reviews.llvm.org/D12310 docs: http://reviews.llvm.org/D11399 CodeGenPrepare: http://reviews.llvm.org/D12875 llvm-svn: 247711
This commit is contained in:
parent
4a4ca5700a
commit
7016a01b0d
@ -4421,6 +4421,50 @@ the loop identifier metadata node directly:
|
||||
The ``llvm.bitsets`` global metadata is used to implement
|
||||
:doc:`bitsets <BitSets>`.
|
||||
|
||||
'``invariant.group``' Metadata
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions.
|
||||
The existence of the ``invariant.group`` metadata on the instruction tells
|
||||
the optimizer that every ``load`` and ``store`` to the same pointer operand
|
||||
within the same invariant group can be assumed to load or store the same
|
||||
value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
|
||||
when two pointers are considered the same).
|
||||
|
||||
Examples:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
@unknownPtr = external global i8
|
||||
...
|
||||
%ptr = alloca i8
|
||||
store i8 42, i8* %ptr, !invariant.group !0
|
||||
call void @foo(i8* %ptr)
|
||||
|
||||
%a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
|
||||
call void @foo(i8* %ptr)
|
||||
%b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed
|
||||
|
||||
%newPtr = call i8* @getPointer(i8* %ptr)
|
||||
%c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
|
||||
|
||||
%unknownValue = load i8, i8* @unknownPtr
|
||||
store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42
|
||||
|
||||
call void @foo(i8* %ptr)
|
||||
%newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
|
||||
%d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr
|
||||
|
||||
...
|
||||
declare void @foo(i8*)
|
||||
declare i8* @getPointer(i8*)
|
||||
declare i8* @llvm.invariant.group.barrier(i8*)
|
||||
|
||||
!0 = !{!"magic ptr"}
|
||||
!1 = !{!"other ptr"}
|
||||
|
||||
|
||||
|
||||
Module Flags Metadata
|
||||
=====================
|
||||
|
||||
@ -6768,8 +6812,8 @@ Syntax:
|
||||
|
||||
::
|
||||
|
||||
<result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>][, !nonnull !<index>][, !dereferenceable !<index>][, !dereferenceable_or_null !<index>]
|
||||
<result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment>
|
||||
<result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>][, !invariant.group !<index>][, !nonnull !<index>][, !dereferenceable !<index>][, !dereferenceable_or_null !<index>]
|
||||
<result> = load atomic [volatile] <ty>, <ty>* <pointer> [singlethread] <ordering>, align <alignment> [, !invariant.group !<index>]
|
||||
!<index> = !{ i32 1 }
|
||||
|
||||
Overview:
|
||||
@ -6825,6 +6869,9 @@ Being invariant does not imply that a location is dereferenceable,
|
||||
but it does imply that once the location is known dereferenceable
|
||||
its value is henceforth unchanging.
|
||||
|
||||
The optional ``!invariant.group`` metadata must reference a single metadata name
|
||||
``<index>`` corresponding to a metadata node. See ``invariant.group`` metadata.
|
||||
|
||||
The optional ``!nonnull`` metadata must reference a single
|
||||
metadata name ``<index>`` corresponding to a metadata node with no
|
||||
entries. The existence of the ``!nonnull`` metadata on the
|
||||
@ -6882,8 +6929,8 @@ Syntax:
|
||||
|
||||
::
|
||||
|
||||
store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>] ; yields void
|
||||
store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment> ; yields void
|
||||
store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.group !<index>] ; yields void
|
||||
store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment> [, !invariant.group !<index>] ; yields void
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
@ -6930,6 +6977,9 @@ be reused in the cache. The code generator may select special
|
||||
instructions to save cache bandwidth, such as the MOVNT instruction on
|
||||
x86.
|
||||
|
||||
The optional ``!invariant.group`` metadata must reference a
|
||||
single metadata name ``<index>``. See ``invariant.group`` metadata.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
@ -11465,6 +11515,36 @@ Semantics:
|
||||
|
||||
This intrinsic indicates that the memory is mutable again.
|
||||
|
||||
'``llvm.invariant.group.barrier``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare i8* @llvm.invariant.group.barrier(i8* <ptr>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant
|
||||
established by invariant.group metadata no longer holds, to obtain a new pointer
|
||||
value that does not carry the invariant information.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The ``llvm.invariant.group.barrier`` takes only one argument, which is
|
||||
the pointer to the memory for which the ``invariant.group`` no longer holds.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
Returns another pointer that aliases its argument but which is considered different
|
||||
for the purposes of ``load``/``store`` ``invariant.group`` metadata.
|
||||
|
||||
General Intrinsics
|
||||
------------------
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
@ -1634,6 +1635,32 @@ public:
|
||||
Name);
|
||||
}
|
||||
|
||||
/// \brief Create an invariant.group.barrier intrinsic call, that stops
|
||||
/// optimizer to propagate equality using invariant.group metadata.
|
||||
/// If Ptr type is different from i8*, it's casted to i8* before call
|
||||
/// and casted back to Ptr type after call.
|
||||
Value *CreateInvariantGroupBarrier(Value *Ptr) {
|
||||
Module *M = BB->getParent()->getParent();
|
||||
Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M,
|
||||
Intrinsic::invariant_group_barrier);
|
||||
|
||||
Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType();
|
||||
assert(ArgumentAndReturnType ==
|
||||
FnInvariantGroupBarrier->getFunctionType()->getParamType(0) &&
|
||||
"InvariantGroupBarrier should take and return the same type");
|
||||
Type *PtrType = Ptr->getType();
|
||||
|
||||
bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType;
|
||||
if (PtrTypeConversionNeeded)
|
||||
Ptr = CreateBitCast(Ptr, ArgumentAndReturnType);
|
||||
|
||||
CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr});
|
||||
|
||||
if (PtrTypeConversionNeeded)
|
||||
return CreateBitCast(Fn, PtrType);
|
||||
return Fn;
|
||||
}
|
||||
|
||||
/// \brief Return a vector value that contains \arg V broadcasted to \p
|
||||
/// NumElts elements.
|
||||
Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
|
||||
|
@ -319,7 +319,7 @@ def int_instrprof_increment : Intrinsic<[],
|
||||
[llvm_ptr_ty, llvm_i64_ty,
|
||||
llvm_i32_ty, llvm_i32_ty],
|
||||
[]>;
|
||||
|
||||
|
||||
//===------------------- Standard C Library Intrinsics --------------------===//
|
||||
//
|
||||
|
||||
@ -530,6 +530,10 @@ def int_invariant_end : Intrinsic<[],
|
||||
llvm_ptr_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<2>]>;
|
||||
|
||||
// llvm.invariant.group.barrier: takes one pointer argument and returns a
// pointer of the same type; declared with the IntrNoMem property.
def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
|
||||
[llvm_ptr_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
//===------------------------ Stackmap Intrinsics -------------------------===//
|
||||
//
|
||||
def int_experimental_stackmap : Intrinsic<[],
|
||||
|
@ -187,6 +187,7 @@ class TypePromotionTransaction;
|
||||
unsigned CreatedInstCost);
|
||||
bool splitBranchCondition(Function &F);
|
||||
bool simplifyOffsetableRelocate(Instruction &I);
|
||||
void stripInvariantGroupMetadata(Instruction &I);
|
||||
};
|
||||
}
|
||||
|
||||
@ -1411,6 +1412,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
|
||||
InsertedInsts.insert(ExtVal);
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::invariant_group_barrier:
|
||||
II->replaceAllUsesWith(II->getArgOperand(0));
|
||||
II->eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (TLI) {
|
||||
@ -4421,6 +4426,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
|
||||
return OptimizeCmpExpression(CI);
|
||||
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
|
||||
stripInvariantGroupMetadata(*LI);
|
||||
if (TLI) {
|
||||
unsigned AS = LI->getPointerAddressSpace();
|
||||
return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
|
||||
@ -4429,6 +4435,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
|
||||
}
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
|
||||
stripInvariantGroupMetadata(*SI);
|
||||
if (TLI) {
|
||||
unsigned AS = SI->getPointerAddressSpace();
|
||||
return OptimizeMemoryInst(I, SI->getOperand(1),
|
||||
@ -4827,3 +4834,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
|
||||
}
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
/// Remove the invariant.group metadata attachment from \p I, if present,
/// leaving every other metadata kind on the instruction untouched.
///
/// \param I instruction to strip; callers in this file pass loads and stores.
void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
  // Instruction::dropUnknownNonDebugMetadata() takes the kinds to *keep* and
  // erases everything else, and Metadata::getMetadataID() yields the node's
  // subclass ID rather than an attachment kind, so calling one with the other
  // discards unrelated attachments. Resetting this single kind to null
  // removes exactly the invariant.group attachment.
  if (I.getMetadata("invariant.group"))
    I.setMetadata("invariant.group", nullptr);
}
|
||||
|
23
test/Transforms/CodeGenPrepare/invariant.group.ll
Normal file
23
test/Transforms/CodeGenPrepare/invariant.group.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: opt -codegenprepare -S < %s | FileCheck %s
|
||||
|
||||
@tmp = global i8 0
|
||||
|
||||
; CHECK-LABEL: define void @foo() {
|
||||
; Runs CodeGenPrepare over a load and a store tagged with !invariant.group and
; over a call to the barrier intrinsic. The FileCheck directives inside the
; body expect the output to contain neither the metadata nor the barrier call,
; and expect the store to address @tmp directly instead of %ptr.
define void @foo() {
|
||||
enter:
|
||||
; CHECK-NOT: !invariant.group
|
||||
; CHECK-NOT: @llvm.invariant.group.barrier(
|
||||
; CHECK: %val = load i8, i8* @tmp
|
||||
%val = load i8, i8* @tmp, !invariant.group !0
|
||||
%ptr = call i8* @llvm.invariant.group.barrier(i8* @tmp)
|
||||
|
||||
; CHECK: store i8 42, i8* @tmp
|
||||
store i8 42, i8* %ptr, !invariant.group !0
|
||||
|
||||
ret void
|
||||
}
|
||||
; CHECK-LABEL: }
|
||||
|
||||
declare i8* @llvm.invariant.group.barrier(i8*)
|
||||
|
||||
!0 = !{!"something"}
|
79
test/Transforms/GlobalOpt/invariant.group.barrier.ll
Normal file
79
test/Transforms/GlobalOpt/invariant.group.barrier.ll
Normal file
@ -0,0 +1,79 @@
|
||||
; RUN: opt -S -globalopt < %s | FileCheck %s
|
||||
|
||||
; This test is a hint as to what GlobalOpt could optimize and what it can't.
|
||||
; FIXME: @tmp and @tmp2 can be safely set to 42
|
||||
; CHECK: @tmp = global i32 0
|
||||
; CHECK: @tmp2 = global i32 0
|
||||
; CHECK: @tmp3 = global i32 0
|
||||
|
||||
@tmp = global i32 0
|
||||
@tmp2 = global i32 0
|
||||
@tmp3 = global i32 0
|
||||
@ptrToTmp3 = global i32* null
|
||||
|
||||
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
|
||||
|
||||
; Returns the constant 42; @_optimizable calls this to obtain the value it
; stores.
define i32 @TheAnswerToLifeTheUniverseAndEverything() {
|
||||
ret i32 42
|
||||
}
|
||||
|
||||
; Constructor registered in @llvm.global_ctors; calls @_optimizable and then
; @_not_optimizable.
define void @_GLOBAL__I_a() {
|
||||
enter:
|
||||
call void @_optimizable()
|
||||
call void @_not_optimizable()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the result of @TheAnswerToLifeTheUniverseAndEverything to @tmp and to
; a stack slot, then reloads the slot through the pointer returned by the
; barrier intrinsic and stores that value to @tmp2. No invariant.group
; metadata is attached here; the FIXME at the top of the file notes that @tmp
; and @tmp2 could safely be set to 42.
define void @_optimizable() {
|
||||
enter:
|
||||
%valptr = alloca i32
|
||||
|
||||
%val = call i32 @TheAnswerToLifeTheUniverseAndEverything()
|
||||
store i32 %val, i32* @tmp
|
||||
store i32 %val, i32* %valptr
|
||||
|
||||
%0 = bitcast i32* %valptr to i8*
|
||||
%barr = call i8* @llvm.invariant.group.barrier(i8* %0)
|
||||
%1 = bitcast i8* %barr to i32*
|
||||
|
||||
%val2 = load i32, i32* %1
|
||||
store i32 %val2, i32* @tmp2
|
||||
ret void
|
||||
}
|
||||
|
||||
; We can't step through invariant.group.barrier here, because that would change
|
||||
; this load in @usage_of_globals()
|
||||
; %val = load i32, i32* %ptrVal, !invariant.group !0
|
||||
; into
|
||||
; %val = load i32, i32* @tmp3, !invariant.group !0
|
||||
; and then we could assume %val and %val2 to be the same, which could be
|
||||
; false, because @changeTmp3ValAndCallBarrierInside() may change the value
|
||||
; of @tmp3.
|
||||
; Stores 13 to @tmp3 under invariant group !0, obtains a barrier pointer for
; @tmp3, publishes that pointer in @ptrToTmp3, and then stores 42 through it
; under the same group.
define void @_not_optimizable() {
|
||||
enter:
|
||||
store i32 13, i32* @tmp3, !invariant.group !0
|
||||
|
||||
%0 = bitcast i32* @tmp3 to i8*
|
||||
%barr = call i8* @llvm.invariant.group.barrier(i8* %0)
|
||||
%1 = bitcast i8* %barr to i32*
|
||||
|
||||
store i32* %1, i32** @ptrToTmp3
|
||||
store i32 42, i32* %1, !invariant.group !0
|
||||
|
||||
ret void
|
||||
}
|
||||
; Loads a value through the pointer stored in @ptrToTmp3, calls the external
; @changeTmp3ValAndCallBarrierInside (which, per the comment above
; @_not_optimizable, may change @tmp3), and then loads @tmp3 directly. Both
; loads carry invariant group !0.
define void @usage_of_globals() {
|
||||
entry:
|
||||
%ptrVal = load i32*, i32** @ptrToTmp3
|
||||
%val = load i32, i32* %ptrVal, !invariant.group !0
|
||||
|
||||
call void @changeTmp3ValAndCallBarrierInside()
|
||||
%val2 = load i32, i32* @tmp3, !invariant.group !0
|
||||
ret void;
|
||||
}
|
||||
|
||||
declare void @changeTmp3ValAndCallBarrierInside()
|
||||
|
||||
declare i8* @llvm.invariant.group.barrier(i8*)
|
||||
|
||||
!0 = !{!"something"}
|
Loading…
x
Reference in New Issue
Block a user