mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-15 20:12:50 +00:00
Separate invariant equivalence classes by type
We now distinguish invariant loads to the same memory location if they have different types. This will cause us to pre-load an invariant location once for each type that is used to access it. However, we can thereby avoid invalid casting, especially if an array is accessed through differently typed/sized invariant loads. This basically reverts the changes in r260023 but keeps the test cases. llvm-svn: 260045
This commit is contained in:
parent
57b627b41c
commit
96e5471139
@ -209,7 +209,9 @@ protected:
|
||||
virtual void createFor(__isl_take isl_ast_node *For);
|
||||
|
||||
/// @brief Set to remember materialized invariant loads.
|
||||
SmallPtrSet<const SCEV *, 16> PreloadedPtrs;
|
||||
///
|
||||
/// An invariant load is identified by its pointer (the SCEV) and its type.
|
||||
SmallSet<std::pair<const SCEV *, Type *>, 16> PreloadedPtrs;
|
||||
|
||||
/// @brief Preload the memory access at @p AccessRange with @p Build.
|
||||
///
|
||||
|
@ -850,11 +850,13 @@ using MemoryAccessList = std::forward_list<MemoryAccess *>;
|
||||
/// The first element is the SCEV for the pointer/location that identifies this
|
||||
/// equivalence class. The second is a list of memory accesses to that location
|
||||
/// that are now treated as invariant and hoisted during code generation. The
|
||||
/// last element is the execution context under which the invariant memory
|
||||
/// third element is the execution context under which the invariant memory
|
||||
/// location is accessed, hence the union of all domain contexts for the memory
|
||||
/// accesses in the list.
|
||||
/// accesses in the list. The last element describes the type of the invariant
|
||||
/// accesses in order to differentiate between different typed invariant loads of
|
||||
/// the same location.
|
||||
using InvariantEquivClassTy =
|
||||
std::tuple<const SCEV *, MemoryAccessList, isl_set *>;
|
||||
std::tuple<const SCEV *, MemoryAccessList, isl_set *, Type *>;
|
||||
|
||||
/// @brief Type for invariant accesses equivalence classes.
|
||||
using InvariantEquivClassesTy = SmallVector<InvariantEquivClassTy, 8>;
|
||||
|
@ -1827,21 +1827,22 @@ void Scop::addUserContext() {
|
||||
}
|
||||
|
||||
void Scop::buildInvariantEquivalenceClasses() {
|
||||
DenseMap<const SCEV *, LoadInst *> EquivClasses;
|
||||
DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
|
||||
|
||||
const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion());
|
||||
for (LoadInst *LInst : RIL) {
|
||||
const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
|
||||
|
||||
LoadInst *&ClassRep = EquivClasses[PointerSCEV];
|
||||
Type *Ty = LInst->getType();
|
||||
LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
|
||||
if (ClassRep) {
|
||||
InvEquivClassVMap[LInst] = ClassRep;
|
||||
continue;
|
||||
}
|
||||
|
||||
ClassRep = LInst;
|
||||
InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(),
|
||||
nullptr);
|
||||
InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(), nullptr,
|
||||
Ty);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2852,9 +2853,10 @@ const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const {
|
||||
if (Value *Rep = InvEquivClassVMap.lookup(LInst))
|
||||
LInst = cast<LoadInst>(Rep);
|
||||
|
||||
Type *Ty = LInst->getType();
|
||||
const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
|
||||
for (auto &IAClass : InvariantEquivClasses)
|
||||
if (PointerSCEV == std::get<0>(IAClass))
|
||||
if (PointerSCEV == std::get<0>(IAClass) && Ty == std::get<3>(IAClass))
|
||||
return &IAClass;
|
||||
|
||||
return nullptr;
|
||||
@ -2897,11 +2899,12 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) {
|
||||
// MA and if found consolidate them. Otherwise create a new equivalence
|
||||
// class at the end of InvariantEquivClasses.
|
||||
LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
|
||||
Type *Ty = LInst->getType();
|
||||
const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
|
||||
|
||||
bool Consolidated = false;
|
||||
for (auto &IAClass : InvariantEquivClasses) {
|
||||
if (PointerSCEV != std::get<0>(IAClass))
|
||||
if (PointerSCEV != std::get<0>(IAClass) || Ty != std::get<3>(IAClass))
|
||||
continue;
|
||||
|
||||
Consolidated = true;
|
||||
@ -2926,7 +2929,7 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) {
|
||||
// If we did not consolidate MA, thus did not find an equivalence class
|
||||
// for it, we create a new one.
|
||||
InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList{MA},
|
||||
isl_set_copy(DomainCtx));
|
||||
isl_set_copy(DomainCtx), Ty);
|
||||
}
|
||||
|
||||
isl_set_free(DomainCtx);
|
||||
@ -2971,15 +2974,6 @@ bool Scop::isHoistableAccess(MemoryAccess *Access,
|
||||
|
||||
isl_map *AccessRelation = Access->getAccessRelation();
|
||||
|
||||
// Invariant load hoisting of memory accesses with non-canonical element
|
||||
// types lacks support for equivalence classes that contain elements of
|
||||
// different width/size. Hence, do not yet consider loads with non-canonical
|
||||
// element size for load hoisting.
|
||||
if (!isl_map_is_single_valued(AccessRelation)) {
|
||||
isl_map_free(AccessRelation);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip accesses that have an empty access relation. These can be caused
|
||||
// by multiple offsets with a type cast in-between that cause the overall
|
||||
// byte offset to be not divisible by the new types sizes.
|
||||
|
@ -1015,7 +1015,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
|
||||
// Check for recursion which can be caused by additional constraints, e.g.,
|
||||
// non-finite loop constraints. In such a case we have to bail out and insert
|
||||
// a "false" runtime check that will cause the original code to be executed.
|
||||
if (!PreloadedPtrs.insert(std::get<0>(IAClass)).second)
|
||||
auto PtrId = std::make_pair(std::get<0>(IAClass), std::get<3>(IAClass));
|
||||
if (!PreloadedPtrs.insert(PtrId).second)
|
||||
return false;
|
||||
|
||||
// If the base pointer of this class is dependent on another one we have to
|
||||
@ -1033,13 +1034,10 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
|
||||
if (!PreloadVal)
|
||||
return false;
|
||||
|
||||
assert(PreloadVal->getType() == AccInst->getType());
|
||||
for (const MemoryAccess *MA : MAs) {
|
||||
Instruction *MAAccInst = MA->getAccessInstruction();
|
||||
// TODO: The bitcast here is wrong. In case of floating and non-floating
|
||||
// point values we need to reload the value or convert it.
|
||||
ValueMap[MAAccInst] =
|
||||
Builder.CreateBitOrPointerCast(PreloadVal, MAAccInst->getType());
|
||||
assert(PreloadVal->getType() == MAAccInst->getType());
|
||||
ValueMap[MAAccInst] = PreloadVal;
|
||||
}
|
||||
|
||||
if (SE.isSCEVable(AccInstTy)) {
|
||||
@ -1063,11 +1061,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
|
||||
// should only change the base pointer of the derived SAI if we actually
|
||||
// preloaded it.
|
||||
if (BasePtr == MA->getBaseAddr()) {
|
||||
// TODO: The bitcast here is wrong. In case of floating and non-floating
|
||||
// point values we need to reload the value or convert it.
|
||||
BasePtr =
|
||||
Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType());
|
||||
DerivedSAI->setBasePtr(BasePtr);
|
||||
assert(BasePtr->getType() == PreloadVal->getType());
|
||||
DerivedSAI->setBasePtr(PreloadVal);
|
||||
}
|
||||
|
||||
// For scalar derived SAIs we remap the alloca used for the derived value.
|
||||
|
@ -1,18 +1,9 @@
|
||||
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
|
||||
|
||||
; Invariant loads with non-canonical types are not yet fully supported.
|
||||
|
||||
; XFAIL: *
|
||||
; RUN: opt %loadPolly -polly-allow-differing-element-types -polly-codegen -S < %s | FileCheck %s
|
||||
|
||||
; CHECK: %polly.access.cast.global.load = bitcast %struct.hoge* %global.load to i32*
|
||||
; CHECK: %polly.access.global.load = getelementptr i32, i32* %polly.access.cast.global.load, i64 0
|
||||
; CHECK: %polly.access.global.load.load = load i32, i32* %polly.access.global.load
|
||||
|
||||
; CHECK: %polly.access.cast.global.load1 = bitcast %struct.hoge* %global.load to i32*
|
||||
; CHECK: %polly.access.global.load2 = getelementptr i32, i32* %polly.access.cast.global.load1, i64 2
|
||||
; CHECK: %polly.access.global.load2.cast = bitcast i32* %polly.access.global.load2 to double*
|
||||
; CHECK: %polly.access.global.load2.load = load double, double* %polly.access.global.load2.cast
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
|
@ -11,6 +11,9 @@
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_U[0] };
|
||||
; CHECK-NEXT: Execution Context: { : }
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_U[0] };
|
||||
; CHECK-NEXT: Execution Context: { : }
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
; CHECK: Statements {
|
||||
@ -24,13 +27,15 @@
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
; CODEGEN: entry:
|
||||
; CODEGEN: %U.f.preload.s2a = alloca float
|
||||
; CODEGEN-DAG: %U.f.preload.s2a = alloca float
|
||||
; CODEGEN-DAG: %U.i.preload.s2a = alloca i32
|
||||
; CODEGEN: br label %polly.split_new_and_old
|
||||
;
|
||||
; CODEGEN: polly.preload.begin:
|
||||
; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*)
|
||||
; CODEGEN: %0 = bitcast float %U.load to i32
|
||||
; CODEGEN: store float %U.load, float* %U.f.preload.s2a
|
||||
; CODEGEN-DAG: %U.load[[f:[.0-9]*]] = load float, float* bitcast (i32* @U to float*)
|
||||
; CODEGEN-DAG: store float %U.load[[f]], float* %U.f.preload.s2a
|
||||
; CODEGEN-DAG: %U.load[[i:[.0-9]*]] = load i32, i32* @U
|
||||
; CODEGEN-DAG: store i32 %U.load[[i]], i32* %U.i.preload.s2a
|
||||
;
|
||||
; CODEGEN: polly.merge_new_and_old:
|
||||
; CODEGEN-NOT: merge = phi
|
||||
@ -39,8 +44,7 @@
|
||||
; CODEGEN-NOT: final_reload
|
||||
;
|
||||
; CODEGEN: polly.stmt.for.body:
|
||||
; CODEGEN: %p_conv = fptosi float %U.load to i32
|
||||
; CODEGEN: %p_add = add nsw i32 %0, %p_conv
|
||||
; CODEGEN: %p_add = add nsw i32 %U.load[[i]], %p_conv
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
@ -16,6 +16,9 @@
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_do_body[i0] -> MemRef_U[0] };
|
||||
; CHECK-NEXT: Execution Context: { : }
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_do_body[i0] -> MemRef_U[0] };
|
||||
; CHECK-NEXT: Execution Context: { : }
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
; CHECK: Statements {
|
||||
@ -29,26 +32,26 @@
|
||||
; CHECK-NEXT: }
|
||||
;
|
||||
; CODEGEN: entry:
|
||||
; CODEGEN: %U.f.preload.s2a = alloca float
|
||||
; CODEGEN-DAG: %U.f.preload.s2a = alloca float
|
||||
; CODEGEN-DAG: %U.i.preload.s2a = alloca i32
|
||||
; CODEGEN: br label %polly.split_new_and_old
|
||||
;
|
||||
; CODEGEN: polly.preload.begin:
|
||||
; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*)
|
||||
; CODEGEN: %0 = bitcast float %U.load to i32
|
||||
; CODEGEN: store float %U.load, float* %U.f.preload.s2a
|
||||
; CODEGEN-DAG: %U.load[[f:[.0-9]*]] = load float, float* bitcast (i32* @U to float*)
|
||||
; CODEGEN-DAG: store float %U.load[[f]], float* %U.f.preload.s2a
|
||||
; CODEGEN-DAG: %U.load[[i:[.0-9]*]] = load i32, i32* @U
|
||||
; CODEGEN-DAG: store i32 %U.load[[i]], i32* %U.i.preload.s2a
|
||||
;
|
||||
; CODEGEN: polly.merge_new_and_old:
|
||||
; CODEGEN-DAG: %U.f.merge = phi float [ %U.f.final_reload, %polly.exiting ], [ %U.f, %do.cond ]
|
||||
; CODEGEN-DAG: %U.i.merge = phi i32 [ %5, %polly.exiting ], [ %U.i, %do.cond ]
|
||||
; CODEGEN-DAG: %U.i.merge = phi i32 [ %U.i.final_reload, %polly.exiting ], [ %U.i, %do.cond ]
|
||||
;
|
||||
; CODEGEN: polly.loop_exit:
|
||||
; CODEGEN-DAG: %U.f.final_reload = load float, float* %U.f.preload.s2a
|
||||
; CODEGEN-DAG: %U.i.final_reload = load float, float* %U.f.preload.s2a
|
||||
; CODEGEN-DAG: %5 = bitcast float %U.i.final_reload to i32
|
||||
; CODEGEN-DAG: %U.i.final_reload = load i32, i32* %U.i.preload.s2a
|
||||
;
|
||||
; CODEGEN: polly.stmt.do.body:
|
||||
; CODEGEN: %p_conv = fptosi float %U.load to i32
|
||||
; CODEGEN: %p_add = add nsw i32 %0, %p_conv
|
||||
; CODEGEN: %p_add = add nsw i32 %U.load[[i]], %p_conv
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user