[OpenMP] Adopt std::set in HostDataToTargetMap

Summary:
lookupMapping took significant time due to linear complexity searching.
This is bad for offloading from multiple host threads because lookupMapping is protected by mutex.
Use std::set for logarithmic complexity searching.

Before my change.
libomptarget inclusive time 16.7 sec, exclusive time 8.6 sec.
After the change
libomptarget inclusive time 7.3 sec, exclusive time 0.4 sec.

Most of the overhead of libomptarget (exclusive time) is gone.

Reviewers: jdoerfert, grokos

Reviewed By: grokos

Subscribers: tianshilei1992, yaxunl, guansong, sstefan1

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D82264
This commit is contained in:
Ye Luo 2020-06-24 12:18:00 -04:00 committed by Shilei Tian
parent 69d2fa9ed1
commit 6e5f64c44f
3 changed files with 75 additions and 53 deletions

View File

@ -25,22 +25,20 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
DataMapMtx.lock();
// Check if entry exists
for (auto &HT : HostDataToTargetMap) {
if ((uintptr_t)HstPtrBegin == HT.HstPtrBegin) {
// Mapping already exists
bool isValid = HT.HstPtrBegin == (uintptr_t) HstPtrBegin &&
HT.HstPtrEnd == (uintptr_t) HstPtrBegin + Size &&
HT.TgtPtrBegin == (uintptr_t) TgtPtrBegin;
DataMapMtx.unlock();
if (isValid) {
DP("Attempt to re-associate the same device ptr+offset with the same "
"host ptr, nothing to do\n");
return OFFLOAD_SUCCESS;
} else {
DP("Not allowed to re-associate a different device ptr+offset with the "
"same host ptr\n");
return OFFLOAD_FAIL;
}
auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
if (search != HostDataToTargetMap.end()) {
// Mapping already exists
bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
search->TgtPtrBegin == (uintptr_t)TgtPtrBegin;
DataMapMtx.unlock();
if (isValid) {
DP("Attempt to re-associate the same device ptr+offset with the same "
"host ptr, nothing to do\n");
return OFFLOAD_SUCCESS;
} else {
DP("Not allowed to re-associate a different device ptr+offset with the "
"same host ptr\n");
return OFFLOAD_FAIL;
}
}
@ -55,7 +53,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(newEntry.HstPtrBase),
DPxPTR(newEntry.HstPtrBegin), DPxPTR(newEntry.HstPtrEnd),
DPxPTR(newEntry.TgtPtrBegin));
HostDataToTargetMap.push_front(newEntry);
HostDataToTargetMap.insert(newEntry);
DataMapMtx.unlock();
@ -65,21 +63,17 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
int DeviceTy::disassociatePtr(void *HstPtrBegin) {
DataMapMtx.lock();
// Check if entry exists
for (HostDataToTargetListTy::iterator ii = HostDataToTargetMap.begin();
ii != HostDataToTargetMap.end(); ++ii) {
if ((uintptr_t)HstPtrBegin == ii->HstPtrBegin) {
// Mapping exists
if (ii->isRefCountInf()) {
DP("Association found, removing it\n");
HostDataToTargetMap.erase(ii);
DataMapMtx.unlock();
return OFFLOAD_SUCCESS;
} else {
DP("Trying to disassociate a pointer which was not mapped via "
"omp_target_associate_ptr\n");
break;
}
auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin});
if (search != HostDataToTargetMap.end()) {
// Mapping exists
if (search->isRefCountInf()) {
DP("Association found, removing it\n");
HostDataToTargetMap.erase(search);
DataMapMtx.unlock();
return OFFLOAD_SUCCESS;
} else {
DP("Trying to disassociate a pointer which was not mapped via "
"omp_target_associate_ptr\n");
}
}
@ -95,11 +89,14 @@ uint64_t DeviceTy::getMapEntryRefCnt(void *HstPtrBegin) {
uint64_t RefCnt = 0;
DataMapMtx.lock();
for (auto &HT : HostDataToTargetMap) {
if (hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd) {
DP("DeviceTy::getMapEntry: requested entry found\n");
RefCnt = HT.getRefCount();
break;
if (!HostDataToTargetMap.empty()) {
auto upper = HostDataToTargetMap.upper_bound(hp);
if (upper != HostDataToTargetMap.begin()) {
upper--;
if (hp >= upper->HstPtrBegin && hp < upper->HstPtrEnd) {
DP("DeviceTy::getMapEntry: requested entry found\n");
RefCnt = upper->getRefCount();
}
}
}
DataMapMtx.unlock();
@ -117,21 +114,31 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%ld)...\n", DPxPTR(hp),
Size);
for (lr.Entry = HostDataToTargetMap.begin();
lr.Entry != HostDataToTargetMap.end(); ++lr.Entry) {
if (HostDataToTargetMap.empty())
return lr;
auto upper = HostDataToTargetMap.upper_bound(hp);
// check the left bin
if (upper != HostDataToTargetMap.begin()) {
lr.Entry = std::prev(upper);
auto &HT = *lr.Entry;
// Is it contained?
lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
(hp+Size) <= HT.HstPtrEnd;
// Does it extend beyond the mapped region?
lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
}
// check the right bin
if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
upper != HostDataToTargetMap.end()) {
lr.Entry = upper;
auto &HT = *lr.Entry;
// Does it extend into an already mapped region?
lr.Flags.ExtendsBefore = hp < HT.HstPtrBegin && (hp+Size) > HT.HstPtrBegin;
// Does it extend beyond the mapped region?
lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp+Size) > HT.HstPtrEnd;
if (lr.Flags.IsContained || lr.Flags.ExtendsBefore ||
lr.Flags.ExtendsAfter) {
break;
}
}
if (lr.Flags.ExtendsBefore) {
@ -203,8 +210,9 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
HostDataToTargetMap.emplace(
HostDataToTargetTy((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
(uintptr_t)HstPtrBegin + Size, tp));
rc = (void *)tp;
}
}

View File

@ -18,6 +18,7 @@
#include <list>
#include <map>
#include <mutex>
#include <set>
#include <vector>
// Forward declarations.
@ -35,7 +36,8 @@ struct HostDataToTargetTy {
uintptr_t TgtPtrBegin; // target info.
private:
uint64_t RefCount;
/// use mutable to allow modification via std::set iterator which is const.
mutable uint64_t RefCount;
static const uint64_t INFRefCount = ~(uint64_t)0;
public:
@ -48,14 +50,14 @@ public:
return RefCount;
}
uint64_t resetRefCount() {
uint64_t resetRefCount() const {
if (RefCount != INFRefCount)
RefCount = 1;
return RefCount;
}
uint64_t incRefCount() {
uint64_t incRefCount() const {
if (RefCount != INFRefCount) {
++RefCount;
assert(RefCount < INFRefCount && "refcount overflow");
@ -64,7 +66,7 @@ public:
return RefCount;
}
uint64_t decRefCount() {
uint64_t decRefCount() const {
if (RefCount != INFRefCount) {
assert(RefCount > 0 && "refcount underflow");
--RefCount;
@ -78,7 +80,19 @@ public:
}
};
typedef std::list<HostDataToTargetTy> HostDataToTargetListTy;
typedef uintptr_t HstPtrBeginTy;
inline bool operator<(const HostDataToTargetTy &lhs, const HstPtrBeginTy &rhs) {
return lhs.HstPtrBegin < rhs;
}
inline bool operator<(const HstPtrBeginTy &lhs, const HostDataToTargetTy &rhs) {
return lhs < rhs.HstPtrBegin;
}
inline bool operator<(const HostDataToTargetTy &lhs,
const HostDataToTargetTy &rhs) {
return lhs.HstPtrBegin < rhs.HstPtrBegin;
}
typedef std::set<HostDataToTargetTy, std::less<>> HostDataToTargetListTy;
struct LookupResult {
struct {

View File

@ -139,12 +139,12 @@ static int InitLibrary(DeviceTy& Device) {
DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
"\n", DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),
CurrDeviceEntry->size);
Device.HostDataToTargetMap.push_front(HostDataToTargetTy(
Device.HostDataToTargetMap.emplace(
(uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
(uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
(uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/,
(uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
true /*IsRefCountINF*/));
true /*IsRefCountINF*/);
}
}
Device.DataMapMtx.unlock();