Mirror of https://github.com/capstone-engine/llvm-capstone.git, synced 2024-11-29 00:21:14 +00:00
Cleanup of affinity hierarchy code.
Some of this is an improvement to code suggested by Hal Finkel. Four changes here:
1. Cleanup of hierarchy code to handle all hierarchy cases, whether affinity is available or not
2. Separated this and other classes and common functions out to a header file
3. Added a destructor-like fini function for the hierarchy (called in __kmp_cleanup)
4. Removed some redundant code that is hopefully no longer needed

Differential Revision: http://reviews.llvm.org/D12449

llvm-svn: 247326
This commit is contained in: parent 6f94087329, commit 1707836b68
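Change 3 is the heart of the patch: the static destructor that segfaulted at shutdown (see the TO FIX note in the deleted code below) is replaced by an explicit fini() invoked from __kmp_cleanup(). As a minimal sketch of that lifecycle, here is the same pattern in portable C++, using std::atomic and calloc/free as stand-ins for the runtime's KMP_COMPARE_AND_STORE_ACQ8, TCR_1, and __kmp_allocate/__kmp_free primitives; the type and member names below are illustrative, not part of the runtime:

#include <atomic>
#include <cstdlib>
#include <thread>

// Illustrative stand-in for hierarchy_info's lifecycle (names are made up).
struct lazy_hierarchy {
    std::atomic<signed char> uninitialized{1};  // 1=not initialized, 2=in progress, 0=done
    unsigned *numPerLevel = nullptr;

    void init(unsigned levels) {
        signed char expected = 1;
        // Exactly one thread wins the CAS and allocates; losers spin until
        // the winner publishes uninitialized == 0 (cf. KMP_COMPARE_AND_STORE_ACQ8).
        if (!uninitialized.compare_exchange_strong(expected, 2)) {
            while (uninitialized.load() != 0) std::this_thread::yield();
            return;
        }
        numPerLevel = static_cast<unsigned *>(std::calloc(levels, sizeof(unsigned)));
        uninitialized.store(0);  // one writer
    }

    // Explicit teardown, called from the runtime's cleanup path. Unlike a
    // static destructor, its ordering relative to shutdown is under our control.
    void fini() {
        if (uninitialized.load() == 0 && numPerLevel) {
            std::free(numPerLevel);
            numPerLevel = nullptr;
            uninitialized.store(1);
        }
    }
};

The point of the CAS is that exactly one thread performs the allocation while latecomers spin until it is published; fini() is safe from a normal cleanup path because its ordering is explicit.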
@@ -2984,6 +2984,7 @@ extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
extern void __kmp_balanced_affinity( int tid, int team_size );
#endif /* KMP_AFFINITY_SUPPORTED */

extern void __kmp_cleanup_hierarchy();
extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
@@ -18,6 +18,34 @@
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_affinity.h"

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() {
    machine_hierarchy.fini();
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);
    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // The loop below adjusts the depth in the case of a resize
    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

#if KMP_AFFINITY_SUPPORTED
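For a concrete feel of what __kmp_get_hierarchy hands to the barrier: skipPerLevel[i] is the number of threads covered by a subtree rooted at level i, and every level past the machine's real depth doubles capacity, so the while loop above only has to walk upward until the tree is big enough. A self-contained sketch follows; the {2,4,4} shape is the 4-package, 4-cores/package, 2-HT example documented in hierarchy_info below, and depth 4 includes the one trailing 1 that init() counts:

#include <cstdio>

int main() {
    const unsigned maxLevels = 7;                // matches hierarchy_info's default
    unsigned numPerLevel[maxLevels] = {2, 4, 4, 1, 1, 1, 1};
    unsigned skipPerLevel[maxLevels] = {1};
    unsigned depth = 4;                          // 3 machine levels + one trailing 1

    for (unsigned i = 1; i < depth; ++i)         // real machine levels
        skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
    for (unsigned i = depth; i < maxLevels; ++i) // oversubscription levels double
        skipPerLevel[i] = 2 * skipPerLevel[i-1];
    // skipPerLevel is now {1, 2, 8, 32, 64, 128, 256}

    unsigned nproc = 50;                         // more threads than the 32 HW contexts
    while (nproc > skipPerLevel[depth-1])        // the same loop as above
        depth++;
    printf("depth=%u, capacity=%u\n", depth, skipPerLevel[depth-1]); // depth=5, 64
    return 0;
}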
@@ -108,393 +136,6 @@ __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
    }
}


//
// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
// functions.
//
// The icc codegen emits sections with extremely long names, of the form
// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
// some sort of memory corruption or table overflow that is triggered by
// these long strings. I checked the latest version of the linker -
// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
// fixed.
//
// Unfortunately, my attempts to reproduce it in a smaller example have
// failed - I'm not sure what the prospects are of getting it fixed
// properly - but we need a reproducer smaller than all of libomp.
//
// Work around the problem by avoiding inline constructors in such builds.
// We do this for all platforms, not just Linux* OS - non-inline functions are
// more debuggable and provide better coverage than inline functions.
// Use inline functions in shipping libs, for performance.
//

# if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};

# else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}

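A note on the second comparator: __kmp_affinity_compact rotates which hierarchy levels dominate the sort order — the innermost "compact" levels are compared first, then the remaining outer levels in their usual order. A simplified, self-contained illustration of that key rotation (plain int arrays stand in for Address::childNums; the depth and sample values are made up):

#include <cstdio>

// Compare two childNums arrays of equal depth the way
// __kmp_affinity_cmp_Address_child_num orders them.
static int cmp_child_nums(const int *a, const int *b, int depth, int compact) {
    for (int i = 0; i < compact; i++) {      // innermost 'compact' levels first
        int j = depth - i - 1;
        if (a[j] != b[j]) return a[j] < b[j] ? -1 : 1;
    }
    for (int i = compact; i < depth; i++) {  // then the outer levels
        int j = i - compact;
        if (a[j] != b[j]) return a[j] < b[j] ? -1 : 1;
    }
    return 0;
}

int main() {
    int x[3] = {0, 1, 0};  // package 0, core 1, thread 0
    int y[3] = {1, 0, 0};  // package 1, core 0, thread 0
    // compact=0: outermost level decides -> x sorts before y (-1)
    // compact=3: innermost levels decide first; threads tie, cores break it -> y first (1)
    printf("%d %d\n", cmp_child_nums(x, y, 3, 0), cmp_child_nums(x, y, 3, 3));
    return 0;
}

Raising __kmp_affinity_compact thus makes the inner levels more significant in the ordering, which is what packs consecutive threads close together in the machine.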
/** A structure for holding machine-specific hierarchy info to be computed once at init.
    This structure represents a mapping of threads to the actual machine hierarchy, or to
    our best guess at what the hierarchy might be, for the purpose of performing an
    efficient barrier. In the worst case, when there is no machine hierarchy information,
    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
        or socket, packages/node, nodes/machine, etc. We don't want to get specific with
        nomenclature. When the machine is oversubscribed we add levels to duplicate the
        hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}

    // TO FIX: This destructor causes a segfault in the library at shutdown.
    //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = 0; // One writer
    }

    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        if (bool_result == 0) { // Someone else is resizing
            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        KMP_DEBUG_ASSERT(nproc > base_num_threads);

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        while (nproc > old_sz) {
            old_sz *= 2;
            incs++;
        }
        maxLevels += incs;

        // Resize arrays
        kmp_uint32 *old_numPerLevel = numPerLevel;
        kmp_uint32 *old_skipPerLevel = skipPerLevel;
        numPerLevel = skipPerLevel = NULL;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);

        // Copy old elements from old arrays
        for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
            numPerLevel[i] = old_numPerLevel[i];
            skipPerLevel[i] = old_skipPerLevel[i];
        }

        // Init new elements in arrays to 1
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Free old arrays
        __kmp_free(old_numPerLevel);

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer
    }
};

static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);
    // Adjust the hierarchy in case num threads exceeds original
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // The loop below adjusts the depth in the case of a resize
    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}

//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
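The "optimize hierarchy width" loop in init() deserves a walk-through: any level wider than the branching factor is repeatedly halved (odd widths rounded up), with the excess pushed into the level above, and depth grows whenever the spill reaches a previously empty level. Here is the same loop re-run standalone on a made-up flat machine — 64 leaves under a single node — seeding depth and branch the way init() would derive them for that shape:

#include <cstdio>

int main() {
    const unsigned maxLevels = 7;
    unsigned numPerLevel[maxLevels] = {64, 1, 1, 1, 1, 1, 1};
    unsigned depth = 2, branch = 4; // as init() derives for this shape

    for (unsigned d = 0; d < depth - 1; ++d) {
        while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > 4)) {
            if (numPerLevel[d] & 1) numPerLevel[d]++;  // round odd widths up
            numPerLevel[d] >>= 1;                      // halve this level...
            if (numPerLevel[d+1] == 1) depth++;        // ...growing a new level if needed
            numPerLevel[d+1] <<= 1;                    // ...and push the excess upward
        }
        if (numPerLevel[0] == 1) {
            branch >>= 1;
            if (branch < 4) branch = 4;
        }
    }
    // The flat {64,1,...} shape settles into {4,4,4,...} with depth 4
    printf("depth=%u levels={%u,%u,%u}\n", depth,
           numPerLevel[0], numPerLevel[1], numPerLevel[2]);
    return 0;
}

The result is a balanced tree with bounded fan-out — exactly the short, fixed-branching shape a tree barrier wants, regardless of how lopsided the starting topology guess was.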
@@ -4683,73 +4324,4 @@ void __kmp_balanced_affinity( int tid, int nthreads )
    }
}

#else
    // affinity not supported

static const kmp_uint32 noaff_maxLevels=7;
kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
kmp_uint32 noaff_depth;
kmp_uint8 noaff_leaf_kids;
kmp_int8 noaff_uninitialized=1;

void noaff_init(int nprocs)
{
    kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
    if (result == 0) return; // Already initialized
    else if (result == 2) { // Someone else is initializing
        while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
        return;
    }
    KMP_DEBUG_ASSERT(result==1);

    kmp_uint32 numPerLevel[noaff_maxLevels];
    noaff_depth = 1;
    for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        noaff_skipPerLevel[i] = 1;
    }

    numPerLevel[0] = 4;
    numPerLevel[1] = nprocs/4;
    if (nprocs%4) numPerLevel[1]++;

    for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
        if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
            noaff_depth++;

    kmp_uint32 branch = 4;
    if (numPerLevel[0] == 1) branch = nprocs/4;
    if (branch<4) branch=4;
    for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
        while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
            if (numPerLevel[d] & 1) numPerLevel[d]++;
            numPerLevel[d] = numPerLevel[d] >> 1;
            if (numPerLevel[d+1] == 1) noaff_depth++;
            numPerLevel[d+1] = numPerLevel[d+1] << 1;
        }
        if (numPerLevel[0] == 1) {
            branch = branch >> 1;
            if (branch<4) branch = 4;
        }
    }

    for (kmp_uint32 i=1; i<noaff_depth; ++i)
        noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
        noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
    noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
    noaff_uninitialized = 0; // One writer
}

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (noaff_uninitialized)
        noaff_init(nproc);

    thr_bar->depth = noaff_depth;
    thr_bar->base_leaf_kids = noaff_leaf_kids;
    thr_bar->skip_per_level = noaff_skipPerLevel;
}

#endif // KMP_AFFINITY_SUPPORTED
openmp/runtime/src/kmp_affinity.h (new file, 280 lines)
@@ -0,0 +1,280 @@
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

extern int __kmp_affinity_compact; /* Affinity 'compact' value */

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};


static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}


/** A structure for holding machine-specific hierarchy info to be computed once at init. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no affinity information.
        Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves=4;
    static const kmp_uint32 minBranch=4;
    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
        etc. We don't want to get specific with nomenclature */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
        number of levels along the longest path from root to any leaf. It corresponds to the
        number of entries in numPerLevel if we exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized=0, not_initialized=1, initializing=2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max+1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs/maxLeaves;
            if (num_addrs%maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
        if (branch<minBranch) branch=minBranch;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = minBranch;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer
    }

    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        if (bool_result == 0) { // Someone else is resizing
            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        KMP_DEBUG_ASSERT(nproc > base_num_threads);

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        while (nproc > old_sz) {
            old_sz *= 2;
            incs++;
        }
        maxLevels += incs;

        // Resize arrays
        kmp_uint32 *old_numPerLevel = numPerLevel;
        kmp_uint32 *old_skipPerLevel = skipPerLevel;
        numPerLevel = skipPerLevel = NULL;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);

        // Copy old elements from old arrays
        for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
            numPerLevel[i] = old_numPerLevel[i];
            skipPerLevel[i] = old_skipPerLevel[i];
        }

        // Init new elements in arrays to 1
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Free old arrays
        __kmp_free(old_numPerLevel);

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer
    }
};
#endif // KMP_AFFINITY_H
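When no topology information is available, init() falls back to a fixed guess: maxLeaves threads per leaf node and a ceil-divided second level, which the width-optimization pass then balances. The arithmetic is just a rounded-up division; a trivial sketch with made-up numbers:

#include <cstdio>

int main() {
    const unsigned maxLeaves = 4;   // default leaf width, as in hierarchy_info
    unsigned num_addrs = 10;        // e.g. 10 worker threads, no topology info
    unsigned level0 = maxLeaves;
    unsigned level1 = num_addrs / maxLeaves;
    if (num_addrs % maxLeaves) level1++;  // round up: 10/4 -> 3 groups
    printf("guess: %u leaves/node x %u nodes\n", level0, level1);
    return 0;
}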
@@ -7286,6 +7286,7 @@ __kmp_cleanup( void )
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
}