mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-09 10:55:03 +00:00
[OpenMP] Fix HWLOC topology detection for 2.0.x
HWLOC 2.0 represents NUMA nodes as separate memory children that are no longer part of the main parent/child topology tree. This change takes that into account. The main topology detection loop in the create_hwloc_map() routine starts at a hardware thread within the initial affinity mask and walks up the topology tree, setting the socket/core/thread labels correctly. This change also introduces some of the more generic machinery that the future kmp_topology_t structure will take advantage of, including generic ratio & count arrays (finding the ratios between adjacent topology layers, such as threads per core and cores per socket, and the absolute count of each topology layer), a generic radix-1 reduction step, a generic uniformity check, and generic printing of the topology (en_US.txt). Differential Revision: https://reviews.llvm.org/D95156
This commit is contained in:
parent
ff038b316d
commit
9f87c6b47d
@ -103,6 +103,26 @@ DisplayEnvEnd "OPENMP DISPLAY ENVIRONMENT END"
|
||||
Device "[device]"
|
||||
Host "[host]"
|
||||
Tile "tile"
|
||||
Tiles "tiles"
|
||||
Threads "threads"
|
||||
Cores "cores"
|
||||
Socket "socket"
|
||||
Sockets "sockets"
|
||||
Die "die"
|
||||
Dice "dice"
|
||||
Module "module"
|
||||
Modules "modules"
|
||||
L1Cache "L1 cache"
|
||||
L1Caches "L1 caches"
|
||||
L2Cache "L2 cache"
|
||||
L2Caches "L2 caches"
|
||||
L3Cache "L3 cache"
|
||||
L3Caches "L3 caches"
|
||||
NumaDomain "NUMA domain"
|
||||
NumaDomains "NUMA domains"
|
||||
ProcGroup "processor group"
|
||||
ProcGroups "processor groups"
|
||||
Unknown "unknown"
|
||||
|
||||
|
||||
|
||||
@ -333,6 +353,7 @@ TopologyExtraNoTi "%1$s: %2$d packages x %3$d nodes/pkg x %4$d tiles/
|
||||
OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the default (loop) instead. "
|
||||
"This issue is fixed in an up-to-date compiler."
|
||||
OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
|
||||
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
|
||||
|
||||
# --- OpenMP errors detected at runtime ---
|
||||
#
|
||||
|
@ -595,6 +595,32 @@ typedef int PACKED_REDUCTION_METHOD_T;
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
// Hardware topology layer types recognized by the runtime.
// NOTE(review): the declaration order appears to run from larger containers
// (socket) toward smaller ones (thread) for the layers between them — confirm
// before relying on ordinal comparisons beyond the validity checks below.
enum kmp_hw_t : int {
  KMP_HW_UNKNOWN = -1, // sentinel: layer could not be classified
  KMP_HW_MACHINE = 0,
  KMP_HW_SOCKET,
  KMP_HW_PROC_GROUP, // Windows processor group
  KMP_HW_NUMA,       // NUMA domain
  KMP_HW_DIE,
  KMP_HW_L3, // L3 cache level
  KMP_HW_TILE,
  KMP_HW_MODULE,
  KMP_HW_L2, // L2 cache level
  KMP_HW_L1, // L1 cache level
  KMP_HW_CORE,
  KMP_HW_THREAD,
  KMP_HW_LAST // count of valid layer types; also used as array/loop bound
};
|
||||
|
||||
// Assert that type is a concrete hardware layer (not KMP_HW_UNKNOWN or
// KMP_HW_LAST); debug builds only.
#define KMP_ASSERT_VALID_HW_TYPE(type) \
  KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)

// Iterate a fresh loop variable `type` over every concrete hardware layer.
#define KMP_FOREACH_HW_TYPE(type) \
  for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
       type = (kmp_hw_t)((int)type + 1))

// Message-catalog (i18n) name for a layer type; plural selects the plural
// form. Unrecognized types yield the catalog's "unknown" string.
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
|
||||
|
||||
/* Only Linux* OS and Windows* OS support thread affinity. */
|
||||
#if KMP_AFFINITY_SUPPORTED
|
||||
|
||||
|
@ -45,6 +45,218 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
|
||||
thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
|
||||
}
|
||||
|
||||
// Translate a topology layer type into its human-readable name from the
// message catalog. plural selects the plural form of the name; any type
// without a catalog entry (e.g. KMP_HW_UNKNOWN) maps to the "unknown" string.
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural) {
  if (plural) {
    switch (type) {
    case KMP_HW_SOCKET:
      return KMP_I18N_STR(Sockets);
    case KMP_HW_DIE:
      return KMP_I18N_STR(Dice);
    case KMP_HW_MODULE:
      return KMP_I18N_STR(Modules);
    case KMP_HW_TILE:
      return KMP_I18N_STR(Tiles);
    case KMP_HW_NUMA:
      return KMP_I18N_STR(NumaDomains);
    case KMP_HW_L3:
      return KMP_I18N_STR(L3Caches);
    case KMP_HW_L2:
      return KMP_I18N_STR(L2Caches);
    case KMP_HW_L1:
      return KMP_I18N_STR(L1Caches);
    case KMP_HW_CORE:
      return KMP_I18N_STR(Cores);
    case KMP_HW_THREAD:
      return KMP_I18N_STR(Threads);
    case KMP_HW_PROC_GROUP:
      return KMP_I18N_STR(ProcGroups);
    default:
      break;
    }
  } else {
    switch (type) {
    case KMP_HW_SOCKET:
      return KMP_I18N_STR(Socket);
    case KMP_HW_DIE:
      return KMP_I18N_STR(Die);
    case KMP_HW_MODULE:
      return KMP_I18N_STR(Module);
    case KMP_HW_TILE:
      return KMP_I18N_STR(Tile);
    case KMP_HW_NUMA:
      return KMP_I18N_STR(NumaDomain);
    case KMP_HW_L3:
      return KMP_I18N_STR(L3Cache);
    case KMP_HW_L2:
      return KMP_I18N_STR(L2Cache);
    case KMP_HW_L1:
      return KMP_I18N_STR(L1Cache);
    case KMP_HW_CORE:
      return KMP_I18N_STR(Core);
    case KMP_HW_THREAD:
      return KMP_I18N_STR(Thread);
    case KMP_HW_PROC_GROUP:
      return KMP_I18N_STR(ProcGroup);
    default:
      break;
    }
  }
  return KMP_I18N_STR(Unknown);
}
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when you
// have one thread context per core, we don't want the extra thread context
// level if it offers no unique labels. So they are removed.
// return value: the new depth of address2os
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, kmp_hw_t *types) {
  int preference[KMP_HW_LAST];
  int top_index1, top_index2;
  // Set up preference associative array: when two adjacent layers collapse
  // into one, the layer with the higher preference keeps its type name.
  preference[KMP_HW_PROC_GROUP] = 110;
  preference[KMP_HW_SOCKET] = 100;
  preference[KMP_HW_CORE] = 95;
  preference[KMP_HW_THREAD] = 90;
  preference[KMP_HW_DIE] = 85;
  preference[KMP_HW_NUMA] = 80;
  preference[KMP_HW_TILE] = 75;
  preference[KMP_HW_MODULE] = 73;
  preference[KMP_HW_L3] = 70;
  preference[KMP_HW_L2] = 65;
  preference[KMP_HW_L1] = 60;
  // Walk adjacent layer pairs (top_index1, top_index2) from shallowest to
  // deepest, merging any pair where the deeper layer adds no information.
  top_index1 = 0;
  top_index2 = 1;
  while (top_index1 < depth - 1 && top_index2 < depth) {
    KMP_DEBUG_ASSERT(top_index1 >= 0 && top_index1 < depth);
    KMP_DEBUG_ASSERT(top_index2 >= 0 && top_index2 < depth);
    kmp_hw_t type1 = types[top_index1];
    kmp_hw_t type2 = types[top_index2];
    // The socket/core pair is never collapsed, even if radix 1.
    if (type1 == KMP_HW_SOCKET && type2 == KMP_HW_CORE) {
      top_index1 = top_index2++;
      continue;
    }
    // radix1: every object at layer top_index1 contains exactly one object
    // at layer top_index2. all_same: the deeper layer's ids never change at
    // all (e.g. all zero). Assumes addrP is sorted by labels — TODO confirm
    // at the call site.
    bool radix1 = true;
    bool all_same = true;
    int id1 = addrP[0].first.labels[top_index1];
    int id2 = addrP[0].first.labels[top_index2];
    int pref1 = preference[type1];
    int pref2 = preference[type2];
    for (int hwidx = 1; hwidx < nTh; ++hwidx) {
      if (addrP[hwidx].first.labels[top_index1] == id1 &&
          addrP[hwidx].first.labels[top_index2] != id2) {
        radix1 = false;
        break;
      }
      if (addrP[hwidx].first.labels[top_index2] != id2)
        all_same = false;
      id1 = addrP[hwidx].first.labels[top_index1];
      id2 = addrP[hwidx].first.labels[top_index2];
    }
    if (radix1) {
      // Select the layer to remove based on preference
      // NOTE(review): remove_type/keep_type are computed but not used in this
      // function body — presumably reserved for an equivalence map; confirm.
      kmp_hw_t remove_type, keep_type;
      int remove_layer, remove_layer_ids;
      if (pref1 > pref2) {
        remove_type = type2;
        remove_layer = remove_layer_ids = top_index2;
        keep_type = type1;
      } else {
        remove_type = type1;
        remove_layer = remove_layer_ids = top_index1;
        keep_type = type2;
      }
      // If all the indexes for the second (deeper) layer are the same.
      // e.g., all are zero, then make sure to keep the first layer's ids
      if (all_same)
        remove_layer_ids = top_index2;
      // Remove radix one type by setting the equivalence, removing the id from
      // the hw threads and removing the layer from types and depth
      for (int idx = 0; idx < nTh; ++idx) {
        Address &hw_thread = addrP[idx].first;
        for (int d = remove_layer_ids; d < depth - 1; ++d)
          hw_thread.labels[d] = hw_thread.labels[d + 1];
        hw_thread.depth--;
      }
      for (int idx = remove_layer; idx < depth - 1; ++idx)
        types[idx] = types[idx + 1];
      depth--;
    } else {
      // Pair is informative; advance to the next adjacent pair.
      top_index1 = top_index2++;
    }
  }
  KMP_ASSERT(depth > 0);
  return depth;
}
|
||||
// Gather the count of each topology layer and the ratio
// ratio contains the number of types[i] / types[i+1] and so forth
// count contains the absolute number of types[i]
// NOTE(review): assumes addrP is sorted by labels so that a label change at
// some layer marks the start of a new object at that layer and every deeper
// layer — confirm callers sort before invoking this.
static void __kmp_affinity_gather_enumeration_information(AddrUnsPair *addrP,
                                                          int nTh, int depth,
                                                          kmp_hw_t *types,
                                                          int *ratio,
                                                          int *count) {
  int previous_id[KMP_HW_LAST]; // last label seen per layer (-1 = none yet)
  int max[KMP_HW_LAST]; // objects counted so far within the current parent

  for (int i = 0; i < depth; ++i) {
    previous_id[i] = -1;
    max[i] = 0;
    count[i] = 0;
    ratio[i] = 0;
  }
  for (int i = 0; i < nTh; ++i) {
    Address &hw_thread = addrP[i].first;
    for (int layer = 0; layer < depth; ++layer) {
      int id = hw_thread.labels[layer];
      if (id != previous_id[layer]) {
        // Add an additional increment to each count
        for (int l = layer; l < depth; ++l)
          count[l]++;
        // Keep track of topology layer ratio statistics
        max[layer]++;
        for (int l = layer + 1; l < depth; ++l) {
          if (max[l] > ratio[l])
            ratio[l] = max[l];
          max[l] = 1; // new parent begins: restart the per-parent tally
        }
        break; // deeper layers handled above; stop scanning this thread
      }
    }
    for (int layer = 0; layer < depth; ++layer) {
      previous_id[layer] = hw_thread.labels[layer];
    }
  }
  // Fold in the tallies from the final run of objects.
  for (int layer = 0; layer < depth; ++layer) {
    if (max[layer] > ratio[layer])
      ratio[layer] = max[layer];
  }
}
|
||||
|
||||
// A topology is uniform when the product of the per-level branching ratios
// equals the total count of objects at the deepest level — i.e. every parent
// at each layer has the same number of children.
static bool __kmp_affinity_discover_uniformity(int depth, int *ratio,
                                               int *count) {
  int product = 1;
  int level = 0;
  while (level < depth) {
    product *= ratio[level];
    ++level;
  }
  return product == count[depth - 1];
}
|
||||
|
||||
// Number of objects at deep_level per single object at shallow_level,
// computed as the product of the branching ratios over the levels strictly
// between them (inclusive of deep_level). Returns 1 when either level index
// is negative, i.e. the level is not present in the topology.
static inline int __kmp_hwloc_calculate_ratio(int *ratio, int deep_level,
                                              int shallow_level) {
  if (deep_level < 0 || shallow_level < 0)
    return 1;
  int result = 1;
  for (int lvl = deep_level; lvl > shallow_level; --lvl)
    result *= ratio[lvl];
  return result;
}
|
||||
#endif // KMP_USE_HWLOC
|
||||
|
||||
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc. pkgLevel/coreLevel/threadLevel give the indices of those
// layers within the labels array (callers may pass -1 for absent layers —
// TODO confirm at call sites).
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      // Name the level: the three known layers get their catalog names,
      // layers deeper than the package print as "node_<n>", and layers
      // shallower than the package print as "L<n>".
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf); // buf is re-initialized for each proc
  }
}
|
||||
|
||||
#if KMP_AFFINITY_SUPPORTED
|
||||
|
||||
bool KMPAffinity::picked_api = false;
|
||||
@ -313,72 +525,18 @@ inline static bool __kmp_affinity_uniform_topology() {
|
||||
return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
|
||||
}
|
||||
|
||||
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc. pkgLevel/coreLevel/threadLevel give the indices of those
// layers within the labels array (callers may pass -1 for absent layers —
// TODO confirm at call sites).
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      // Name the level: the three known layers get their catalog names,
      // layers deeper than the package print as "node_<n>", and layers
      // shallower than the package print as "L<n>".
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf); // buf is re-initialized for each proc
  }
}
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
|
||||
static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
|
||||
int depth, int *levels) {
|
||||
int depth, kmp_hw_t *types) {
|
||||
int proc;
|
||||
kmp_str_buf_t buf;
|
||||
__kmp_str_buf_init(&buf);
|
||||
KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
|
||||
for (proc = 0; proc < len; proc++) {
|
||||
__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
|
||||
addrP[proc].first.labels[0]);
|
||||
if (depth > 1) {
|
||||
int level = 1; // iterate over levels
|
||||
int label = 1; // iterate over labels
|
||||
if (__kmp_numa_detected)
|
||||
// node level follows package
|
||||
if (levels[level++] > 0)
|
||||
__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
|
||||
addrP[proc].first.labels[label++]);
|
||||
if (__kmp_tile_depth > 0)
|
||||
// tile level follows node if any, or package
|
||||
if (levels[level++] > 0)
|
||||
__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
|
||||
addrP[proc].first.labels[label++]);
|
||||
if (levels[level++] > 0)
|
||||
// core level follows
|
||||
__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
|
||||
addrP[proc].first.labels[label++]);
|
||||
if (levels[level++] > 0)
|
||||
// thread level is the latest
|
||||
__kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
|
||||
addrP[proc].first.labels[label++]);
|
||||
KMP_DEBUG_ASSERT(label == depth);
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
__kmp_str_buf_print(&buf, "%s %d ", __kmp_hw_get_catalog_string(types[i]),
|
||||
addrP[proc].first.labels[i]);
|
||||
}
|
||||
KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
|
||||
__kmp_str_buf_clear(&buf);
|
||||
@ -386,53 +544,47 @@ static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
|
||||
__kmp_str_buf_free(&buf);
|
||||
}
|
||||
|
||||
static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;
|
||||
// Returns true if the hwloc object represents a cache of any level/kind.
static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
#if HWLOC_API_VERSION >= 0x00020000
  // hwloc 2.x: caches have per-level object types; use the helper predicate.
  return hwloc_obj_type_is_cache(obj->type);
#else
  // hwloc 1.x: all caches share the single HWLOC_OBJ_CACHE type.
  return obj->type == HWLOC_OBJ_CACHE;
#endif
}
|
||||
|
||||
// This function removes the topology levels that are radix 1 and don't offer
|
||||
// further information about the topology. The most common example is when you
|
||||
// have one thread context per core, we don't want the extra thread context
|
||||
// level if it offers no unique labels. So they are removed.
|
||||
// return value: the new depth of address2os
|
||||
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
|
||||
int depth, int *levels) {
|
||||
int level;
|
||||
int i;
|
||||
int radix1_detected;
|
||||
int new_depth = depth;
|
||||
for (level = depth - 1; level > 0; --level) {
|
||||
// Detect if this level is radix 1
|
||||
radix1_detected = 1;
|
||||
for (i = 1; i < nTh; ++i) {
|
||||
if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
|
||||
// There are differing label values for this level so it stays
|
||||
radix1_detected = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!radix1_detected)
|
||||
continue;
|
||||
// Radix 1 was detected
|
||||
--new_depth;
|
||||
levels[level] = -1; // mark level as not present in address2os array
|
||||
if (level == new_depth) {
|
||||
// "turn off" deepest level, just decrement the depth that removes
|
||||
// the level from address2os array
|
||||
for (i = 0; i < nTh; ++i) {
|
||||
addrP[i].first.depth--;
|
||||
}
|
||||
} else {
|
||||
// For other levels, we move labels over and also reduce the depth
|
||||
int j;
|
||||
for (j = level; j < new_depth; ++j) {
|
||||
for (i = 0; i < nTh; ++i) {
|
||||
addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
|
||||
addrP[i].first.depth--;
|
||||
}
|
||||
levels[j + 1] -= 1;
|
||||
// Returns KMP_HW_* type derived from HWLOC_* type
|
||||
static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {
|
||||
|
||||
if (__kmp_hwloc_is_cache_type(obj)) {
|
||||
if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
|
||||
return KMP_HW_UNKNOWN;
|
||||
switch (obj->attr->cache.depth) {
|
||||
case 1:
|
||||
return KMP_HW_L1;
|
||||
case 2:
|
||||
#if KMP_MIC_SUPPORTED
|
||||
if (__kmp_mic_type == mic3) {
|
||||
return KMP_HW_TILE;
|
||||
}
|
||||
#endif
|
||||
return KMP_HW_L2;
|
||||
case 3:
|
||||
return KMP_HW_L3;
|
||||
}
|
||||
return KMP_HW_UNKNOWN;
|
||||
}
|
||||
return new_depth;
|
||||
|
||||
switch (obj->type) {
|
||||
case HWLOC_OBJ_PACKAGE:
|
||||
return KMP_HW_SOCKET;
|
||||
case HWLOC_OBJ_NUMANODE:
|
||||
return KMP_HW_NUMA;
|
||||
case HWLOC_OBJ_CORE:
|
||||
return KMP_HW_CORE;
|
||||
case HWLOC_OBJ_PU:
|
||||
return KMP_HW_THREAD;
|
||||
}
|
||||
return KMP_HW_UNKNOWN;
|
||||
}
|
||||
|
||||
// Returns the number of objects of type 'type' below 'obj' within the topology
|
||||
@ -484,93 +636,48 @@ static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
|
||||
return sum; // will be 0 if no one found (as PU arity is 0)
|
||||
}
|
||||
|
||||
static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
|
||||
int &nActiveThreads,
|
||||
int &num_active_cores,
|
||||
hwloc_obj_t obj, int depth,
|
||||
int *labels) {
|
||||
hwloc_obj_t core = NULL;
|
||||
hwloc_topology_t &tp = __kmp_hwloc_topology;
|
||||
int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
|
||||
for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
|
||||
hwloc_obj_t pu = NULL;
|
||||
KMP_DEBUG_ASSERT(core != NULL);
|
||||
int num_active_threads = 0;
|
||||
int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
|
||||
// int NT = core->arity; pu = core->first_child; // faster?
|
||||
for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
|
||||
KMP_DEBUG_ASSERT(pu != NULL);
|
||||
if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
|
||||
continue; // skip inactive (inaccessible) unit
|
||||
Address addr(depth + 2);
|
||||
KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
|
||||
obj->os_index, obj->logical_index, core->os_index,
|
||||
core->logical_index, pu->os_index, pu->logical_index));
|
||||
for (int i = 0; i < depth; ++i)
|
||||
addr.labels[i] = labels[i]; // package, etc.
|
||||
addr.labels[depth] = core_id; // core
|
||||
addr.labels[depth + 1] = pu_id; // pu
|
||||
addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
|
||||
__kmp_pu_os_idx[nActiveThreads] = pu->os_index;
|
||||
nActiveThreads++;
|
||||
++num_active_threads; // count active threads per core
|
||||
}
|
||||
if (num_active_threads) { // were there any active threads on the core?
|
||||
++__kmp_ncores; // count total active cores
|
||||
++num_active_cores; // count active cores per socket
|
||||
if (num_active_threads > __kmp_nThreadsPerCore)
|
||||
__kmp_nThreadsPerCore = num_active_threads; // calc maximum
|
||||
// This gets the sub_id for a lower object under a higher object in the
|
||||
// topology tree
|
||||
static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
|
||||
hwloc_obj_t lower) {
|
||||
hwloc_obj_t obj;
|
||||
hwloc_obj_type_t ltype = lower->type;
|
||||
int lindex = lower->logical_index - 1;
|
||||
int sub_id = 0;
|
||||
// Get the previous lower object
|
||||
obj = hwloc_get_obj_by_type(t, ltype, lindex);
|
||||
while (obj && lindex >= 0 &&
|
||||
hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
|
||||
if (obj->userdata) {
|
||||
sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
|
||||
break;
|
||||
}
|
||||
sub_id++;
|
||||
lindex--;
|
||||
obj = hwloc_get_obj_by_type(t, ltype, lindex);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if NUMA node detected below the package,
|
||||
// and if tile object is detected and return its depth
|
||||
static int __kmp_hwloc_check_numa() {
|
||||
hwloc_topology_t &tp = __kmp_hwloc_topology;
|
||||
hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
|
||||
int depth, l2cache_depth, package_depth;
|
||||
|
||||
// Get some PU
|
||||
hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
|
||||
if (hT == NULL) // something has gone wrong
|
||||
return 1;
|
||||
|
||||
// check NUMA node below PACKAGE
|
||||
hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
|
||||
hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
|
||||
KMP_DEBUG_ASSERT(hS != NULL);
|
||||
if (hN != NULL && hN->depth > hS->depth) {
|
||||
__kmp_numa_detected = TRUE; // socket includes node(s)
|
||||
if (__kmp_affinity_gran == affinity_gran_node) {
|
||||
__kmp_affinity_gran = affinity_gran_numa;
|
||||
}
|
||||
}
|
||||
|
||||
package_depth = hwloc_get_type_depth(tp, HWLOC_OBJ_PACKAGE);
|
||||
l2cache_depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
|
||||
// check tile, get object by depth because of multiple caches possible
|
||||
depth = (l2cache_depth < package_depth) ? package_depth : l2cache_depth;
|
||||
hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
|
||||
hC = NULL; // not used, but reset it here just in case
|
||||
if (hL != NULL &&
|
||||
__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
|
||||
__kmp_tile_depth = depth; // tile consists of multiple cores
|
||||
return 0;
|
||||
// store sub_id + 1 so that 0 is differed from NULL
|
||||
lower->userdata = RCAST(void *, sub_id + 1);
|
||||
return sub_id;
|
||||
}
|
||||
|
||||
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
|
||||
kmp_i18n_id_t *const msg_id) {
|
||||
hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
|
||||
*address2os = NULL;
|
||||
kmp_hw_t type;
|
||||
int hw_thread_index, sub_id, nActiveThreads;
|
||||
int depth;
|
||||
hwloc_obj_t pu, obj, root, prev;
|
||||
int ratio[KMP_HW_LAST];
|
||||
int count[KMP_HW_LAST];
|
||||
kmp_hw_t types[KMP_HW_LAST];
|
||||
|
||||
hwloc_topology_t tp = __kmp_hwloc_topology;
|
||||
*msg_id = kmp_i18n_null;
|
||||
|
||||
// Save the affinity mask for the current thread.
|
||||
kmp_affin_mask_t *oldMask;
|
||||
KMP_CPU_ALLOC(oldMask);
|
||||
__kmp_get_system_affinity(oldMask, TRUE);
|
||||
__kmp_hwloc_check_numa();
|
||||
|
||||
if (!KMP_AFFINITY_CAPABLE()) {
|
||||
// Hack to try and infer the machine topology using only the data
|
||||
@ -606,13 +713,44 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int depth = 3;
|
||||
int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
|
||||
int labels[3] = {0}; // package [,node] [,tile] - head of labels array
|
||||
if (__kmp_numa_detected)
|
||||
++depth;
|
||||
if (__kmp_tile_depth)
|
||||
++depth;
|
||||
root = hwloc_get_root_obj(tp);
|
||||
|
||||
// Figure out the depth and types in the topology
|
||||
depth = 0;
|
||||
pu = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
|
||||
obj = pu;
|
||||
types[depth] = KMP_HW_THREAD;
|
||||
depth++;
|
||||
while (obj != root && obj != NULL) {
|
||||
obj = obj->parent;
|
||||
#if HWLOC_API_VERSION >= 0x00020000
|
||||
if (obj->memory_arity) {
|
||||
hwloc_obj_t memory;
|
||||
for (memory = obj->memory_first_child; memory;
|
||||
memory = hwloc_get_next_child(tp, obj, memory)) {
|
||||
if (memory->type == HWLOC_OBJ_NUMANODE)
|
||||
break;
|
||||
}
|
||||
if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
|
||||
types[depth] = KMP_HW_NUMA;
|
||||
depth++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
type = __kmp_hwloc_type_2_topology_type(obj);
|
||||
if (type != KMP_HW_UNKNOWN) {
|
||||
types[depth] = type;
|
||||
depth++;
|
||||
}
|
||||
}
|
||||
KMP_ASSERT(depth > 0 && depth <= KMP_HW_LAST);
|
||||
|
||||
// Get the order for the types correct
|
||||
for (int i = 0, j = depth - 1; i < j; ++i, --j) {
|
||||
kmp_hw_t temp = types[i];
|
||||
types[i] = types[j];
|
||||
types[j] = temp;
|
||||
}
|
||||
|
||||
// Allocate the data structure to be returned.
|
||||
AddrUnsPair *retval =
|
||||
@ -620,105 +758,60 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
|
||||
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
|
||||
__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
|
||||
|
||||
// When affinity is off, this routine will still be called to set
|
||||
// __kmp_ncores, as well as __kmp_nThreadsPerCore,
|
||||
// nCoresPerPkg, & nPackages. Make sure all these vars are set
|
||||
// correctly, and return if affinity is not enabled.
|
||||
|
||||
hwloc_obj_t socket, node, tile;
|
||||
int nActiveThreads = 0;
|
||||
int socket_id = 0;
|
||||
// re-calculate globals to count only accessible resources
|
||||
__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
|
||||
nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
|
||||
for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
|
||||
socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
|
||||
socket_id++) {
|
||||
labels[0] = socket_id;
|
||||
if (__kmp_numa_detected) {
|
||||
int NN;
|
||||
int n_active_nodes = 0;
|
||||
node = NULL;
|
||||
NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
|
||||
&node);
|
||||
for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
|
||||
labels[1] = node_id;
|
||||
if (__kmp_tile_depth) {
|
||||
// NUMA + tiles
|
||||
int NT;
|
||||
int n_active_tiles = 0;
|
||||
tile = NULL;
|
||||
NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
|
||||
&tile);
|
||||
for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
|
||||
labels[2] = tl_id;
|
||||
int n_active_cores = 0;
|
||||
__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
|
||||
n_active_cores, tile, 3, labels);
|
||||
if (n_active_cores) { // were there any active cores on the socket?
|
||||
++n_active_tiles; // count active tiles per node
|
||||
if (n_active_cores > nCorePerTile)
|
||||
nCorePerTile = n_active_cores; // calc maximum
|
||||
}
|
||||
}
|
||||
if (n_active_tiles) { // were there any active tiles on the socket?
|
||||
++n_active_nodes; // count active nodes per package
|
||||
if (n_active_tiles > nTilePerNode)
|
||||
nTilePerNode = n_active_tiles; // calc maximum
|
||||
}
|
||||
} else {
|
||||
// NUMA, no tiles
|
||||
int n_active_cores = 0;
|
||||
__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
|
||||
n_active_cores, node, 2, labels);
|
||||
if (n_active_cores) { // were there any active cores on the socket?
|
||||
++n_active_nodes; // count active nodes per package
|
||||
if (n_active_cores > nCorePerNode)
|
||||
nCorePerNode = n_active_cores; // calc maximum
|
||||
hw_thread_index = 0;
|
||||
pu = NULL;
|
||||
nActiveThreads = 0;
|
||||
while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
|
||||
int index = depth - 1;
|
||||
bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
|
||||
Address hw_thread(depth);
|
||||
if (included) {
|
||||
hw_thread.labels[index] = pu->logical_index;
|
||||
__kmp_pu_os_idx[hw_thread_index] = pu->os_index;
|
||||
index--;
|
||||
nActiveThreads++;
|
||||
}
|
||||
obj = pu;
|
||||
prev = obj;
|
||||
while (obj != root && obj != NULL) {
|
||||
obj = obj->parent;
|
||||
#if HWLOC_API_VERSION >= 0x00020000
|
||||
// NUMA Nodes are handled differently since they are not within the
|
||||
// parent/child structure anymore. They are separate children
|
||||
// of obj (memory_first_child points to first memory child)
|
||||
if (obj->memory_arity) {
|
||||
hwloc_obj_t memory;
|
||||
for (memory = obj->memory_first_child; memory;
|
||||
memory = hwloc_get_next_child(tp, obj, memory)) {
|
||||
if (memory->type == HWLOC_OBJ_NUMANODE)
|
||||
break;
|
||||
}
|
||||
if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
|
||||
sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
|
||||
if (included) {
|
||||
hw_thread.labels[index] = memory->logical_index;
|
||||
hw_thread.labels[index + 1] = sub_id;
|
||||
index--;
|
||||
}
|
||||
prev = memory;
|
||||
}
|
||||
}
|
||||
if (n_active_nodes) { // were there any active nodes on the socket?
|
||||
++nPackages; // count total active packages
|
||||
if (n_active_nodes > nNodePerPkg)
|
||||
nNodePerPkg = n_active_nodes; // calc maximum
|
||||
}
|
||||
} else {
|
||||
if (__kmp_tile_depth) {
|
||||
// no NUMA, tiles
|
||||
int NT;
|
||||
int n_active_tiles = 0;
|
||||
tile = NULL;
|
||||
NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
|
||||
&tile);
|
||||
for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
|
||||
labels[1] = tl_id;
|
||||
int n_active_cores = 0;
|
||||
__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
|
||||
n_active_cores, tile, 2, labels);
|
||||
if (n_active_cores) { // were there any active cores on the socket?
|
||||
++n_active_tiles; // count active tiles per package
|
||||
if (n_active_cores > nCorePerTile)
|
||||
nCorePerTile = n_active_cores; // calc maximum
|
||||
}
|
||||
}
|
||||
if (n_active_tiles) { // were there any active tiles on the socket?
|
||||
++nPackages; // count total active packages
|
||||
if (n_active_tiles > nTilePerPkg)
|
||||
nTilePerPkg = n_active_tiles; // calc maximum
|
||||
}
|
||||
} else {
|
||||
// no NUMA, no tiles
|
||||
int n_active_cores = 0;
|
||||
__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
|
||||
socket, 1, labels);
|
||||
if (n_active_cores) { // were there any active cores on the socket?
|
||||
++nPackages; // count total active packages
|
||||
if (n_active_cores > nCoresPerPkg)
|
||||
nCoresPerPkg = n_active_cores; // calc maximum
|
||||
}
|
||||
#endif
|
||||
type = __kmp_hwloc_type_2_topology_type(obj);
|
||||
if (type != KMP_HW_UNKNOWN) {
|
||||
sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
|
||||
if (included) {
|
||||
hw_thread.labels[index] = obj->logical_index;
|
||||
hw_thread.labels[index + 1] = sub_id;
|
||||
index--;
|
||||
}
|
||||
prev = obj;
|
||||
}
|
||||
}
|
||||
if (included) {
|
||||
retval[hw_thread_index] = AddrUnsPair(hw_thread, pu->os_index);
|
||||
hw_thread_index++;
|
||||
}
|
||||
}
|
||||
|
||||
// If there's only one thread context to bind to, return now.
|
||||
@ -763,54 +856,86 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
|
||||
qsort(retval, nActiveThreads, sizeof(*retval),
|
||||
__kmp_affinity_cmp_Address_labels);
|
||||
|
||||
// Check to see if the machine topology is uniform
|
||||
int nPUs = nPackages * __kmp_nThreadsPerCore;
|
||||
if (__kmp_numa_detected) {
|
||||
if (__kmp_tile_depth) { // NUMA + tiles
|
||||
nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
|
||||
} else { // NUMA, no tiles
|
||||
nPUs *= (nNodePerPkg * nCorePerNode);
|
||||
}
|
||||
} else {
|
||||
if (__kmp_tile_depth) { // no NUMA, tiles
|
||||
nPUs *= (nTilePerPkg * nCorePerTile);
|
||||
} else { // no NUMA, no tiles
|
||||
nPUs *= nCoresPerPkg;
|
||||
}
|
||||
// Find any levels with radiix 1, and remove them from the map
|
||||
// (except for the package level).
|
||||
depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
|
||||
types);
|
||||
|
||||
__kmp_affinity_gather_enumeration_information(retval, nActiveThreads, depth,
|
||||
types, ratio, count);
|
||||
|
||||
for (int level = 0; level < depth; ++level) {
|
||||
if ((types[level] == KMP_HW_L2 || types[level] == KMP_HW_L3))
|
||||
__kmp_tile_depth = level;
|
||||
}
|
||||
unsigned uniform = (nPUs == nActiveThreads);
|
||||
|
||||
// This routine should set __kmp_ncores, as well as
|
||||
// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
|
||||
int thread_level, core_level, tile_level, numa_level, socket_level;
|
||||
thread_level = core_level = tile_level = numa_level = socket_level = -1;
|
||||
for (int level = 0; level < depth; ++level) {
|
||||
if (types[level] == KMP_HW_THREAD)
|
||||
thread_level = level;
|
||||
else if (types[level] == KMP_HW_CORE)
|
||||
core_level = level;
|
||||
else if (types[level] == KMP_HW_SOCKET)
|
||||
socket_level = level;
|
||||
else if (types[level] == KMP_HW_TILE)
|
||||
tile_level = level;
|
||||
else if (types[level] == KMP_HW_NUMA)
|
||||
numa_level = level;
|
||||
}
|
||||
__kmp_nThreadsPerCore =
|
||||
__kmp_hwloc_calculate_ratio(ratio, thread_level, core_level);
|
||||
nCoresPerPkg = __kmp_hwloc_calculate_ratio(ratio, core_level, socket_level);
|
||||
if (socket_level >= 0)
|
||||
nPackages = count[socket_level];
|
||||
else
|
||||
nPackages = 1;
|
||||
if (core_level >= 0)
|
||||
__kmp_ncores = count[core_level];
|
||||
else
|
||||
__kmp_ncores = 1;
|
||||
|
||||
unsigned uniform = __kmp_affinity_discover_uniformity(depth, ratio, count);
|
||||
|
||||
// Print the machine topology summary.
|
||||
if (__kmp_affinity_verbose) {
|
||||
kmp_hw_t numerator_type, denominator_type;
|
||||
kmp_str_buf_t buf;
|
||||
__kmp_str_buf_init(&buf);
|
||||
KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
|
||||
if (uniform) {
|
||||
KMP_INFORM(Uniform, "KMP_AFFINITY");
|
||||
} else {
|
||||
KMP_INFORM(NonUniform, "KMP_AFFINITY");
|
||||
}
|
||||
if (__kmp_numa_detected) {
|
||||
if (__kmp_tile_depth) { // NUMA + tiles
|
||||
KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
|
||||
nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
|
||||
__kmp_ncores);
|
||||
} else { // NUMA, no tiles
|
||||
KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
|
||||
nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
|
||||
nPUs *= (nNodePerPkg * nCorePerNode);
|
||||
}
|
||||
} else {
|
||||
if (__kmp_tile_depth) { // no NUMA, tiles
|
||||
KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
|
||||
nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
|
||||
} else { // no NUMA, no tiles
|
||||
kmp_str_buf_t buf;
|
||||
__kmp_str_buf_init(&buf);
|
||||
__kmp_str_buf_print(&buf, "%d", nPackages);
|
||||
KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
|
||||
__kmp_nThreadsPerCore, __kmp_ncores);
|
||||
__kmp_str_buf_free(&buf);
|
||||
|
||||
__kmp_str_buf_clear(&buf);
|
||||
|
||||
if (core_level < 0)
|
||||
core_level = depth - 1;
|
||||
int ncores = count[core_level];
|
||||
|
||||
denominator_type = KMP_HW_UNKNOWN;
|
||||
for (int level = 0; level < depth; ++level) {
|
||||
int c;
|
||||
bool plural;
|
||||
numerator_type = types[level];
|
||||
c = ratio[level];
|
||||
plural = (c > 1);
|
||||
if (level == 0) {
|
||||
__kmp_str_buf_print(&buf, "%d %s", c, __kmp_hw_get_catalog_string(
|
||||
numerator_type, plural));
|
||||
} else {
|
||||
__kmp_str_buf_print(&buf, " x %d %s/%s", c,
|
||||
__kmp_hw_get_catalog_string(numerator_type, plural),
|
||||
__kmp_hw_get_catalog_string(denominator_type));
|
||||
}
|
||||
denominator_type = numerator_type;
|
||||
}
|
||||
KMP_INFORM(TopologyGeneric, "KMP_AFFINITY", buf.str, ncores);
|
||||
__kmp_str_buf_free(&buf);
|
||||
}
|
||||
|
||||
if (__kmp_affinity_type == affinity_none) {
|
||||
@ -819,30 +944,27 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int depth_full = depth; // number of levels before compressing
|
||||
// Find any levels with radix 1, and remove them from the map
|
||||
// (except for the package level).
|
||||
depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
|
||||
levels);
|
||||
// Set the granularity level based on what levels are modeled
|
||||
// in the machine topology map.
|
||||
if (__kmp_affinity_gran == affinity_gran_node)
|
||||
__kmp_affinity_gran = affinity_gran_numa;
|
||||
KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
|
||||
if (__kmp_affinity_gran_levels < 0) {
|
||||
// Set the granularity level based on what levels are modeled
|
||||
// in the machine topology map.
|
||||
__kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
|
||||
if (__kmp_affinity_gran > affinity_gran_thread) {
|
||||
for (int i = 1; i <= depth_full; ++i) {
|
||||
if (__kmp_affinity_gran <= i) // only count deeper levels
|
||||
break;
|
||||
if (levels[depth_full - i] > 0)
|
||||
__kmp_affinity_gran_levels++;
|
||||
}
|
||||
}
|
||||
if (__kmp_affinity_gran > affinity_gran_package)
|
||||
__kmp_affinity_gran_levels++; // e.g. granularity = group
|
||||
if ((thread_level >= 0) && (__kmp_affinity_gran > affinity_gran_thread))
|
||||
__kmp_affinity_gran_levels++;
|
||||
if ((core_level >= 0) && (__kmp_affinity_gran > affinity_gran_core))
|
||||
__kmp_affinity_gran_levels++;
|
||||
if ((tile_level >= 0) && (__kmp_affinity_gran > affinity_gran_tile))
|
||||
__kmp_affinity_gran_levels++;
|
||||
if ((numa_level >= 0) && (__kmp_affinity_gran > affinity_gran_numa))
|
||||
__kmp_affinity_gran_levels++;
|
||||
if ((socket_level >= 0) && (__kmp_affinity_gran > affinity_gran_package))
|
||||
__kmp_affinity_gran_levels++;
|
||||
}
|
||||
|
||||
if (__kmp_affinity_verbose)
|
||||
__kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);
|
||||
__kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, types);
|
||||
|
||||
KMP_CPU_FREE(oldMask);
|
||||
*address2os = retval;
|
||||
|
Loading…
x
Reference in New Issue
Block a user