[OpenMP][libomp] Introduce oneAPI compiler support

Introduce KMP_COMPILER_ICX macro to represent compilation with oneAPI
compiler.

Fixup flag detection and compiler ID detection in CMake. Older CMake's
detect IntelLLVM as Clang.

Fix compiler warnings.

Fixup many of the tests to have non-empty parallel regions as they are
elided by oneAPI compiler.
This commit is contained in:
Jonathan Peyton 2022-01-31 10:04:49 -06:00
parent aabf6e65fd
commit 1234011b80
35 changed files with 210 additions and 135 deletions

View File

@ -10,6 +10,7 @@ function(write_compiler_information lang)
set(information "${information}\\;${CMAKE_${lang}_COMPILER_VERSION}")
set(information "${information}\\;${${lang}_FLAGS}")
set(information "${information}\\;${${lang}_HAS_TSAN_FLAG}")
set(information "${information}\\;${${lang}_HAS_OMIT_FRAME_POINTER}")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${lang}CompilerInformation.txt ${information})
endfunction(write_compiler_information)
@ -40,6 +41,9 @@ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
add_experimental_isel_flag(CXX)
endif()
check_c_compiler_flag("-fno-omit-frame-pointer" C_HAS_OMIT_FRAME_POINTER)
check_cxx_compiler_flag("-fno-omit-frame-pointer" CXX_HAS_OMIT_FRAME_POINTER)
SET(CMAKE_REQUIRED_FLAGS "-fsanitize=thread")
check_c_compiler_flag("" C_HAS_TSAN_FLAG)
check_cxx_compiler_flag("" CXX_HAS_TSAN_FLAG)

View File

@ -76,12 +76,14 @@ macro(extract_test_compiler_information lang file)
list(GET information 2 version)
list(GET information 3 openmp_flags)
list(GET information 4 has_tsan_flags)
list(GET information 5 has_omit_frame_pointer_flags)
set(OPENMP_TEST_${lang}_COMPILER_PATH ${path})
set(OPENMP_TEST_${lang}_COMPILER_ID ${id})
set(OPENMP_TEST_${lang}_COMPILER_VERSION ${version})
set(OPENMP_TEST_${lang}_COMPILER_OPENMP_FLAGS ${openmp_flags})
set(OPENMP_TEST_${lang}_COMPILER_HAS_TSAN_FLAGS ${has_tsan_flags})
set(OPENMP_TEST_${lang}_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS ${has_omit_frame_pointer_flags})
endmacro()
# Function to set variables with information about the test compiler.
@ -98,6 +100,7 @@ function(set_test_compiler_information dir)
set(OPENMP_TEST_COMPILER_VERSION "${OPENMP_TEST_C_COMPILER_VERSION}" PARENT_SCOPE)
set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "${OPENMP_TEST_C_COMPILER_OPENMP_FLAGS}" PARENT_SCOPE)
set(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS "${OPENMP_TEST_C_COMPILER_HAS_TSAN_FLAGS}" PARENT_SCOPE)
set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS "${OPENMP_TEST_C_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS}" PARENT_SCOPE)
# Determine major version.
string(REGEX MATCH "[0-9]+" major "${OPENMP_TEST_C_COMPILER_VERSION}")
@ -149,6 +152,7 @@ else()
endif()
# TODO: Implement blockaddress in GlobalISel and remove this flag!
set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "-fopenmp ${OPENMP_TEST_COMPILER_THREAD_FLAGS} -fno-experimental-isel")
set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS 1)
endif()
# Function to set compiler features for use in lit.

View File

@ -1,4 +1,20 @@
include(CheckCXXCompilerFlag)
include(CheckCXXSourceCompiles)
# Check for oneAPI compiler (some older CMake versions detect as Clang)
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
check_cxx_source_compiles("#if (defined(__INTEL_CLANG_COMPILER) || defined(__INTEL_LLVM_COMPILER))
int main() { return 0; }
#else
not oneAPI
#endif" OPENMP_HAVE_ONEAPI_COMPILER)
if (OPENMP_HAVE_ONEAPI_COMPILER)
# According to CMake documentation, the compiler id should
# be IntelLLVM when detected oneAPI
set(CMAKE_C_COMPILER_ID "IntelLLVM")
set(CMAKE_CXX_COMPILER_ID "IntelLLVM")
endif()
endif()
check_cxx_compiler_flag(-Wall OPENMP_HAVE_WALL_FLAG)
check_cxx_compiler_flag(-Werror OPENMP_HAVE_WERROR_FLAG)

View File

@ -25,7 +25,7 @@ function(libomp_check_linker_flag flag boolean)
add_library(foo SHARED src_to_link.c)")
# Compiling as a part of runtimes introduces ARCH-unknown-linux-gnu as a part
# of a working directory. So adding a guard for unknown.
set(failed_regexes "[Ee]rror;[Uu]nknown[^-];[Ss]kipping;LINK : warning")
set(failed_regexes "[Ee]rror;[Uu]nknown[^-];[Ss]kipping;LINK : warning;Unsupported command line")
set(base_dir ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/link_flag_check_${boolean})
file(MAKE_DIRECTORY ${base_dir})
file(MAKE_DIRECTORY ${base_dir}/build)

View File

@ -139,7 +139,7 @@ elseif(NOT APPLE)
endif()
# Check Intel(R) C Compiler specific flags
if(CMAKE_C_COMPILER_ID STREQUAL "Intel")
if(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM")
check_cxx_compiler_flag(/Qlong_double LIBOMP_HAVE_LONG_DOUBLE_FLAG)
check_cxx_compiler_flag(/Qdiag-disable:177 LIBOMP_HAVE_DIAG_DISABLE_177_FLAG)
check_cxx_compiler_flag(/Qinline-min-size=1 LIBOMP_HAVE_INLINE_MIN_SIZE_FLAG)
@ -247,7 +247,7 @@ libomp_check_version_symbols(LIBOMP_HAVE_VERSION_SYMBOLS)
# Check if quad precision types are available
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
set(LIBOMP_HAVE_QUAD_PRECISION TRUE)
elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM")
if(LIBOMP_HAVE_EXTENDED_FLOAT_TYPES_FLAG)
set(LIBOMP_HAVE_QUAD_PRECISION TRUE)
else()

View File

@ -1124,7 +1124,7 @@ extern void __kmp_init_target_mem();
#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
extern kmp_uint64 __kmp_ticks_per_msec;
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_NOW() ((kmp_uint64)_rdtsc())
#else
#define KMP_NOW() __kmp_hardware_timestamp()

View File

@ -1765,7 +1765,7 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
hw_thread_index = 0;
pu = NULL;
while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) {
int index = depth - 1;
bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);

View File

@ -2452,6 +2452,7 @@ ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
(void)new_value; \
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
OP_CMPXCHG_CPT(TYPE, BITS, OP) \
}
@ -2461,6 +2462,7 @@ ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
LCK_ID, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
(void)new_value; \
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
}
@ -3162,6 +3164,7 @@ ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
(void)new_value; \
OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
}
@ -3171,6 +3174,7 @@ ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
LCK_ID, GOMP_FLAG) \
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
TYPE new_value; \
(void)new_value; \
OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
}

View File

@ -251,6 +251,9 @@ struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t {
kmp_cmplx128_a4_t() : q() {}
#if defined(__cplusplus) && (KMP_OS_WINDOWS)
kmp_cmplx128_a4_t(const std::complex<_Quad> &c128) : q(c128) {}
#endif
kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {}
kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) {
@ -314,6 +317,9 @@ struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t {
kmp_cmplx128_a16_t() : q() {}
#if defined(__cplusplus) && (KMP_OS_WINDOWS)
kmp_cmplx128_a16_t(const std::complex<_Quad> &c128) : q(c128) {}
#endif
kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {}
kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) {

View File

@ -2163,7 +2163,6 @@ void __kmp_join_barrier(int gtid) {
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team;
kmp_uint nproc;
int tid;
#ifdef KMP_DEBUG
int team_id;
@ -2176,12 +2175,14 @@ void __kmp_join_barrier(int gtid) {
itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif /* USE_ITT_BUILD */
#if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
int nproc = this_thr->th.th_team_nproc;
#endif
KMP_MB();
// Get current info
team = this_thr->th.th_team;
nproc = this_thr->th.th_team_nproc;
KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
team_id = team->t.t_id;
@ -2354,7 +2355,7 @@ void __kmp_join_barrier(int gtid) {
// Set arrive time to zero to be able to check it in
// __kmp_invoke_task(); the same is done inside the loop below
this_thr->th.th_bar_arrive_time = 0;
for (kmp_uint i = 1; i < nproc; ++i) {
for (int i = 1; i < nproc; ++i) {
delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
other_threads[i]->th.th_bar_arrive_time = 0;
}

View File

@ -685,13 +685,13 @@ void __kmpc_flush(ident_t *loc) {
if (!__kmp_cpuinfo.flags.sse2) {
// CPU cannot execute SSE2 instructions.
} else {
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
_mm_mfence();
#elif KMP_COMPILER_MSVC
MemoryBarrier();
#else
__sync_synchronize();
#endif // KMP_COMPILER_ICC
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
}
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \

View File

@ -226,16 +226,16 @@ kmp_omp_struct_info_t __kmp_omp_debug_struct_info = {
when 64-bit value is assigned to 32-bit pointer. Use this function
to suppress the warning. */
static inline void *__kmp_convert_to_ptr(kmp_uint64 addr) {
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "unsigned long long" to "char
// *" may lose significant bits
#pragma warning(disable : 1195) // conversion from integer to smaller pointer
#endif // KMP_COMPILER_ICC
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
return (void *)addr;
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#pragma warning(pop)
#endif // KMP_COMPILER_ICC
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
} // __kmp_convert_to_ptr
static int kmp_location_match(kmp_str_loc_t *loc, kmp_omp_nthr_item_t *item) {

View File

@ -1954,7 +1954,7 @@ static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
// We need a fence here, since we must ensure that no memory operations
// from later in this thread float above that read.
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
_mm_mfence();
#else
__sync_synchronize();

View File

@ -53,8 +53,12 @@
#define KMP_COMPILER_GCC 0
#define KMP_COMPILER_CLANG 0
#define KMP_COMPILER_MSVC 0
#define KMP_COMPILER_ICX 0
#if defined(__INTEL_COMPILER)
#if __INTEL_CLANG_COMPILER
#undef KMP_COMPILER_ICX
#define KMP_COMPILER_ICX 1
#elif defined(__INTEL_COMPILER)
#undef KMP_COMPILER_ICC
#define KMP_COMPILER_ICC 1
#elif defined(__clang__)
@ -85,7 +89,7 @@
/* Check for quad-precision extension. */
#define KMP_HAVE_QUAD 0
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
/* _Quad is already defined for icc */
#undef KMP_HAVE_QUAD
#define KMP_HAVE_QUAD 1
@ -448,8 +452,10 @@ enum kmp_mem_fence_type {
#pragma intrinsic(InterlockedExchangeAdd)
#pragma intrinsic(InterlockedCompareExchange)
#pragma intrinsic(InterlockedExchange)
#if !(KMP_COMPILER_ICX && KMP_32_BIT_ARCH)
#pragma intrinsic(InterlockedExchange64)
#endif
#endif
// Using InterlockedIncrement / InterlockedDecrement causes a library loading
// ordering problem, so we use InterlockedExchangeAdd instead.
@ -842,8 +848,14 @@ static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p,
(kmp_uint64)(sv))
#endif
#if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800
#define KMP_XCHG_FIXED8(p, v) \
__atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \
__ATOMIC_SEQ_CST)
#else
#define KMP_XCHG_FIXED8(p, v) \
__sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v))
#endif
#define KMP_XCHG_FIXED16(p, v) \
__sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v))
#define KMP_XCHG_FIXED32(p, v) \
@ -1026,7 +1038,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_MFENCE_() _mm_mfence()
#define KMP_SFENCE_() _mm_sfence()
#elif KMP_COMPILER_MSVC

View File

@ -8953,19 +8953,16 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
}
// Release all the workers
kmp_uint64 new_value; // new value for go
new_value = team->t.b->go_release();
team->t.b->go_release();
KMP_MFENCE();
// Workers should see transition status 2 and move to 0; but may need to be
// woken up first
size_t my_go_index;
int count = old_nthreads - 1;
while (count > 0) {
count = old_nthreads - 1;
for (int f = 1; f < old_nthreads; ++f) {
my_go_index = f / team->t.b->threads_per_go;
if (other_threads[f]->th.th_used_in_team.load() != 0) {
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(

View File

@ -159,11 +159,11 @@ void *kmp_malloc(size_t size) {
}
void *kmp_aligned_malloc(size_t sz, size_t a) {
i;
int err;
void *res;
#if KMP_OS_WINDOWS
res = _aligned_malloc(sz, a);
#else
int err;
if ((err = posix_memalign(&res, a, sz))) {
errno = err; // can be EINVAL or ENOMEM
res = NULL;
@ -393,12 +393,12 @@ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
void *omp_aligned_alloc(size_t a, size_t size, omp_allocator_handle_t al) {
i;
int err;
void *res;
#if KMP_OS_WINDOWS
res = _aligned_malloc(size, a);
#else
if (err = posix_memalign(&res, a, size)) {
int err;
if ((err = posix_memalign(&res, a, size))) {
errno = err; // can be EINVAL or ENOMEM
res = NULL;
}
@ -420,12 +420,12 @@ void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t al) {
void *omp_aligned_calloc(size_t a, size_t nmemb, size_t size,
omp_allocator_handle_t al) {
i;
int err;
void *res;
#if KMP_OS_WINDOWS
res = _aligned_recalloc(NULL, nmemb, size, a);
#else
if (err = posix_memalign(&res, a, nmemb * size)) {
int err;
if ((err = posix_memalign(&res, a, nmemb * size))) {
errno = err; // can be EINVAL or ENOMEM
res = NULL;
}

View File

@ -22,7 +22,9 @@
#define stringer(x) _stringer(x)
// Detect compiler.
#if KMP_COMPILER_ICC
#if KMP_COMPILER_ICX
#define KMP_COMPILER __VERSION__
#elif KMP_COMPILER_ICC
#if __INTEL_COMPILER == 1010
#define KMP_COMPILER "Intel(R) C++ Compiler 10.1"
#elif __INTEL_COMPILER == 1100
@ -53,8 +55,10 @@
#define KMP_COMPILER "Intel(R) C++ Compiler 19.0"
#elif __INTEL_COMPILER == 1910
#define KMP_COMPILER "Intel(R) C++ Compiler 19.1"
#elif __INTEL_COMPILER >= 9900
#define KMP_COMPILER "Intel(R) C++ Compiler mainline"
#elif __INTEL_COMPILER > 1910
#define KMP_COMPILER \
"Intel(R) C++ Compiler Classic " stringer(__INTEL_COMPILER) "." stringer( \
__INTEL_COMPILER_UPDATE)
#endif
#elif KMP_COMPILER_CLANG
#define KMP_COMPILER \

View File

@ -310,7 +310,8 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
OMPT_VERBOSE_INIT_PRINT("Opening %s... ", fname);
HMODULE h = LoadLibrary(fname);
if (!h) {
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n", GetLastError());
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n",
(unsigned)GetLastError());
} else {
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Success. \n");
OMPT_VERBOSE_INIT_PRINT("Searching for ompt_start_tool in %s... ",
@ -318,7 +319,7 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
if (!start_tool) {
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n",
GetLastError());
(unsigned)GetLastError());
} else
#else
#error Activation of OMPT is not supported on this platform.

View File

@ -568,7 +568,8 @@ void __kmp_gtid_set_specific(int gtid) {
if (__kmp_init_gtid) {
KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid,
__kmp_gtid_threadprivate_key));
if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1)))
kmp_intptr_t g = (kmp_intptr_t)gtid;
if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(g + 1)))
KMP_FATAL(TLSSetValueFailed);
} else {
KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
@ -934,9 +935,8 @@ void __kmp_terminate_thread(int gtid) {
}
void __kmp_clear_system_time(void) {
BOOL status;
LARGE_INTEGER time;
status = QueryPerformanceCounter(&time);
QueryPerformanceCounter(&time);
__kmp_win32_time = (kmp_int64)time.QuadPart;
}
@ -960,9 +960,8 @@ void __kmp_initialize_system_tick(void) {
/* Calculate the elapsed wall clock time for the user */
void __kmp_elapsed(double *t) {
BOOL status;
LARGE_INTEGER now;
status = QueryPerformanceCounter(&now);
QueryPerformanceCounter(&now);
*t = ((double)now.QuadPart) * __kmp_win32_tick;
}
@ -972,11 +971,8 @@ void __kmp_elapsed_tick(double *t) { *t = __kmp_win32_tick; }
void __kmp_read_system_time(double *delta) {
if (delta != NULL) {
BOOL status;
LARGE_INTEGER now;
status = QueryPerformanceCounter(&now);
QueryPerformanceCounter(&now);
*delta = ((double)(((kmp_int64)now.QuadPart) - __kmp_win32_time)) *
__kmp_win32_tick;
}
@ -1020,6 +1016,7 @@ extern "C" void *__stdcall __kmp_launch_worker(void *arg) {
if (__kmp_stkoffset > 0 && gtid > 0) {
padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
(void)padding;
}
KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive);
@ -1354,9 +1351,10 @@ static void __kmp_reap_common(kmp_info_t *th) {
/* NOTE: The ExitProcess(code) system call causes all threads to Terminate
with a exit_val = code. Because of this we can not rely on exit_val having
any particular value. */
kmp_intptr_t e = (kmp_intptr_t)exit_val;
if (exit_val == STILL_ACTIVE) {
KA_TRACE(1, ("__kmp_reap_common: thread still active.\n"));
} else if ((void *)exit_val != (void *)th) {
} else if ((void *)e != (void *)th) {
KA_TRACE(1, ("__kmp_reap_common: ExitProcess / TerminateThread used?\n"));
}
@ -1519,13 +1517,12 @@ void __kmp_thread_sleep(int millis) {
// Determine whether the given address is mapped into the current address space.
int __kmp_is_address_mapped(void *addr) {
DWORD status;
MEMORY_BASIC_INFORMATION lpBuffer;
SIZE_T dwLength;
dwLength = sizeof(MEMORY_BASIC_INFORMATION);
status = VirtualQuery(addr, &lpBuffer, dwLength);
VirtualQuery(addr, &lpBuffer, dwLength);
return !(((lpBuffer.State == MEM_RESERVE) || (lpBuffer.State == MEM_FREE)) ||
((lpBuffer.Protect == PAGE_NOACCESS) ||

View File

@ -30,6 +30,7 @@ pythonize_bool(LIBOMP_OMPT_OPTIONAL)
pythonize_bool(LIBOMP_HAVE_LIBM)
pythonize_bool(LIBOMP_HAVE_LIBATOMIC)
pythonize_bool(OPENMP_STANDALONE_BUILD)
pythonize_bool(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS)
add_library(ompt-print-callback INTERFACE)
target_include_directories(ompt-print-callback INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/ompt)

View File

@ -3,30 +3,22 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
// should print all for first parallel
omp_set_num_threads(4);
#pragma omp parallel
{ }
go_parallel_nthreads(4);
// should print all because of new threads
omp_set_num_threads(8);
#pragma omp parallel
{ }
go_parallel_nthreads(8);
// should not print anything here
omp_set_num_threads(6);
#pragma omp parallel
{ }
go_parallel_nthreads(6);
// should print all because of new thread
omp_set_num_threads(9);
#pragma omp parallel
{ }
go_parallel_nthreads(9);
// should not print anything here
omp_set_num_threads(2);
#pragma omp parallel
{ }
return 0;
go_parallel_nthreads(2);
return get_exit_value();
}
// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4

View File

@ -4,16 +4,16 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
omp_set_nested(1);
#pragma omp parallel num_threads(4)
{
#pragma omp parallel num_threads(3)
{ }
go_parallel_nthreads(3);
}
return 0;
return get_exit_value();
}
// CHECK: num_threads=4 TESTER: tl:1 at:0 tn:[0-3] nt:4

View File

@ -3,6 +3,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
// Currently, KMP_HOT_TEAMS_MAX_LEVEL has to be equal to the
// nest depth for intuitive behavior
@ -11,14 +12,11 @@ int main(int argc, char** argv) {
omp_set_nested(1);
#pragma omp parallel num_threads(4)
{
#pragma omp parallel num_threads(3)
{ }
#pragma omp parallel num_threads(3)
{ }
go_parallel_nthreads(3);
go_parallel_nthreads(3);
}
#pragma omp parallel num_threads(4)
{ }
return 0;
go_parallel_nthreads(4);
return get_exit_value();
}
// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4

View File

@ -3,30 +3,26 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
omp_set_nested(1);
#pragma omp parallel num_threads(1)
{
#pragma omp parallel num_threads(2)
{ }
go_parallel_nthreads(2);
#pragma omp parallel num_threads(2)
{
#pragma omp parallel num_threads(1)
{
#pragma omp parallel num_threads(2)
{ }
go_parallel_nthreads(2);
}
}
#pragma omp parallel num_threads(1)
{ }
go_parallel_nthreads(1);
}
#pragma omp parallel num_threads(2)
{ }
#pragma omp parallel num_threads(1)
{ }
return 0;
go_parallel_nthreads(2);
go_parallel_nthreads(1);
return get_exit_value();
}
// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1

View File

@ -3,29 +3,24 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
omp_set_nested(1);
#pragma omp parallel num_threads(1)
{
#pragma omp parallel num_threads(1)
{ }
#pragma omp parallel num_threads(1)
{ }
go_parallel_nthreads(1);
go_parallel_nthreads(1);
#pragma omp parallel num_threads(1)
{
#pragma omp parallel num_threads(1)
{ }
go_parallel_nthreads(1);
}
#pragma omp parallel num_threads(1)
{ }
go_parallel_nthreads(1);
}
#pragma omp parallel num_threads(1)
{ }
#pragma omp parallel num_threads(1)
{ }
return 0;
go_parallel_nthreads(1);
go_parallel_nthreads(1);
return get_exit_value();
}
// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1

View File

@ -4,26 +4,21 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N aff:{%A}");
omp_set_num_threads(8);
// Initial parallel
#pragma omp parallel proc_bind(spread)
{ }
#pragma omp parallel proc_bind(spread)
{ }
go_parallel_spread();
go_parallel_spread();
// Affinity changes here
#pragma omp parallel proc_bind(close)
{ }
#pragma omp parallel proc_bind(close)
{ }
go_parallel_close();
go_parallel_close();
// Affinity changes here
#pragma omp parallel proc_bind(master)
{ }
#pragma omp parallel proc_bind(master)
{ }
return 0;
go_parallel_master();
go_parallel_master();
return get_exit_value();
}
// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:

View File

@ -9,14 +9,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
#pragma omp parallel
{ }
#pragma omp parallel
{ }
return 0;
go_parallel();
go_parallel();
return get_exit_value();
}
// NOTHING: NO_OUTPUT

View File

@ -4,13 +4,12 @@
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_testsuite.h"
int main(int argc, char** argv) {
#pragma omp parallel
{ }
#pragma omp parallel
{ }
return 0;
go_parallel();
go_parallel();
return get_exit_value();
}
// CHECK-8: num_threads=8 TESTER-ENV: tl:1 tn:[0-7] nt:8$

View File

@ -4,8 +4,9 @@
// CHECK-SAME: cores
// REQUIRES: affinity
#include "omp_testsuite.h"
int main() {
#pragma omp parallel
{}
return 0;
go_parallel();
return get_exit_value();
}

View File

@ -45,9 +45,13 @@ config.test_format = lit.formats.ShTest()
flags = " -I " + config.test_source_root + \
" -L " + config.library_dir + \
" " + config.test_extra_flags
if config.has_omit_frame_pointer_flag:
flags += " -fno-omit-frame-pointer"
config.test_flags = " -I " + config.omp_header_directory + flags
config.test_flags_use_compiler_omp_h = flags
# extra libraries
libs = ""
if config.has_libm:

View File

@ -17,6 +17,7 @@ config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@
config.has_libm = @LIBOMP_HAVE_LIBM@
config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@
config.is_standalone_build = @OPENMP_STANDALONE_BUILD@
config.has_omit_frame_pointer_flag = @OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS@
# Let the main config do the real work.
lit_config.load_config(config, "@LIBOMP_BASE_DIR@/test/lit.cfg")

View File

@ -20,6 +20,60 @@
#define NUM_TASKS 25
#define MAX_TASKS_PER_THREAD 5
// Functions that call a parallel region that does very minimal work
// Some compilers may optimize away an empty parallel region
volatile int g_counter__;
// If nthreads == 0, then do not use num_threads() clause
static void go_parallel() {
g_counter__ = 0;
#pragma omp parallel
{
#pragma omp atomic
g_counter__++;
}
}
static void go_parallel_nthreads(int nthreads) {
g_counter__ = 0;
#pragma omp parallel num_threads(nthreads)
{
#pragma omp atomic
g_counter__++;
}
}
static void go_parallel_spread() {
g_counter__ = 0;
#pragma omp parallel proc_bind(spread)
{
#pragma omp atomic
g_counter__++;
}
}
static void go_parallel_close() {
g_counter__ = 0;
#pragma omp parallel proc_bind(close)
{
#pragma omp atomic
g_counter__++;
}
}
static void go_parallel_master() {
g_counter__ = 0;
#pragma omp parallel proc_bind(master)
{
#pragma omp atomic
g_counter__++;
}
}
static inline int get_exit_value() {
return ((g_counter__ == -1) ? EXIT_FAILURE : EXIT_SUCCESS);
}
#ifdef _WIN32
// Windows versions of pthread_create() and pthread_join()
# include <windows.h>

View File

@ -1,16 +1,16 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include "omp_testsuite.h"
int main() {
#pragma omp parallel num_threads(2)
{}
go_parallel_nthreads(2);
printf("Before ompt_finalize_tool\n");
ompt_finalize_tool();
printf("After ompt_finalize_tool\n");
return 0;
return get_exit_value();
}
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]

View File

@ -1,19 +1,14 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
#include "callback.h"
#include "omp_testsuite.h"
#include <omp.h>
int main()
{
#pragma omp parallel num_threads(1)
{
}
go_parallel_nthreads(1);
ompt_set_callback(ompt_callback_parallel_begin, NULL);
#pragma omp parallel num_threads(1)
{
}
go_parallel_nthreads(1);
// Check if libomp supports the callbacks for this test.
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle'
@ -25,5 +20,5 @@ int main()
// CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin:
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end:
return 0;
return get_exit_value();
}

View File

@ -8,8 +8,7 @@
// XFAIL: icc
// support for taskwait with depend clause introduced in clang-14
// UNSUPPORTED: clang-5, clang-6, clang-6, clang-8, clang-9, clang-10, clang-11,
// clang-12, clang-13
// UNSUPPORTED: clang-5, clang-6, clang-6, clang-8, clang-9, clang-10, clang-11, clang-12, clang-13
#include "callback.h"
#include <omp.h>