mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-07 10:54:01 +00:00
[OpenMP][VE] Support OpenMP runtime on VE
Support OpenMP runtime library on VE. This patch makes OpenMP compilable for VE architecture. Almost all tests run correctly on VE. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D159401
This commit is contained in:
parent
52b4bec939
commit
18b6724355
@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD})
|
||||
# If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake
|
||||
libomp_get_architecture(LIBOMP_DETECTED_ARCH)
|
||||
set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING
|
||||
"The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).")
|
||||
"The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/ve).")
|
||||
# Should assertions be enabled? They are on by default.
|
||||
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
|
||||
"enable assertions?")
|
||||
@ -63,6 +63,8 @@ else() # Part of LLVM build
|
||||
set(LIBOMP_ARCH riscv64)
|
||||
elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64")
|
||||
set(LIBOMP_ARCH loongarch64)
|
||||
elseif(LIBOMP_NATIVE_ARCH MATCHES "ve")
|
||||
set(LIBOMP_ARCH ve)
|
||||
else()
|
||||
# last ditch effort
|
||||
libomp_get_architecture(LIBOMP_ARCH)
|
||||
@ -83,7 +85,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64)
|
||||
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve)
|
||||
|
||||
set(LIBOMP_LIB_TYPE normal CACHE STRING
|
||||
"Performance,Profiling,Stubs library (normal/profile/stubs)")
|
||||
@ -162,6 +164,7 @@ set(MIPS64 FALSE)
|
||||
set(MIPS FALSE)
|
||||
set(RISCV64 FALSE)
|
||||
set(LOONGARCH64 FALSE)
|
||||
set(VE FALSE)
|
||||
if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture
|
||||
set(IA32 TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture
|
||||
@ -188,6 +191,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture
|
||||
set(RISCV64 TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture
|
||||
set(LOONGARCH64 TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "ve") # VE architecture
|
||||
set(VE TRUE)
|
||||
endif()
|
||||
|
||||
# Set some flags based on build_type
|
||||
|
@ -49,6 +49,8 @@ function(libomp_get_architecture return_arch)
|
||||
#error ARCHITECTURE=riscv64
|
||||
#elif defined(__loongarch__) && __loongarch_grlen == 64
|
||||
#error ARCHITECTURE=loongarch64
|
||||
#elif defined(__ve__)
|
||||
#error ARCHITECTURE=ve
|
||||
#else
|
||||
#error ARCHITECTURE=UnknownArchitecture
|
||||
#endif
|
||||
|
@ -111,6 +111,8 @@ function(libomp_get_legal_arch return_arch_string)
|
||||
set(${return_arch_string} "RISCV64" PARENT_SCOPE)
|
||||
elseif(${LOONGARCH64})
|
||||
set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE)
|
||||
elseif(${VE})
|
||||
set(${return_arch_string} "VE" PARENT_SCOPE)
|
||||
else()
|
||||
set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE)
|
||||
libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}")
|
||||
|
@ -1170,6 +1170,10 @@ extern void __kmp_init_target_task();
|
||||
#elif KMP_ARCH_X86_64
|
||||
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
|
||||
#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
|
||||
#elif KMP_ARCH_VE
|
||||
// Minimum stack size for pthread for VE is 4MB.
|
||||
// https://www.hpc.nec/documents/veos/en/glibc/Difference_Points_glibc.htm
|
||||
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
|
||||
#else
|
||||
#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
|
||||
#endif
|
||||
|
@ -286,6 +286,17 @@ public:
|
||||
#elif __NR_sched_getaffinity != 123
|
||||
#error Wrong code for getaffinity system call.
|
||||
#endif /* __NR_sched_getaffinity */
|
||||
#elif KMP_ARCH_VE
|
||||
#ifndef __NR_sched_setaffinity
|
||||
#define __NR_sched_setaffinity 203
|
||||
#elif __NR_sched_setaffinity != 203
|
||||
#error Wrong code for setaffinity system call.
|
||||
#endif /* __NR_sched_setaffinity */
|
||||
#ifndef __NR_sched_getaffinity
|
||||
#define __NR_sched_getaffinity 204
|
||||
#elif __NR_sched_getaffinity != 204
|
||||
#error Wrong code for getaffinity system call.
|
||||
#endif /* __NR_sched_getaffinity */
|
||||
#else
|
||||
#error Unknown or unsupported architecture
|
||||
#endif /* KMP_ARCH_* */
|
||||
|
@ -178,7 +178,7 @@ typedef unsigned long long kmp_uint64;
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
|
||||
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
|
||||
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
|
||||
#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
|
||||
#else
|
||||
#error "Can't determine size_t printf format specifier."
|
||||
@ -1043,7 +1043,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
|
||||
#endif /* KMP_OS_WINDOWS */
|
||||
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
|
||||
#if KMP_OS_WINDOWS
|
||||
#undef KMP_MB
|
||||
#define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)
|
||||
|
@ -93,6 +93,7 @@
|
||||
#define KMP_ARCH_MIPS64 0
|
||||
#define KMP_ARCH_RISCV64 0
|
||||
#define KMP_ARCH_LOONGARCH64 0
|
||||
#define KMP_ARCH_VE 0
|
||||
|
||||
#if KMP_OS_WINDOWS
|
||||
#if defined(_M_AMD64) || defined(__x86_64)
|
||||
@ -142,6 +143,9 @@
|
||||
#elif defined __loongarch__ && __loongarch_grlen == 64
|
||||
#undef KMP_ARCH_LOONGARCH64
|
||||
#define KMP_ARCH_LOONGARCH64 1
|
||||
#elif defined __ve__
|
||||
#undef KMP_ARCH_VE
|
||||
#define KMP_ARCH_VE 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -206,7 +210,7 @@
|
||||
// TODO: Fixme - This is clever, but really fugly
|
||||
#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \
|
||||
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \
|
||||
KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64)
|
||||
KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE)
|
||||
#error Unknown or unsupported architecture
|
||||
#endif
|
||||
|
||||
|
@ -8830,7 +8830,7 @@ __kmp_determine_reduction_method(
|
||||
int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
|
||||
|
||||
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
|
||||
|
||||
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
|
||||
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
|
||||
|
@ -162,6 +162,10 @@
|
||||
#define ITT_ARCH_ARM64 6
|
||||
#endif /* ITT_ARCH_ARM64 */
|
||||
|
||||
#ifndef ITT_ARCH_VE
|
||||
#define ITT_ARCH_VE 8
|
||||
#endif /* ITT_ARCH_VE */
|
||||
|
||||
#ifndef ITT_ARCH
|
||||
#if defined _M_IX86 || defined __i386__
|
||||
#define ITT_ARCH ITT_ARCH_IA32
|
||||
@ -175,6 +179,8 @@
|
||||
#define ITT_ARCH ITT_ARCH_ARM64
|
||||
#elif defined __powerpc64__
|
||||
#define ITT_ARCH ITT_ARCH_PPC64
|
||||
#elif defined __ve__
|
||||
#define ITT_ARCH ITT_ARCH_VE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -2060,6 +2060,198 @@ __kmp_invoke_microtask:
|
||||
|
||||
#endif /* KMP_ARCH_LOONGARCH64 */
|
||||
|
||||
#if KMP_ARCH_VE
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
|
||||
//
|
||||
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
|
||||
// void *p_argv[]
|
||||
// #if OMPT_SUPPORT
|
||||
// ,
|
||||
// void **exit_frame_ptr
|
||||
// #endif
|
||||
// ) {
|
||||
// #if OMPT_SUPPORT
|
||||
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
// #endif
|
||||
//
|
||||
// (*pkfn)(&gtid, &tid, argv[0], ...);
|
||||
//
|
||||
// return 1;
|
||||
// }
|
||||
//
|
||||
// Parameters:
|
||||
// s0: pkfn
|
||||
// s1: gtid
|
||||
// s2: tid
|
||||
// s3: argc
|
||||
// s4: p_argv
|
||||
// s5: exit_frame_ptr
|
||||
//
|
||||
// Locals:
|
||||
// __gtid: gtid param pushed on stack so can pass &gtid to pkfn
|
||||
// __tid: tid param pushed on stack so can pass &tid to pkfn
|
||||
//
|
||||
// Temp. registers:
|
||||
//
|
||||
// s34: used to calculate the dynamic stack size
|
||||
// s35: used as temporary for stack placement calculation
|
||||
// s36: used as temporary for stack arguments
|
||||
// s37: used as temporary for number of remaining pkfn parms
|
||||
// s38: used to traverse p_argv array
|
||||
//
|
||||
// return: s0 (always 1/TRUE)
|
||||
//
|
||||
|
||||
__gtid = -4
|
||||
__tid = -8
|
||||
|
||||
// -- Begin __kmp_invoke_microtask
|
||||
// mark_begin;
|
||||
.text
|
||||
.globl __kmp_invoke_microtask
|
||||
// Functions must be aligned to 8 bytes.
|
||||
.p2align 3
|
||||
.type __kmp_invoke_microtask,@function
|
||||
// Entry point: calls pkfn with &gtid, &tid and the argc entries of p_argv,
// then returns 1 in %s0.
__kmp_invoke_microtask:
|
||||
.cfi_startproc
|
||||
|
||||
// First, save fp and lr. VE stores them in the caller's stack frame.
|
||||
st %fp, 0(, %sp)
|
||||
st %lr, 8(, %sp)
|
||||
or %fp, 0, %sp
|
||||
.cfi_def_cfa %fp, 0
|
||||
.cfi_offset %lr, 8
|
||||
.cfi_offset %fp, 0
|
||||
|
||||
// Compute the dynamic stack size:
|
||||
//
|
||||
// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
|
||||
// by reference
|
||||
|
||||
// - We need 8 bytes for each argument. We have 2 + 'argc'
|
||||
|
||||
// arguments (consider &gtid and &tid). We need to reserve
|
||||
|
||||
// (argc + 2) * 8 bytes.
|
||||
// - We need 176 bytes for RSA and others
|
||||
|
||||
//
|
||||
// The total number of bytes is then (argc + 2) * 8 + 8 + 176.
|
||||
//
|
||||
// |------------------------------|
|
||||
// | return address of callee | 8(%fp)
|
||||
// |------------------------------|
|
||||
// | frame pointer of callee | 0(%fp)
|
||||
// |------------------------------| <------------------ %fp
|
||||
// | __tid / __gtid | -8(%fp) / -4(%fp)
|
||||
// |------------------------------|
|
||||
// | argc+2 for arguments | 176(%sp)
|
||||
// |------------------------------|
|
||||
// | RSA |
|
||||
// |------------------------------|
|
||||
// | return address |
|
||||
// |------------------------------|
|
||||
// | frame pointer |
|
||||
// |------------------------------| <------------------ %sp
|
||||
|
||||
// %s34 = (argc + 2) * 8 + 184; then lower %sp by that amount.
adds.w.sx %s34, 2, %s3
|
||||
sll %s34, %s34, 3
|
||||
lea %s34, 184(, %s34)
|
||||
subs.l %sp, %sp, %s34
|
||||
|
||||
// Align the stack to 16 bytes.
|
||||
and %sp, -16, %sp
|
||||
|
||||
// Save pkfn.
|
||||
or %s12, 0, %s0
|
||||
|
||||
// Call host to allocate stack if it is necessary.
|
||||
// The request is skipped when the new %sp is still >= %sl.
brge.l %sp, %sl, .L_kmp_pass
|
||||
ld %s61, 24(, %tp)
|
||||
lea %s63, 0x13b
|
||||
shm.l %s63, 0(%s61)
|
||||
shm.l %sl, 8(%s61)
|
||||
shm.l %sp, 16(%s61)
|
||||
monc
|
||||
|
||||
.L_kmp_pass:
|
||||
// %s35 = start of the outgoing argument area (176 bytes above %sp),
// %s37 = argc, %s38 = p_argv.
lea %s35, 176(, %sp)
|
||||
adds.w.sx %s37, 0, %s3
|
||||
or %s38, 0, %s4
|
||||
|
||||
#if OMPT_SUPPORT
|
||||
// Save frame pointer into exit_frame.
|
||||
st %fp, 0(%s5)
|
||||
#endif
|
||||
|
||||
// Prepare arguments for the pkfn function (first 8 using s0-s7
|
||||
// registers, but need to store stack also because of varargs).
|
||||
|
||||
// Spill gtid and tid into their local slots just below %fp.
stl %s1, __gtid(%fp)
|
||||
stl %s2, __tid(%fp)
|
||||
|
||||
// %s0 = &gtid and %s1 = &tid; each is also stored to the argument area.
adds.l %s0, __gtid, %fp
|
||||
st %s0, 0(, %s35)
|
||||
adds.l %s1, __tid, %fp
|
||||
st %s1, 8(, %s35)
|
||||
|
||||
// Load up to six entries of p_argv into %s2-%s7, storing each one to the
// argument area as well; branch to the call once argc entries are done.
breq.l 0, %s37, .L_kmp_call
|
||||
ld %s2, 0(, %s38)
|
||||
st %s2, 16(, %s35)
|
||||
|
||||
breq.l 1, %s37, .L_kmp_call
|
||||
ld %s3, 8(, %s38)
|
||||
st %s3, 24(, %s35)
|
||||
|
||||
breq.l 2, %s37, .L_kmp_call
|
||||
ld %s4, 16(, %s38)
|
||||
st %s4, 32(, %s35)
|
||||
|
||||
breq.l 3, %s37, .L_kmp_call
|
||||
ld %s5, 24(, %s38)
|
||||
st %s5, 40(, %s35)
|
||||
|
||||
breq.l 4, %s37, .L_kmp_call
|
||||
ld %s6, 32(, %s38)
|
||||
st %s6, 48(, %s35)
|
||||
|
||||
breq.l 5, %s37, .L_kmp_call
|
||||
ld %s7, 40(, %s38)
|
||||
st %s7, 56(, %s35)
|
||||
|
||||
breq.l 6, %s37, .L_kmp_call
|
||||
|
||||
// Prepare any additional argument passed through the stack.
|
||||
// Skip the six entries already handled: %s37 = argc - 6, and both the
// source (%s38) and destination (%s35) pointers move past them.
adds.l %s37, -6, %s37
|
||||
lea %s38, 48(, %s38)
|
||||
lea %s35, 64(, %s35)
|
||||
.L_kmp_loop:
|
||||
// Copy one 8-byte entry per iteration until %s37 reaches zero.
ld %s36, 0(, %s38)
|
||||
st %s36, 0(, %s35)
|
||||
adds.l %s37, -1, %s37
|
||||
adds.l %s38, 8, %s38
|
||||
adds.l %s35, 8, %s35
|
||||
brne.l 0, %s37, .L_kmp_loop
|
||||
|
||||
.L_kmp_call:
|
||||
// Call pkfn function.
|
||||
bsic %lr, (, %s12)
|
||||
|
||||
// Return value.
|
||||
lea %s0, 1
|
||||
|
||||
// Restore stack and return.
|
||||
or %sp, 0, %fp
|
||||
ld %lr, 8(, %sp)
|
||||
ld %fp, 0(, %sp)
|
||||
b.l.t (, %lr)
|
||||
.Lfunc_end0:
|
||||
.size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
|
||||
.cfi_endproc
|
||||
|
||||
// -- End __kmp_invoke_microtask
|
||||
|
||||
#endif /* KMP_ARCH_VE */
|
||||
|
||||
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
|
||||
.data
|
||||
COMMON .gomp_critical_user_, 32, 3
|
||||
@ -2073,7 +2265,8 @@ __kmp_unnamed_critical_addr:
|
||||
#endif
|
||||
#endif /* KMP_ARCH_ARM */
|
||||
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \
|
||||
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
|
||||
#ifndef KMP_PREFIX_UNDERSCORE
|
||||
# define KMP_PREFIX_UNDERSCORE(x) x
|
||||
#endif
|
||||
@ -2088,7 +2281,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
|
||||
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
|
||||
#endif
|
||||
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
|
||||
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */
|
||||
KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE */
|
||||
|
||||
#if KMP_OS_LINUX
|
||||
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
|
||||
|
@ -2456,7 +2456,7 @@ finish: // Clean up and exit.
|
||||
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
|
||||
((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
|
||||
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
|
||||
KMP_ARCH_ARM)
|
||||
KMP_ARCH_ARM || KMP_ARCH_VE)
|
||||
|
||||
// we really only need the case with 1 argument, because CLANG always build
|
||||
// a struct of pointers to shared variables referenced in the outlined function
|
||||
|
@ -221,6 +221,13 @@ ompt_label_##id:
|
||||
printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
|
||||
ompt_get_thread_data()->value, ((char *)addr) - 4, \
|
||||
((char *)addr) - 8, ((char *)addr) - 12)
|
||||
#elif KMP_ARCH_VE
|
||||
// On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
|
||||
// a ??? instruction for non-void runtime functions which is ? bytes long.
|
||||
#define print_possible_return_addresses(addr) \
|
||||
printf("%" PRIu64 ": current_address=%p or %p\n", \
|
||||
ompt_get_thread_data()->value, ((char *)addr) - 8, \
|
||||
((char *)addr) - 8)
|
||||
#else
|
||||
#error Unsupported target architecture, cannot determine address offset!
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user