mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 22:30:13 +00:00
Changed omp_get_max_threads() implementation to more closely match spec description.
Summary: The 5.0 spec states, "The omp_get_max_threads routine returns an upper bound on the number of threads that could be used to form a new team if a parallel construct without a num_threads clause were encountered after execution returns from this routine." Without the proposed change, the attached test prints "Max Threads: 96, Num Threads: 128". The number of threads should not exceed the (max) nthreads ICV, hence we should return the higher SPMD thread number even when omp_get_max_threads() is called in a generic kernel. This change does fail the api test, max_threads.c, because it now returns 64 instead of 32.
Reviewers: jdoerfert, ABataev, grokos, JonChesterfield
Reviewed By: jdoerfert
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D74092
This commit is contained in:
parent
c2ce9ea4e3
commit
190a11148b
@ -68,7 +68,7 @@ EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) {
|
||||
// set number of threads and thread limit in team to started value
|
||||
omptarget_nvptx_TaskDescr *currTaskDescr =
|
||||
omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
|
||||
- nThreads = GetNumberOfWorkersInTeam();
|
||||
+ nThreads = GetNumberOfThreadsInBlock();
|
||||
threadLimit = ThreadLimit;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,22 @@
|
||||
// RUN: %compile-run-and-check
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main(){
|
||||
int max_threads = -1;
|
||||
int num_threads = -1;
|
||||
|
||||
#pragma omp target map(tofrom: max_threads)
|
||||
max_threads = omp_get_max_threads();
|
||||
|
||||
#pragma omp target parallel map(tofrom: num_threads)
|
||||
{
|
||||
#pragma omp master
|
||||
num_threads = omp_get_num_threads();
|
||||
}
|
||||
|
||||
// CHECK: Max Threads: 128, Num Threads: 128
|
||||
printf("Max Threads: %d, Num Threads: %d\n", max_threads, num_threads);
|
||||
|
||||
return 0;
|
||||
}
|
@ -19,7 +19,14 @@ int main(int argc, char *argv[]) {
|
||||
{ MaxThreadsL2 = omp_get_max_threads(); }
|
||||
}
|
||||
|
||||
- // CHECK: Non-SPMD MaxThreadsL1 = 32
|
||||
+ //FIXME: This Non-SPMD kernel will have 32 active threads due to
|
||||
+ // thread_limit. However, Non-SPMD MaxThreadsL1 is the total number of
|
||||
+ // threads in block (64 in this case), which translates to worker
|
||||
+ // threads + WARP_SIZE for Non-SPMD kernels and worker threads for SPMD
|
||||
+ // kernels. According to the spec, omp_get_max_threads must return the
|
||||
+ // max active threads possible between the two kernel types.
|
||||
|
||||
+ // CHECK: Non-SPMD MaxThreadsL1 = 64
|
||||
printf("Non-SPMD MaxThreadsL1 = %d\n", MaxThreadsL1);
|
||||
// CHECK: Non-SPMD MaxThreadsL2 = 1
|
||||
printf("Non-SPMD MaxThreadsL2 = %d\n", MaxThreadsL2);
|
||||
|
Loading…
Reference in New Issue
Block a user