[OpenMP] Use __OPENMP_NVPTX__ instead of _OPENMP in complex wrapper headers.

This is very similar to 7f1e6fcff9; it just fixes a leftover.
With this change, it should be possible to use both -x cuda and -fopenmp in the same invocation,
making it possible to combine OpenMP (targeting the CPU) with CUDA (targeting the GPU).

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D90415
This commit is contained in:
Joachim Meyer 2020-10-29 19:45:49 +01:00
parent fa5a132767
commit eaee608448
3 changed files with 7 additions and 3 deletions

View File

@ -16,7 +16,7 @@
// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
#pragma push_macro("__DEVICE__")
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
#pragma omp declare target
#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
#else
@ -26,7 +26,7 @@
// To make the algorithms available for C and C++ in CUDA and OpenMP we select
// different but equivalent function versions. TODO: For OpenMP we currently
// select the native builtins as the overload support for templates is lacking.
-#if !defined(_OPENMP)
+#if !defined(__OPENMP_NVPTX__)
#define _ISNANd std::isnan
#define _ISNANf std::isnan
#define _ISINFd std::isinf
@ -276,7 +276,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
#undef _fmaxd
#undef _fmaxf
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
#pragma omp end declare target
#endif

View File

@ -18,7 +18,9 @@
#include <cmath>
#define __CUDA__
+#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_NVPTX__
#endif
// Grab the host header too.

View File

@ -18,7 +18,9 @@
#include <math.h>
#define __CUDA__
+#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_NVPTX__
#endif
// Grab the host header too.