diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 430425a62f50..9eeef4e48726 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -113,7 +113,15 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) { DeviceTy& Device = Devices[device_num]; bool IsLast; // not used - int rc = (Device.getTgtPtrBegin(ptr, 0, IsLast, false) != NULL); + bool IsHostPtr; + void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr); + int rc = (TgtPtr != NULL); + // Under unified memory the host pointer can be returned by the + // getTgtPtrBegin() function which means that there is no device + // corresponding point for ptr. This function should return false + // in that situation. + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) + rc = !IsHostPtr; DP("Call to omp_target_is_present returns %d\n", rc); return rc; } diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 5ecba5759eb0..0bdcc504c060 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -157,12 +157,17 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) { // If NULL is returned, then either data allocation failed or the user tried // to do an illegal mapping. void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, - int64_t Size, bool &IsNew, bool IsImplicit, bool UpdateRefCount) { + int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit, + bool UpdateRefCount) { void *rc = NULL; + IsHostPtr = false; DataMapMtx.lock(); LookupResult lr = lookupMapping(HstPtrBegin, Size); // Check if the pointer is contained. + // If a variable is mapped to the device manually by the user - which would + // lead to the IsContained flag to be true - then we must ensure that the + // device address is returned even under unified memory conditions. if (lr.Flags.IsContained || ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) { auto &HT = *lr.Entry; @@ -183,15 +188,28 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, // Explicit extension of mapped data - not allowed. DP("Explicit extension of mapping is not allowed.\n"); } else if (Size) { - // If it is not contained and Size > 0 we should create a new entry for it. - IsNew = true; - uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); - DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", " - "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase), - DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp)); - HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase, - (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp)); - rc = (void *)tp; + // If unified shared memory is active, implicitly mapped variables that are not + // privatized use host address. Any explicitly mapped variables also use + // host address where correctness is not impeded. In all other cases + // maps are respected. + // TODO: In addition to the mapping rules above, when the close map + // modifier is implemented, foce the mapping of the variable to the device. + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { + DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n", + DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : "")); + IsHostPtr = true; + rc = HstPtrBegin; + } else { + // If it is not contained and Size > 0 we should create a new entry for it. + IsNew = true; + uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); + DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", " + "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase), + DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp)); + HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase, + (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp)); + rc = (void *)tp; + } } DataMapMtx.unlock(); @@ -202,8 +220,10 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, // Return the target pointer begin (where the data will be moved). // Decrement the reference counter if called from target_data_end. void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount) { + bool UpdateRefCount, bool &IsHostPtr) { void *rc = NULL; + IsHostPtr = false; + IsLast = false; DataMapMtx.lock(); LookupResult lr = lookupMapping(HstPtrBegin, Size); @@ -221,8 +241,14 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, (CONSIDERED_INF(HT.RefCount)) ? "INF" : std::to_string(HT.RefCount).c_str()); rc = (void *)tp; - } else { - IsLast = false; + } else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { + // If the value isn't found in the mapping and unified shared memory + // is on then it means we have stumbled upon a value which we need to + // use directly from the host. + DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n", + DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : "")); + IsHostPtr = true; + rc = HstPtrBegin; } DataMapMtx.unlock(); @@ -244,6 +270,8 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) { } int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) { + if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) + return OFFLOAD_SUCCESS; // Check if the pointer is contained in any sub-nodes. int rc; DataMapMtx.lock(); diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h index ded84e300eed..211507685a6d 100644 --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -137,10 +137,10 @@ struct DeviceTy { long getMapEntryRefCnt(void *HstPtrBegin); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - bool &IsNew, bool IsImplicit, bool UpdateRefCount = true); + bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount); + bool UpdateRefCount, bool &IsHostPtr); int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int disassociatePtr(void *HstPtrBegin); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index c41bf3167ccc..2813f28573b7 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -242,6 +242,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, // Address of pointer on the host and device, respectively. void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin; bool IsNew, Pointer_IsNew; + bool IsHostPtr = false; bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we // have reached this point via __tgt_target_data_begin and not __tgt_target @@ -253,7 +254,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, DP("Has a pointer entry: \n"); // base is address of pointer. Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase, - sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef); + sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef); if (!Pointer_TgtPtrBegin) { DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " "illegal mapping).\n"); @@ -269,7 +270,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, } void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, - data_size, IsNew, IsImplicit, UpdateRef); + data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef); if (!TgtPtrBegin && data_size) { // If data_size==0, then the argument could be a zero-length pointer to // NULL, so getOrAlloc() returning NULL is not an error. @@ -289,19 +290,21 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { bool copy = false; - if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { - copy = true; - } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) { - // Copy data only if the "parent" struct has RefCount==1. - int32_t parent_idx = member_of(arg_types[i]); - long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); - assert(parent_rc > 0 && "parent struct not found"); - if (parent_rc == 1) { + if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { copy = true; + } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) { + // Copy data only if the "parent" struct has RefCount==1. + int32_t parent_idx = member_of(arg_types[i]); + long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); + assert(parent_rc > 0 && "parent struct not found"); + if (parent_rc == 1) { + copy = true; + } } } - if (copy) { + if (copy && !IsHostPtr) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size); @@ -312,7 +315,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, } } - if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { + if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; @@ -363,14 +366,14 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, } } - bool IsLast; + bool IsLast, IsHostPtr; bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) || (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE; // If PTR_AND_OBJ, HstPtrBegin is address of pointee void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast, - UpdateRef); + UpdateRef, IsHostPtr); DP("There are %" PRId64 " bytes allocated at target address " DPxMOD " - is%s last\n", data_size, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not")); @@ -387,18 +390,22 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; bool CopyMember = false; - if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && - !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { - // Copy data only if the "parent" struct has RefCount==1. - int32_t parent_idx = member_of(arg_types[i]); - long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); - assert(parent_rc > 0 && "parent struct not found"); - if (parent_rc == 1) { - CopyMember = true; + if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && + !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { + // Copy data only if the "parent" struct has RefCount==1. + int32_t parent_idx = member_of(arg_types[i]); + long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); + assert(parent_rc > 0 && "parent struct not found"); + if (parent_rc == 1) { + CopyMember = true; + } } } - if (DelEntry || Always || CopyMember) { + if ((DelEntry || Always || CopyMember) && + !(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin)) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); @@ -471,14 +478,21 @@ int target_data_update(DeviceTy &Device, int32_t arg_num, void *HstPtrBegin = args[i]; int64_t MapSize = arg_sizes[i]; - bool IsLast; + bool IsLast, IsHostPtr; void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, - false); + false, IsHostPtr); if (!TgtPtrBegin) { DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); continue; } + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) { + DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", + DPxPTR(HstPtrBegin)); + continue; + } + if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); @@ -514,6 +528,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num, DP("Copying data to device failed.\n"); return OFFLOAD_FAIL; } + uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; Device.ShadowMtx.lock(); @@ -640,19 +655,26 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num, void *HstPtrVal = args[i]; void *HstPtrBegin = args_base[i]; void *HstPtrBase = args[idx]; - bool IsLast; // unused. + bool IsLast, IsHostPtr; // unused. void *TgtPtrBase = (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]); DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); void *Pointer_TgtPtrBegin = - Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false); + Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false, + IsHostPtr); if (!Pointer_TgtPtrBegin) { DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", DPxPTR(HstPtrVal)); continue; } + if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) { + DP("Unified memory is active, no need to map lambda captured" + "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); + continue; + } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin, @@ -668,7 +690,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num, void *HstPtrBase = args_base[i]; void *TgtPtrBegin; ptrdiff_t TgtBaseOffset; - bool IsLast; // unused. + bool IsLast, IsHostPtr; // unused. if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) { DP("Forwarding first-private value " DPxMOD " to the target construct\n", DPxPTR(HstPtrBase)); @@ -705,14 +727,14 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num, } } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, - false); + false, IsHostPtr); TgtBaseOffset = 0; // no offset for ptrs. DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), DPxPTR(HstPtrBase)); } else { TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, - false); + false, IsHostPtr); TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; #ifdef OMPTARGET_DEBUG void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); diff --git a/openmp/libomptarget/test/offloading/requires.c b/openmp/libomptarget/test/offloading/requires.c index 7f014d32c0b5..079ce5cb9348 100644 --- a/openmp/libomptarget/test/offloading/requires.c +++ b/openmp/libomptarget/test/offloading/requires.c @@ -43,4 +43,4 @@ int main() { {} return 0; -} \ No newline at end of file +} diff --git a/openmp/libomptarget/test/unified_shared_memory/api.c b/openmp/libomptarget/test/unified_shared_memory/api.c new file mode 100644 index 000000000000..b0a71ad35801 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/api.c @@ -0,0 +1,164 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL + +extern void __tgt_register_requires(int64_t); + +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- + +#pragma omp requires unified_shared_memory + +#define N 1024 + +void init(int A[], int B[], int C[]) { + for (int i = 0; i < N; ++i) { + A[i] = 0; + B[i] = 1; + C[i] = i; + } +} + +int main(int argc, char *argv[]) { + const int device = omp_get_default_device(); + + // Manual registration of requires flags for Clang versions + // that do not support requires. + __tgt_register_requires(8); + + // CHECK: Initial device: -10 + printf("Initial device: %d\n", omp_get_initial_device()); + + // + // Target alloc & target memcpy + // + int A[N], B[N], C[N]; + + // Init + init(A, B, C); + + int *pA, *pB, *pC; + + // map ptrs + pA = &A[0]; + pB = &B[0]; + pC = &C[0]; + + int *d_A = (int *)omp_target_alloc(N * sizeof(int), device); + int *d_B = (int *)omp_target_alloc(N * sizeof(int), device); + int *d_C = (int *)omp_target_alloc(N * sizeof(int), device); + + // CHECK: omp_target_alloc succeeded + printf("omp_target_alloc %s\n", d_A && d_B && d_C ? "succeeded" : "failed"); + + omp_target_memcpy(d_B, pB, N * sizeof(int), 0, 0, device, + omp_get_initial_device()); + omp_target_memcpy(d_C, pC, N * sizeof(int), 0, 0, device, + omp_get_initial_device()); + +#pragma omp target is_device_ptr(d_A, d_B, d_C) device(device) + { +#pragma omp parallel for schedule(static, 1) + for (int i = 0; i < N; i++) { + d_A[i] = d_B[i] + d_C[i] + 1; + } + } + + omp_target_memcpy(pA, d_A, N * sizeof(int), 0, 0, omp_get_initial_device(), + device); + + // CHECK: Test omp_target_memcpy: Succeeded + int fail = 0; + for (int i = 0; i < N; ++i) { + if (A[i] != i + 2) + fail++; + } + if (fail) { + printf("Test omp_target_memcpy: Failed\n"); + } else { + printf("Test omp_target_memcpy: Succeeded\n"); + } + + // + // target_is_present and target_associate/disassociate_ptr + // + init(A, B, C); + + // CHECK: B is not present, associating it... + // CHECK: omp_target_associate_ptr B succeeded + if (!omp_target_is_present(B, device)) { + printf("B is not present, associating it...\n"); + int rc = omp_target_associate_ptr(B, d_B, N * sizeof(int), 0, device); + printf("omp_target_associate_ptr B %s\n", !rc ? "succeeded" : "failed"); + } + + // CHECK: C is not present, associating it... + // CHECK: omp_target_associate_ptr C succeeded + if (!omp_target_is_present(C, device)) { + printf("C is not present, associating it...\n"); + int rc = omp_target_associate_ptr(C, d_C, N * sizeof(int), 0, device); + printf("omp_target_associate_ptr C %s\n", !rc ? "succeeded" : "failed"); + } + +// CHECK: Inside target data: A is not present +// CHECK: Inside target data: B is present +// CHECK: Inside target data: C is present +#pragma omp target data map(from : B, C) device(device) + { + printf("Inside target data: A is%s present\n", + omp_target_is_present(A, device) ? "" : " not"); + printf("Inside target data: B is%s present\n", + omp_target_is_present(B, device) ? "" : " not"); + printf("Inside target data: C is%s present\n", + omp_target_is_present(C, device) ? "" : " not"); + +#pragma omp target map(from : A) device(device) + { +#pragma omp parallel for schedule(static, 1) + for (int i = 0; i < N; i++) + A[i] = B[i] + C[i] + 1; + } + } + + // CHECK: B is present, disassociating it... + // CHECK: omp_target_disassociate_ptr B succeeded + // CHECK: C is present, disassociating it... + // CHECK: omp_target_disassociate_ptr C succeeded + if (omp_target_is_present(B, device)) { + printf("B is present, disassociating it...\n"); + int rc = omp_target_disassociate_ptr(B, device); + printf("omp_target_disassociate_ptr B %s\n", !rc ? "succeeded" : "failed"); + } + if (omp_target_is_present(C, device)) { + printf("C is present, disassociating it...\n"); + int rc = omp_target_disassociate_ptr(C, device); + printf("omp_target_disassociate_ptr C %s\n", !rc ? "succeeded" : "failed"); + } + + // CHECK: Test omp_target_associate_ptr: Succeeded + fail = 0; + for (int i = 0; i < N; ++i) { + if (A[i] != i + 2) + fail++; + } + if (fail) { + printf("Test omp_target_associate_ptr: Failed\n"); + } else { + printf("Test omp_target_associate_ptr: Succeeded\n"); + } + + omp_target_free(d_A, device); + omp_target_free(d_B, device); + omp_target_free(d_C, device); + + printf("Done!\n"); + + return 0; +} diff --git a/openmp/libomptarget/test/unified_shared_memory/shared_update.c b/openmp/libomptarget/test/unified_shared_memory/shared_update.c new file mode 100644 index 000000000000..8036bc2f0405 --- /dev/null +++ b/openmp/libomptarget/test/unified_shared_memory/shared_update.c @@ -0,0 +1,114 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL + +extern void __tgt_register_requires(int64_t); + +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- + +#pragma omp requires unified_shared_memory + +#define N 1024 + +int main(int argc, char *argv[]) { + int fails; + void *host_alloc, *device_alloc; + void *host_data, *device_data; + int *alloc = (int *)malloc(N * sizeof(int)); + int data[N]; + + // Manual registration of requires flags for Clang versions + // that do not support requires. + __tgt_register_requires(8); + + for (int i = 0; i < N; ++i) { + alloc[i] = 10; + data[i] = 1; + } + + host_data = &data[0]; + host_alloc = &alloc[0]; + +// implicit mapping of data +#pragma omp target map(tofrom : device_data, device_alloc) + { + device_data = &data[0]; + device_alloc = &alloc[0]; + + for (int i = 0; i < N; i++) { + alloc[i] += 1; + data[i] += 1; + } + } + + // CHECK: Address of alloc on device matches host address. + if (device_alloc == host_alloc) + printf("Address of alloc on device matches host address.\n"); + + // CHECK: Address of data on device matches host address. + if (device_data == host_data) + printf("Address of data on device matches host address.\n"); + + // On the host, check that the arrays have been updated. + // CHECK: Alloc device values updated: Succeeded + fails = 0; + for (int i = 0; i < N; i++) { + if (alloc[i] != 11) + fails++; + } + printf("Alloc device values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + // CHECK: Data device values updated: Succeeded + fails = 0; + for (int i = 0; i < N; i++) { + if (data[i] != 2) + fails++; + } + printf("Data device values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + + // + // Test that updates on the host snd on the device are both visible. + // + + // Update on the host. + for (int i = 0; i < N; ++i) { + alloc[i] += 1; + data[i] += 1; + } + +#pragma omp target + { + // CHECK: Alloc host values updated: Succeeded + fails = 0; + for (int i = 0; i < N; i++) { + if (alloc[i] != 12) + fails++; + } + printf("Alloc host values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + // CHECK: Data host values updated: Succeeded + fails = 0; + for (int i = 0; i < N; i++) { + if (data[i] != 3) + fails++; + } + printf("Data host values updated: %s\n", + (fails == 0) ? "Succeeded" : "Failed"); + } + + free(alloc); + + printf("Done!\n"); + + return 0; +}