[AMDGPU] Introduce Clang builtins to be mapped to AMDGCN atomic inc/dec intrinsics

Summary:
__builtin_amdgcn_atomic_inc32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_inc64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_dec32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_dec64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)

The first and second arguments are passed transparently to the amdgcn atomic
inc/dec intrinsic. The fifth argument of the intrinsic is set to true if the
first argument of the builtin is a volatile pointer. The third argument of
the builtin is one of the memory-ordering specifiers __ATOMIC_ACQUIRE,
__ATOMIC_RELEASE, __ATOMIC_ACQ_REL, or __ATOMIC_SEQ_CST, following C++11
memory-model semantics; it is mapped to the corresponding LLVM atomic memory
ordering for the atomic inc/dec instruction using the Clang atomic C ABI. The
fourth argument is an AMDGPU-specific synchronization scope given as a string.
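
A minimal usage sketch (not part of the patch), in HIP device code; the wrapper names are hypothetical. The pointer and value are forwarded to the intrinsic unchanged, the ordering must be one of the four specifiers above, and the scope string names an AMDGPU synchronization scope such as "workgroup", "agent", or "wavefront":

__attribute__((device)) int wrap_inc32(int *p, int bound) {
  // Wrapping atomic increment of *p (resets to 0 once 'bound' is reached),
  // sequentially consistent at agent scope.
  return __builtin_amdgcn_atomic_inc32(p, bound, __ATOMIC_SEQ_CST, "agent");
}

__attribute__((device)) __INT64_TYPE__ wrap_dec64(__INT64_TYPE__ *p,
                                                  __INT64_TYPE__ bound) {
  // Wrapping atomic decrement, acquire-release at workgroup scope.
  return __builtin_amdgcn_atomic_dec64(p, bound, __ATOMIC_ACQ_REL, "workgroup");
}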

Reviewers: arsenm, sameerds, JonChesterfield, jdoerfert

Reviewed By: arsenm, sameerds

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, kerbowa, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D80804
Saiyedul Islam 2020-05-29 14:16:07 +00:00
parent 1c189d71db
commit 675cefbf60
7 changed files with 459 additions and 64 deletions

@@ -60,6 +60,12 @@ BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
BUILTIN(__builtin_amdgcn_atomic_inc32, "ZiZiD*ZiUicC*", "n")
BUILTIN(__builtin_amdgcn_atomic_inc64, "WiWiD*WiUicC*", "n")
BUILTIN(__builtin_amdgcn_atomic_dec32, "ZiZiD*ZiUicC*", "n")
BUILTIN(__builtin_amdgcn_atomic_dec64, "WiWiD*WiUicC*", "n")
// FIXME: Need to disallow constant address space.
BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
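
The prototype strings above decode roughly as follows under the encoding documented at the top of Builtins.def ('Zi' = int32_t, 'Wi' = int64_t, 'Ui' = unsigned int, 'D' = volatile, 'C' = const, '*' = pointer, "n" = nothrow); this is my reading of the encoding, not text from the patch:

// "ZiZiD*ZiUicC*", "n"
int32_t __builtin_amdgcn_atomic_inc32(volatile int32_t *Ptr, int32_t Val,
                                      unsigned int MemoryOrdering,
                                      const char *SyncScope);
// "WiWiD*WiUicC*", "n"
int64_t __builtin_amdgcn_atomic_dec64(volatile int64_t *Ptr, int64_t Val,
                                      unsigned int MemoryOrdering,
                                      const char *SyncScope);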

@@ -14301,8 +14301,49 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
}
} // namespace
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
// \p Order takes a C++11 compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using the atomic C ABI, and writes
// it to \p AO. \p Scope takes a const char * and converts it into an
// AMDGCN-specific SyncScopeID and writes it to \p SSID.
bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID) {
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
// Map C11/C++11 memory ordering to LLVM memory ordering
switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
case llvm::AtomicOrderingCABI::acquire:
AO = llvm::AtomicOrdering::Acquire;
break;
case llvm::AtomicOrderingCABI::release:
AO = llvm::AtomicOrdering::Release;
break;
case llvm::AtomicOrderingCABI::acq_rel:
AO = llvm::AtomicOrdering::AcquireRelease;
break;
case llvm::AtomicOrderingCABI::seq_cst:
AO = llvm::AtomicOrdering::SequentiallyConsistent;
break;
case llvm::AtomicOrderingCABI::consume:
case llvm::AtomicOrderingCABI::relaxed:
break;
}
StringRef scp;
llvm::getConstantStringInfo(Scope, scp);
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
return true;
}
return false;
}
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
@@ -14507,38 +14548,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_fence: {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
Value *Order = EmitScalarExpr(E->getArg(0));
Value *Scope = EmitScalarExpr(E->getArg(1));
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
// Map C11/C++11 memory ordering to LLVM memory ordering
switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
case llvm::AtomicOrderingCABI::acquire:
AO = llvm::AtomicOrdering::Acquire;
break;
case llvm::AtomicOrderingCABI::release:
AO = llvm::AtomicOrdering::Release;
break;
case llvm::AtomicOrderingCABI::acq_rel:
AO = llvm::AtomicOrdering::AcquireRelease;
break;
case llvm::AtomicOrderingCABI::seq_cst:
AO = llvm::AtomicOrdering::SequentiallyConsistent;
break;
case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
break;
}
StringRef scp;
llvm::getConstantStringInfo(Scope, scp);
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1)), AO, SSID))
return Builder.CreateFence(AO, SSID);
LLVM_FALLTHROUGH;
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
unsigned BuiltinAtomicOp;
llvm::Type *ResultType = ConvertType(E->getType());
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
break;
case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
break;
}
Value *Ptr = EmitScalarExpr(E->getArg(0));
Value *Val = EmitScalarExpr(E->getArg(1));
llvm::Function *F =
CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});
if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
EmitScalarExpr(E->getArg(3)), AO, SSID)) {
// llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect the ordering and
// scope as unsigned values.
Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
bool Volatile =
PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
}
LLVM_FALLTHROUGH;
}
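
For reference when reading the tests further below: the third and fourth operands of the generated intrinsic calls are the numeric value of LLVM's AtomicOrdering enum and the sync-scope ID returned by getOrInsertSyncScopeID. A minimal sketch of the constants involved, assuming LLVM's fixed AtomicOrdering numbering and the scope-registration order seen in these tests:

// AtomicOrdering numbering is fixed by LLVM: Acquire = 4, Release = 5,
// AcquireRelease = 6, SequentiallyConsistent = 7.
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
// "singlethread" and the system scope ("") have the fixed IDs 0 and 1; other
// scope strings are assigned IDs in registration order, so in these tests
// "workgroup" -> 2, "agent" -> 3, "wavefront" -> 4 (an assumption about order).
llvm::SyncScope::ID SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
// Yields the trailing operands in the CHECK lines, e.g. "i32 7, i32 2, i1 false".
Builder.CreateCall(F, {Ptr, Val,
                       Builder.getInt32(static_cast<int>(AO)),   // i32 7
                       Builder.getInt32(static_cast<int>(SSID)), // i32 2
                       Builder.getInt1(false)});                 // i1 false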

@@ -3988,6 +3988,9 @@ public:
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E);
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
private:
enum class MSVCIntrin;

@@ -3070,41 +3070,56 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *TheCall) {
// position of memory order and scope arguments in the builtin
unsigned OrderIndex, ScopeIndex;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_fence: {
ExprResult Arg = TheCall->getArg(0);
auto ArgExpr = Arg.get();
Expr::EvalResult ArgResult;
if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
<< ArgExpr->getType();
int ord = ArgResult.Val.getInt().getZExtValue();
// Check validity of the memory ordering as per the C11 / C++11 memory model.
switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
case llvm::AtomicOrderingCABI::acquire:
case llvm::AtomicOrderingCABI::release:
case llvm::AtomicOrderingCABI::acq_rel:
case llvm::AtomicOrderingCABI::seq_cst:
break;
default: {
return Diag(ArgExpr->getBeginLoc(),
diag::warn_atomic_op_has_invalid_memory_order)
<< ArgExpr->getSourceRange();
}
}
Arg = TheCall->getArg(1);
ArgExpr = Arg.get();
Expr::EvalResult ArgResult1;
// Check that sync scope is a constant literal
if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
Context))
return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
<< ArgExpr->getType();
} break;
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
OrderIndex = 2;
ScopeIndex = 3;
break;
case AMDGPU::BI__builtin_amdgcn_fence:
OrderIndex = 0;
ScopeIndex = 1;
break;
default:
return false;
}
ExprResult Arg = TheCall->getArg(OrderIndex);
auto ArgExpr = Arg.get();
Expr::EvalResult ArgResult;
if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
<< ArgExpr->getType();
int ord = ArgResult.Val.getInt().getZExtValue();
// Check validity of the memory ordering as per the C11 / C++11 memory model.
switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
case llvm::AtomicOrderingCABI::acquire:
case llvm::AtomicOrderingCABI::release:
case llvm::AtomicOrderingCABI::acq_rel:
case llvm::AtomicOrderingCABI::seq_cst:
break;
default: {
return Diag(ArgExpr->getBeginLoc(),
diag::warn_atomic_op_has_invalid_memory_order)
<< ArgExpr->getSourceRange();
}
}
Arg = TheCall->getArg(ScopeIndex);
ArgExpr = Arg.get();
Expr::EvalResult ArgResult1;
// Check that sync scope is a constant literal
if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
Context))
return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
<< ArgExpr->getType();
return false;
}

@@ -0,0 +1,253 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
__attribute__((device)) void test_non_volatile_parameter32(int *ptr) {
// CHECK-LABEL: test_non_volatile_parameter32
int res;
// CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
// CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
// CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
// CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %2 = load i32, i32* %1, align 4
// CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %6 = load i32, i32* %5, align 4
// CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_non_volatile_parameter64(__INT64_TYPE__ *ptr) {
// CHECK-LABEL: test_non_volatile_parameter64
__INT64_TYPE__ res;
// CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
// CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
// CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
// CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %2 = load i64, i64* %1, align 8
// CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %6 = load i64, i64* %5, align 8
// CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_volatile_parameter32(volatile int *ptr) {
// CHECK-LABEL: test_volatile_parameter32
int res;
// CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
// CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
// CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
// CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %2 = load volatile i32, i32* %1, align 4
// CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 true)
// CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
// CHECK-NEXT: %6 = load volatile i32, i32* %5, align 4
// CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 true)
// CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_volatile_parameter64(volatile __INT64_TYPE__ *ptr) {
// CHECK-LABEL: test_volatile_parameter64
__INT64_TYPE__ res;
// CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
// CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
// CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
// CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %2 = load volatile i64, i64* %1, align 8
// CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 true)
// CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
// CHECK-NEXT: %6 = load volatile i64, i64* %5, align 8
// CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 true)
// CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_shared32() {
// CHECK-LABEL: test_shared32
__attribute__((shared)) int val;
// CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
// CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %0, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
// CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_shared64() {
// CHECK-LABEL: test_shared64
__attribute__((shared)) __INT64_TYPE__ val;
// CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
// CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %0, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
// CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}
int global_val32;
__attribute__((device)) void test_global32() {
// CHECK-LABEL: test_global32
// CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
// CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %0, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
// CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
}
__INT64_TYPE__ global_val64;
__attribute__((device)) void test_global64() {
// CHECK-LABEL: test_global64
// CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
// CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %0, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
// CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((constant)) int cval32;
__attribute__((device)) void test_constant32() {
// CHECK-LABEL: test_constant32
int local_val;
// CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
// CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %0, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %1, i32* %local_val.ascast, align 4
local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
// CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i32 %3, i32* %local_val.ascast, align 4
local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((constant)) __INT64_TYPE__ cval64;
__attribute__((device)) void test_constant64() {
// CHECK-LABEL: test_constant64
__INT64_TYPE__ local_val;
// CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
// CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %0, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %1, i64* %local_val.ascast, align 8
local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
// CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %2, i32 7, i32 2, i1 false)
// CHECK-NEXT: store i64 %3, i64* %local_val.ascast, align 8
local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_order32() {
// CHECK-LABEL: test_order32
__attribute__((shared)) int val;
// CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %0, i32 4, i32 2, i1 false)
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup");
// CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %2, i32 5, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup");
// CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %4, i32 6, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup");
// CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %6, i32 7, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_order64() {
// CHECK-LABEL: test_order64
__attribute__((shared)) __INT64_TYPE__ val;
// CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %0, i32 4, i32 2, i1 false)
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup");
// CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %2, i32 5, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup");
// CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %4, i32 6, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup");
// CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %6, i32 7, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}
__attribute__((device)) void test_scope32() {
// CHECK-LABEL: test_scope32
__attribute__((shared)) int val;
// CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %0, i32 7, i32 1, i1 false)
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
// CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %4, i32 7, i32 3, i1 false)
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %6, i32 7, i32 4, i1 false)
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront");
}
__attribute__((device)) void test_scope64() {
// CHECK-LABEL: test_scope64
__attribute__((shared)) __INT64_TYPE__ val;
// CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %0, i32 7, i32 1, i1 false)
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
// CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
// CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %4, i32 7, i32 3, i1 false)
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %6, i32 7, i32 4, i1 false)
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront");
}

@@ -0,0 +1,18 @@
// REQUIRES: amdgpu-registered-target
// RUN: not %clang_cc1 %s -x hip -fcuda-is-device -o - -emit-llvm -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
void test_host() {
int val;
// CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
// CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
// CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "");
// CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "");
}

@@ -144,3 +144,51 @@ void test_s_setreg(int x, int y) {
__builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
__builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
}
void test_atomic_inc32() {
int val = 17;
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_inc32(4); // expected-error {{too few arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_inc32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_inc32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
const char ptr[] = "workgroup";
val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
}
void test_atomic_inc64() {
__INT64_TYPE__ val = 17;
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_inc64(4); // expected-error {{too few arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_inc64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_inc64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
const char ptr[] = "workgroup";
val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
}
void test_atomic_dec32() {
int val = 17;
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_dec32(4); // expected-error {{too few arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_dec32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_dec32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
const char ptr[] = "workgroup";
val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
}
void test_atomic_dec64() {
__INT64_TYPE__ val = 17;
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
val = __builtin_amdgcn_atomic_dec64(4); // expected-error {{too few arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_dec64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
val = __builtin_amdgcn_atomic_dec64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
const char ptr[] = "workgroup";
val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
}