From b7ecf6b4d6b0879e9215983eb4e2fc8b3baf5aa0 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 27 Jun 2014 18:35:30 +0000 Subject: [PATCH] [NVPTX] Add missing isel patterns for 64-bit atomics llvm-svn: 211933 --- lib/Target/NVPTX/NVPTXIntrinsics.td | 98 +++++++++++++++++++ test/CodeGen/NVPTX/atomics.ll | 141 ++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 test/CodeGen/NVPTX/atomics.ll diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index fdd4748920f..00c315c94e2 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1057,12 +1057,24 @@ def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>; def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) + , (atomic_load_max_64 node:$a, node:$b)>; +def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_max_64 node:$a, node:$b)>; +def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_max_64 node:$a, node:$b)>; def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>; def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>; def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>; +def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umax_64 node:$a, node:$b)>; +def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umax_64 node:$a, node:$b)>; +def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umax_64 node:$a, node:$b)>; defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2; @@ -1072,6 +1084,14 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2; // atom_min @@ -1089,12 +1117,24 @@ def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>; def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_min_64 node:$a, node:$b)>; +def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_min_64 node:$a, node:$b)>; +def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_min_64 node:$a, node:$b)>; def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>; def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>; def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>; +def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umin_64 node:$a, node:$b)>; +def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umin_64 node:$a, node:$b)>; +def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umin_64 node:$a, node:$b)>; defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2; @@ -1104,6 +1144,14 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2; defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2; // atom_inc atom_dec @@ -1153,6 +1209,12 @@ def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>; def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>; +def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_and_64 node:$a, node:$b)>; +def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_and_64 node:$a, node:$b)>; +def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_and_64 node:$a, node:$b)>; defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2; @@ -1162,6 +1224,14 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2; // atom_or @@ -1171,6 +1241,12 @@ def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>; def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>; +def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_or_64 node:$a, node:$b)>; +def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_or_64 node:$a, node:$b)>; +def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_or_64 node:$a, node:$b)>; defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2; @@ -1180,6 +1256,14 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2; defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2; // atom_xor @@ -1189,6 +1273,12 @@ def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>; def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>; +def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_xor_64 node:$a, node:$b)>; +def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_xor_64 node:$a, node:$b)>; +def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_xor_64 node:$a, node:$b)>; defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2; @@ -1198,6 +1288,14 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2; defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2; // atom_cas diff --git a/test/CodeGen/NVPTX/atomics.ll b/test/CodeGen/NVPTX/atomics.ll new file mode 100644 index 00000000000..10ab73d541e --- /dev/null +++ b/test/CodeGen/NVPTX/atomics.ll @@ -0,0 +1,141 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +; CHECK: atom0 +define i32 @atom0(i32* %addr, i32 %val) { +; CHECK: atom.add.u32 + %ret = atomicrmw add i32* %addr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom1 +define i64 @atom1(i64* %addr, i64 %val) { +; CHECK: atom.add.u64 + %ret = atomicrmw add i64* %addr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom2 +define i32 @atom2(i32* %subr, i32 %val) { +; CHECK: neg.s32 +; CHECK: atom.add.u32 + %ret = atomicrmw sub i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom3 +define i64 @atom3(i64* %subr, i64 %val) { +; CHECK: neg.s64 +; CHECK: atom.add.u64 + %ret = atomicrmw sub i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom4 +define i32 @atom4(i32* %subr, i32 %val) { +; CHECK: atom.and.b32 + %ret = atomicrmw and i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom5 +define i64 @atom5(i64* %subr, i64 %val) { +; CHECK: atom.and.b64 + %ret = atomicrmw and i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +;; NAND not yet supported +;define i32 @atom6(i32* %subr, i32 %val) { +; %ret = atomicrmw nand i32* %subr, i32 %val seq_cst +; ret i32 %ret +;} + +;define i64 @atom7(i64* %subr, i64 %val) { +; %ret = atomicrmw nand i64* %subr, i64 %val seq_cst +; ret i64 %ret +;} + +; CHECK: atom8 +define i32 @atom8(i32* %subr, i32 %val) { +; CHECK: atom.or.b32 + %ret = atomicrmw or i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom9 +define i64 @atom9(i64* %subr, i64 %val) { +; CHECK: atom.or.b64 + %ret = atomicrmw or i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom10 +define i32 @atom10(i32* %subr, i32 %val) { +; CHECK: atom.xor.b32 + %ret = atomicrmw xor i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom11 +define i64 @atom11(i64* %subr, i64 %val) { +; CHECK: atom.xor.b64 + %ret = atomicrmw xor i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom12 +define i32 @atom12(i32* %subr, i32 %val) { +; CHECK: atom.max.s32 + %ret = atomicrmw max i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom13 +define i64 @atom13(i64* %subr, i64 %val) { +; CHECK: atom.max.s64 + %ret = atomicrmw max i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom14 +define i32 @atom14(i32* %subr, i32 %val) { +; CHECK: atom.min.s32 + %ret = atomicrmw min i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom15 +define i64 @atom15(i64* %subr, i64 %val) { +; CHECK: atom.min.s64 + %ret = atomicrmw min i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom16 +define i32 @atom16(i32* %subr, i32 %val) { +; CHECK: atom.max.u32 + %ret = atomicrmw umax i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom17 +define i64 @atom17(i64* %subr, i64 %val) { +; CHECK: atom.max.u64 + %ret = atomicrmw umax i64* %subr, i64 %val seq_cst + ret i64 %ret +} + +; CHECK: atom18 +define i32 @atom18(i32* %subr, i32 %val) { +; CHECK: atom.min.u32 + %ret = atomicrmw umin i32* %subr, i32 %val seq_cst + ret i32 %ret +} + +; CHECK: atom19 +define i64 @atom19(i64* %subr, i64 %val) { +; CHECK: atom.min.u64 + %ret = atomicrmw umin i64* %subr, i64 %val seq_cst + ret i64 %ret +}