[NVPTX] Add missing isel patterns for 64-bit atomics

llvm-svn: 211933
This commit is contained in:
Justin Holewinski 2014-06-27 18:35:30 +00:00
parent 2ffa2d24b0
commit b7ecf6b4d6
2 changed files with 239 additions and 0 deletions

View File

@ -1057,12 +1057,24 @@ def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
, (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umax_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
@ -1072,6 +1084,14 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@ -1080,6 +1100,14 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_min
@ -1089,12 +1117,24 @@ def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umin_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
@ -1104,6 +1144,14 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@ -1112,6 +1160,14 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_inc atom_dec
@ -1153,6 +1209,12 @@ def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_and_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
@ -1162,6 +1224,14 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_or
@ -1171,6 +1241,12 @@ def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_or_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
@ -1180,6 +1256,14 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
// atom_xor
@ -1189,6 +1273,12 @@ def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_xor_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
@ -1198,6 +1288,14 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_cas

View File

@ -0,0 +1,141 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; CHECK: atom0
define i32 @atom0(i32* %addr, i32 %val) {
; CHECK: atom.add.u32
%ret = atomicrmw add i32* %addr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom1
define i64 @atom1(i64* %addr, i64 %val) {
; CHECK: atom.add.u64
%ret = atomicrmw add i64* %addr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom2
define i32 @atom2(i32* %subr, i32 %val) {
; CHECK: neg.s32
; CHECK: atom.add.u32
%ret = atomicrmw sub i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom3
define i64 @atom3(i64* %subr, i64 %val) {
; CHECK: neg.s64
; CHECK: atom.add.u64
%ret = atomicrmw sub i64* %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom4
define i32 @atom4(i32* %subr, i32 %val) {
; CHECK: atom.and.b32
%ret = atomicrmw and i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom5
define i64 @atom5(i64* %subr, i64 %val) {
; CHECK: atom.and.b64
%ret = atomicrmw and i64* %subr, i64 %val seq_cst
ret i64 %ret
}
;; NAND not yet supported
;define i32 @atom6(i32* %subr, i32 %val) {
; %ret = atomicrmw nand i32* %subr, i32 %val seq_cst
; ret i32 %ret
;}
;define i64 @atom7(i64* %subr, i64 %val) {
; %ret = atomicrmw nand i64* %subr, i64 %val seq_cst
; ret i64 %ret
;}
; CHECK: atom8
define i32 @atom8(i32* %subr, i32 %val) {
; CHECK: atom.or.b32
%ret = atomicrmw or i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom9
define i64 @atom9(i64* %subr, i64 %val) {
; CHECK: atom.or.b64
%ret = atomicrmw or i64* %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom10
define i32 @atom10(i32* %subr, i32 %val) {
; CHECK: atom.xor.b32
%ret = atomicrmw xor i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom11
define i64 @atom11(i64* %subr, i64 %val) {
; CHECK: atom.xor.b64
%ret = atomicrmw xor i64* %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom12
define i32 @atom12(i32* %subr, i32 %val) {
; CHECK: atom.max.s32
%ret = atomicrmw max i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom13
define i64 @atom13(i64* %subr, i64 %val) {
; CHECK: atom.max.s64
%ret = atomicrmw max i64* %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom14
define i32 @atom14(i32* %subr, i32 %val) {
; CHECK: atom.min.s32
%ret = atomicrmw min i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom15
define i64 @atom15(i64* %subr, i64 %val) {
; CHECK: atom.min.s64
%ret = atomicrmw min i64* %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom16
define i32 @atom16(i32* %subr, i32 %val) {
; CHECK: atom.max.u32
%ret = atomicrmw umax i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom17
define i64 @atom17(i64* %subr, i64 %val) {
; CHECK: atom.max.u64
%ret = atomicrmw umax i64* %subr, i64 %val seq_cst
ret i64 %ret
}
; CHECK: atom18
define i32 @atom18(i32* %subr, i32 %val) {
; CHECK: atom.min.u32
%ret = atomicrmw umin i32* %subr, i32 %val seq_cst
ret i32 %ret
}
; CHECK: atom19
define i64 @atom19(i64* %subr, i64 %val) {
; CHECK: atom.min.u64
%ret = atomicrmw umin i64* %subr, i64 %val seq_cst
ret i64 %ret
}