llvm/test/CodeGen/NVPTX/vec-param-load.ll
Artem Belevich b1a24afa7a [NVPTX] Unify vectorization of load/stores of aggregate arguments and return values.
Original code only used vector loads/stores for explicit vector arguments.
It could also do more loads/stores than necessary (e.g v5f32 would
touch 8 f32 values). Aggregate types were loaded one element at a time,
even the vectors contained within.

This change attempts to generalize (and simplify) parameter space
loads/stores so that vector loads/stores can be used more broadly.
Functionality of the patch has been verified by compiling thrust
test suite and manually checking the differences between PTX
generated by llvm with and without the patch.

General algorithm:
* ComputePTXValueVTs() flattens input/output argument into a flat list
  of scalars to load/store and returns their types and offsets.
* VectorizePTXValueVTs() uses that data to create vectorization plan
  which returns an array of flags marking boundaries of vectorized
  load/stores. Scalars are represented as 1-element vectors.
* Code that generates loads/stores implements a simple state machine
  that constructs a vector according to the plan.

Differential Revision: https://reviews.llvm.org/D30011

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295784 91177308-0d34-0410-b5e6-96231b3b80d8
2017-02-21 22:56:05 +00:00

83 lines
3.7 KiB
LLVM

; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
define <16 x float> @test_v16f32(<16 x float> %a) {
; CHECK-LABEL: test_v16f32(
; CHECK-DAG: ld.param.v4.f32 {[[V_12_15:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+48];
; CHECK-DAG: ld.param.v4.f32 {[[V_8_11:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+32];
; CHECK-DAG: ld.param.v4.f32 {[[V_4_7:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0+16];
; CHECK-DAG: ld.param.v4.f32 {[[V_0_3:(%f[0-9]+[, ]*){4}]]}, [test_v16f32_param_0];
; CHECK-DAG: st.param.v4.f32 [func_retval0+0], {[[V_0_3]]}
; CHECK-DAG: st.param.v4.f32 [func_retval0+16], {[[V_4_7]]}
; CHECK-DAG: st.param.v4.f32 [func_retval0+32], {[[V_8_11]]}
; CHECK-DAG: st.param.v4.f32 [func_retval0+48], {[[V_12_15]]}
; CHECK: ret;
ret <16 x float> %a
}
define <8 x float> @test_v8f32(<8 x float> %a) {
; CHECK-LABEL: test_v8f32(
; CHECK-DAG: ld.param.v4.f32 {[[V_4_7:(%f[0-9]+[, ]*){4}]]}, [test_v8f32_param_0+16];
; CHECK-DAG: ld.param.v4.f32 {[[V_0_3:(%f[0-9]+[, ]*){4}]]}, [test_v8f32_param_0];
; CHECK-DAG: st.param.v4.f32 [func_retval0+0], {[[V_0_3]]}
; CHECK-DAG: st.param.v4.f32 [func_retval0+16], {[[V_4_7]]}
; CHECK: ret;
ret <8 x float> %a
}
define <4 x float> @test_v4f32(<4 x float> %a) {
; CHECK-LABEL: test_v4f32(
; CHECK-DAG: ld.param.v4.f32 {[[V_0_3:(%f[0-9]+[, ]*){4}]]}, [test_v4f32_param_0];
; CHECK-DAG: st.param.v4.f32 [func_retval0+0], {[[V_0_3]]}
; CHECK: ret;
ret <4 x float> %a
}
define <2 x float> @test_v2f32(<2 x float> %a) {
; CHECK-LABEL: test_v2f32(
; CHECK-DAG: ld.param.v2.f32 {[[V_0_3:(%f[0-9]+[, ]*){2}]]}, [test_v2f32_param_0];
; CHECK-DAG: st.param.v2.f32 [func_retval0+0], {[[V_0_3]]}
; CHECK: ret;
ret <2 x float> %a
}
; Oddly shaped vectors should not load any extra elements.
define <3 x float> @test_v3f32(<3 x float> %a) {
; CHECK-LABEL: test_v3f32(
; CHECK-DAG: ld.param.f32 [[V_2:%f[0-9]+]], [test_v3f32_param_0+8];
; CHECK-DAG: ld.param.v2.f32 {[[V_0_1:(%f[0-9]+[, ]*){2}]]}, [test_v3f32_param_0];
; CHECK-DAG: st.param.v2.f32 [func_retval0+0], {[[V_0_1]]}
; CHECK-DAG: st.param.f32 [func_retval0+8], [[V_2]]
; CHECK: ret;
ret <3 x float> %a
}
define <8 x i64> @test_v8i64(<8 x i64> %a) {
; CHECK-LABEL: test_v8i64(
; CHECK-DAG: ld.param.v2.u64 {[[V_6_7:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+48];
; CHECK-DAG: ld.param.v2.u64 {[[V_4_5:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+32];
; CHECK-DAG: ld.param.v2.u64 {[[V_2_3:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0+16];
; CHECK-DAG: ld.param.v2.u64 {[[V_0_1:(%rd[0-9]+[, ]*){2}]]}, [test_v8i64_param_0];
; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[V_0_1]]}
; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[V_2_3]]}
; CHECK-DAG: st.param.v2.b64 [func_retval0+32], {[[V_4_5]]}
; CHECK-DAG: st.param.v2.b64 [func_retval0+48], {[[V_6_7]]}
; CHECK: ret;
ret <8 x i64> %a
}
define <16 x i16> @test_v16i16(<16 x i16> %a) {
; CHECK-LABEL: test_v16i16(
; CHECK-DAG: ld.param.v4.u16 {[[V_12_15:(%rs[0-9]+[, ]*){4}]]}, [test_v16i16_param_0+24];
; CHECK-DAG: ld.param.v4.u16 {[[V_8_11:(%rs[0-9]+[, ]*){4}]]}, [test_v16i16_param_0+16];
; CHECK-DAG: ld.param.v4.u16 {[[V_4_7:(%rs[0-9]+[, ]*){4}]]}, [test_v16i16_param_0+8];
; CHECK-DAG: ld.param.v4.u16 {[[V_0_3:(%rs[0-9]+[, ]*){4}]]}, [test_v16i16_param_0];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[V_0_3]]}
; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[V_4_7]]}
; CHECK-DAG: st.param.v4.b16 [func_retval0+16], {[[V_8_11]]}
; CHECK-DAG: st.param.v4.b16 [func_retval0+24], {[[V_12_15]]}
; CHECK: ret;
ret <16 x i16> %a
}