mirror of
https://github.com/libretro/cpp-cheat.git
synced 2025-04-04 03:51:34 +00:00
58 lines
1.7 KiB
C
58 lines
1.7 KiB
C
/*
|
|
Increment a vector, one value per work item.
|
|
|
|
It is useless to do this on a GPU, not enough work per IO,
|
|
it is just a clEnqueueNDRangeKernel + get_global_id hello world.
|
|
|
|
- http://stackoverflow.com/questions/15194798/vector-step-addition-slower-on-cuda
|
|
- http://stackoverflow.com/questions/22005405/how-to-add-up-the-elements-of-an-array-in-gpu-any-function-similar-to-cublasdas
|
|
- http://stackoverflow.com/questions/15161575/reduction-for-sum-of-vector-when-size-is-not-power-of-2
|
|
*/
|
|
|
|
#include "common.h"
|
|
|
|
int main(int argc, char **argv) {
|
|
const char *source =
|
|
"__kernel void kmain(__global int *io) {\n"
|
|
" io[get_global_id(0)]++;\n"
|
|
"}\n";
|
|
cl_int *io, *expected_output;
|
|
cl_mem buffer;
|
|
Common common;
|
|
size_t i, n, io_sizeof;
|
|
|
|
if (argc > 1) {
|
|
n = strtoul(argv[1], NULL, 10);
|
|
} else {
|
|
n = 2;
|
|
}
|
|
|
|
/* Initialize data. */
|
|
io_sizeof = n * sizeof(*io);
|
|
io = malloc(io_sizeof);
|
|
expected_output = malloc(n * sizeof(*expected_output));
|
|
for (i = 0; i < n; ++i) {
|
|
io[i] = i;
|
|
expected_output[i] = i + 1;
|
|
}
|
|
|
|
/* Run kernel. */
|
|
common_init(&common, source);
|
|
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, io_sizeof, io, NULL);
|
|
clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
|
|
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &n, NULL, 0, NULL, NULL);
|
|
clFlush(common.command_queue);
|
|
clFinish(common.command_queue);
|
|
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, io_sizeof, io, 0, NULL, NULL);
|
|
|
|
/* Assertions. */
|
|
common_vec_assert_eq_i(io, expected_output, n);
|
|
|
|
/* Cleanup. */
|
|
clReleaseMemObject(buffer);
|
|
common_deinit(&common);
|
|
free(io);
|
|
free(expected_output);
|
|
return EXIT_SUCCESS;
|
|
}
|