2017-09-16 12:58:36 +01:00

144 lines
3.9 KiB
C

/*
Process an arbitrary input vector with a given shader and print output to stdout.
Sample usage:
echo '1 2 3' | tr ' ' '\n' >vec_io.vec
./prog vec_io.cl vec_io.vec
Output:
2.000000e+00
3.000000e+00
4.000000e+00
Or you can use the default kernel and stdin input:
echo '1 2 3' | tr ' ' '\n' | ./prog
Multi vector operations can be done by reusing parts of the vector as in `+=`, e.g.:
echo '1 2 3 4' | tr ' ' '\n' | ./vec_io.out vec_io_sum.cl
gives:
4.000000e+00
6.000000e+00
3.000000e+00
4.000000e+00
which represents:
(1, 2) += (3, 4) == (4, 6)
Set global work size and work group size different than defaults (n and 1):
./prog -g 10 -l 5 vec_io.cl vec_io.vec
Generate a binary shader, and then use it (clCreateProgramWithBinary) instead of the CL C:
./prog vec_io.cl vec_io.vec
./prog -b vec_io.c.bin.tmp vec_io.vec
This allows you to modify the binary shader while reverse engineering it.
Generic boilerplate that:
- takes a vector as input either from stdin or from a file, one element per line
- processes it with a kernel read from a file, one vector item per work item (configurable with options)
- produces as output a vector of the same size to stdout
This allows you to quickly play with different kernels without recompiling the C code.
But is unsuitable for real applications, which require querying the CL implementation
for limits, especially work group and memory maximum sizes. Although you could use a script
to parse clinfo and get those values out... hmmm...
*/
#include "common.h"
int main(int argc, char **argv) {
char *source_path;
cl_float *io;
cl_mem buffer;
Common common;
FILE *input_vector_file;
float f;
int a, global_work_size_given, use_cache;
size_t i, global_work_size, local_work_size, n, nmax, io_sizeof;
/* Treat CLI arguments. */
global_work_size_given = 0;
local_work_size = 1;
use_cache = 0;
for (a = 1; a < argc; ++a) {
if (argv[a][0] == '-') {
switch(argv[a][1]) {
case 'b':
use_cache = 1;
break;
case 'g':
a++;
global_work_size = strtoul(argv[a], NULL, 10);
global_work_size_given = 1;
break;
case 'l':
a++;
local_work_size = strtoul(argv[a], NULL, 10);
break;
}
} else {
break;
}
}
if (argc > a) {
source_path = argv[a];
} else {
source_path = (char *)"vec_io.cl";
}
a++;
if (argc > a) {
input_vector_file = fopen(argv[a], "r");
} else {
input_vector_file = stdin;
}
/* Initialize data. */
n = 0;
nmax = n + 1;
io = (cl_float *)malloc(nmax * sizeof(*io));
while(fscanf(input_vector_file, "%f", &f) != EOF) {
io[n] = f;
n++;
if (n == nmax) {
nmax *= 2;
io = (cl_float *)realloc(io, nmax * sizeof(*io));
}
}
io_sizeof = n * sizeof(*io);
if (!global_work_size_given) {
global_work_size = n;
}
/* Create kernel. */
common_create_kernel_or_use_cache(&common, use_cache, source_path, (char *)(__FILE__ ".bin.tmp"));
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, io_sizeof, io, NULL);
clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
clFlush(common.command_queue);
clFinish(common.command_queue);
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, io_sizeof, io, 0, NULL, NULL);
/* Print result. */
for (i = 0; i < n; ++i) {
printf("%.6e\n", io[i]);
}
/* Cleanup. */
clReleaseMemObject(buffer);
common_deinit(&common);
free(io);
fclose(input_vector_file);
return EXIT_SUCCESS;
}