mirror of
https://github.com/libretro/cpp-cheat.git
synced 2025-04-06 13:01:34 +00:00
144 lines
3.9 KiB
C
144 lines
3.9 KiB
C
/*
|
|
Process an arbitrary input vector with a given shader and print output to stdout.
|
|
|
|
Sample usage:
|
|
|
|
echo '1 2 3' | tr ' ' '\n' >vec_io.vec
|
|
./prog vec_io.cl vec_io.vec
|
|
|
|
Output:
|
|
|
|
2.000000e+00
|
|
3.000000e+00
|
|
4.000000e+00
|
|
|
|
Or you can use the default kernel and stdin input:
|
|
|
|
echo '1 2 3' | tr ' ' '\n' | ./prog
|
|
|
|
Multi vector operations can be done by reusing parts of the vector as in `+=`, e.g.:
|
|
|
|
echo '1 2 3 4' | tr ' ' '\n' | ./vec_io.out vec_io_sum.cl
|
|
|
|
gives:
|
|
|
|
4.000000e+00
|
|
6.000000e+00
|
|
3.000000e+00
|
|
4.000000e+00
|
|
|
|
which represents:
|
|
|
|
(1, 2) += (3, 4) == (4, 6)
|
|
|
|
Set global work size and work group size different than defaults (n and 1):
|
|
|
|
./prog -g 10 -l 5 vec_io.cl vec_io.vec
|
|
|
|
Generate a binary shader, and then use it (clCreateProgramWithBinary) instead of the CL C:
|
|
|
|
./prog vec_io.cl vec_io.vec
|
|
./prog -b vec_io.c.bin.tmp vec_io.vec
|
|
|
|
This allows you to modify the binary shader while reverse engineering it.
|
|
|
|
Generic boilerplate that:
|
|
|
|
- takes a vector as input either from stdin or from a file, one item per line
|
|
- processes it with a kernel read from a file, one vector item per work item (configurable with options)
|
|
- produces as output a vector of the same size to stdout
|
|
|
|
This allows you to quickly play with different kernels without recompiling the C code.
|
|
|
|
But is unsuitable for real applications, which require querying the CL implementation
|
|
for limits, especially work group and memory maximum sizes. Although you could use a script
|
|
to parse clinfo and get those values out... hmmm...
|
|
*/
|
|
|
|
#include "common.h"
|
|
|
|
int main(int argc, char **argv) {
|
|
char *source_path;
|
|
cl_float *io;
|
|
cl_mem buffer;
|
|
Common common;
|
|
FILE *input_vector_file;
|
|
float f;
|
|
int a, global_work_size_given, use_cache;
|
|
size_t i, global_work_size, local_work_size, n, nmax, io_sizeof;
|
|
|
|
/* Treat CLI arguments. */
|
|
global_work_size_given = 0;
|
|
local_work_size = 1;
|
|
use_cache = 0;
|
|
for (a = 1; a < argc; ++a) {
|
|
if (argv[a][0] == '-') {
|
|
switch(argv[a][1]) {
|
|
case 'b':
|
|
use_cache = 1;
|
|
break;
|
|
case 'g':
|
|
a++;
|
|
global_work_size = strtoul(argv[a], NULL, 10);
|
|
global_work_size_given = 1;
|
|
break;
|
|
case 'l':
|
|
a++;
|
|
local_work_size = strtoul(argv[a], NULL, 10);
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (argc > a) {
|
|
source_path = argv[a];
|
|
} else {
|
|
source_path = (char *)"vec_io.cl";
|
|
}
|
|
a++;
|
|
if (argc > a) {
|
|
input_vector_file = fopen(argv[a], "r");
|
|
} else {
|
|
input_vector_file = stdin;
|
|
}
|
|
|
|
/* Initialize data. */
|
|
n = 0;
|
|
nmax = n + 1;
|
|
io = (cl_float *)malloc(nmax * sizeof(*io));
|
|
while(fscanf(input_vector_file, "%f", &f) != EOF) {
|
|
io[n] = f;
|
|
n++;
|
|
if (n == nmax) {
|
|
nmax *= 2;
|
|
io = (cl_float *)realloc(io, nmax * sizeof(*io));
|
|
}
|
|
}
|
|
io_sizeof = n * sizeof(*io);
|
|
if (!global_work_size_given) {
|
|
global_work_size = n;
|
|
}
|
|
|
|
/* Create kernel. */
|
|
common_create_kernel_or_use_cache(&common, use_cache, source_path, (char *)(__FILE__ ".bin.tmp"));
|
|
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, io_sizeof, io, NULL);
|
|
clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
|
|
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
|
|
clFlush(common.command_queue);
|
|
clFinish(common.command_queue);
|
|
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, io_sizeof, io, 0, NULL, NULL);
|
|
|
|
/* Print result. */
|
|
for (i = 0; i < n; ++i) {
|
|
printf("%.6e\n", io[i]);
|
|
}
|
|
|
|
/* Cleanup. */
|
|
clReleaseMemObject(buffer);
|
|
common_deinit(&common);
|
|
free(io);
|
|
fclose(input_vector_file);
|
|
return EXIT_SUCCESS;
|
|
}
|