vec_io can also take binary!

This commit is contained in:
Ciro Santilli 2017-04-19 09:17:33 +01:00
parent 5e2bd82d1c
commit a2af3f8204
4 changed files with 94 additions and 54 deletions

1
.gitignore vendored
View File

@ -26,6 +26,7 @@ main
*.tmp
tmp.*
tmp
*.bak
core
core.*

View File

@ -1,4 +1,5 @@
/*Let's play with clCreateProgramWithBinary.
/*
Let's play with clCreateProgramWithBinary.
Compile the "inc.cl" CL C shader to binary, save the binary to a file, and load the shader from the binary:
@ -17,22 +18,15 @@ just dumps PTX human readable assembly, which we can modify without
reverse engineering. Hurray!
*/
#define BIN_PATH __FILE__ ".bin.tmp"
#include "common.h"
int main(int argc, char **argv) {
Common common;
FILE *f;
char *binary, *source_path;
cl_int input[] = {1, 2}, errcode_ret, binary_status;
cl_kernel kernel;
char *source_path;
cl_int input[] = {1, 2};
cl_mem buffer;
cl_program program;
const size_t global_work_size = sizeof(input) / sizeof(input[0]);
int use_cache;
long lenght;
size_t binary_size;
if (argc > 1) {
use_cache = !strcmp(argv[1], "0");
@ -45,37 +39,13 @@ int main(int argc, char **argv) {
source_path = "inc.cl";
}
/* Get the binary, and create a kernel with it. */
if (use_cache) {
common_init(&common, NULL);
binary = common_read_file(BIN_PATH, &lenght);
binary_size = lenght;
} else {
common_init_file(&common, source_path);
clGetProgramInfo(common.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_size, NULL);
binary = malloc(binary_size);
clGetProgramInfo(common.program, CL_PROGRAM_BINARIES, binary_size, &binary, NULL);
f = fopen(BIN_PATH, "w");
fwrite(binary, binary_size, 1, f);
fclose(f);
}
program = clCreateProgramWithBinary(
common.context, 1, &common.device, &binary_size,
(const unsigned char **)&binary, &binary_status, &errcode_ret
);
assert(NULL != program);
common_assert_success(binary_status);
common_assert_success(errcode_ret);
free(binary);
common_build_program(&common, NULL, &program);
kernel = clCreateKernel(program, "kmain", &errcode_ret);
assert(NULL != kernel);
common_assert_success(errcode_ret);
/* Create binary or kernel from binary. */
common_create_kernel_or_use_cache(&common, use_cache, source_path, __FILE__ ".bin.tmp");
/* Run the kernel created from the binary. */
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
clEnqueueNDRangeKernel(common.command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
clFlush(common.command_queue);
clFinish(common.command_queue);
clEnqueueReadBuffer(common.command_queue, buffer, CL_TRUE, 0, sizeof(input), input, 0, NULL, NULL);
@ -85,8 +55,6 @@ int main(int argc, char **argv) {
assert(input[1] == 3);
/* Cleanup. */
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseMemObject(buffer);
common_deinit(&common);
return EXIT_SUCCESS;

View File

@ -112,6 +112,9 @@ void common_create_kernel_file(
free(source);
}
/*
* @param[in] source: if NULL, the kernel and program fields of common are left as NULL
**/
void common_init_options(
Common *common,
const char *source,
@ -151,22 +154,71 @@ void common_init_file(
common_init_file_options(common, source_path, "");
}
/*
 * Release a kernel and a program handle.
 *
 * Either handle may be NULL, in which case it is skipped.
 *
 * @param[in] kernel: kernel to release, or NULL
 * @param[in] program: program to release, or NULL
 */
void common_deinit_kernel_and_program(cl_kernel kernel, cl_program program) {
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
}
/*
 * Release the OpenCL objects held by common: command queue, program, kernel,
 * context and, when CL_1_2 is defined, the device.
 *
 * NOTE(review): this text comes from a diff rendering and looks like it
 * interleaves removed and added lines. As written, the program, the kernel
 * and the context are each released twice (once directly, once more via
 * common_deinit_kernel_and_program / the second clReleaseContext) - confirm
 * against the real file before relying on this listing.
 *
 * @param[in] common: state whose handles are released
 */
void common_deinit(
Common *common
) {
clReleaseCommandQueue(common->command_queue);
/* Direct releases of program, kernel and context. */
clReleaseProgram(common->program);
if (NULL != common->kernel) {
clReleaseKernel(common->kernel);
}
if (NULL != common->context) {
clReleaseContext(common->context);
}
/* NULL-tolerant release of the same kernel and program handles, followed by an unconditional context release. */
common_deinit_kernel_and_program(common->kernel, common->program);
clReleaseContext(common->context);
/* The device is only released when CL_1_2 is defined. */
#ifdef CL_1_2
clReleaseDevice(common->device);
#endif
}
/*
 * Initialize common and leave it holding a "kmain" kernel that was created
 * from a program binary (clCreateProgramWithBinary).
 *
 * @param[in,out] common: initialized by this call; on return common->kernel
 *     and common->program are the objects created from the binary
 * @param[in] use_cache: if non-zero, the binary is read from bin_path;
 *     otherwise source_path is compiled and its binary is written to bin_path
 * @param[in] source_path: CL C source file, only used when use_cache is 0
 * @param[in] bin_path: file the binary is read from or written to
 *
 * Failures are reported through assert / common_assert_success, like the
 * other checks in this function.
 */
void common_create_kernel_or_use_cache(
    Common *common,
    int use_cache,
    char *source_path,
    char *bin_path
) {
    FILE *f;
    char *binary;
    cl_int errcode_ret, binary_status;
    cl_kernel kernel;
    cl_program program;
    int close_status;
    long length;
    size_t binary_size, written;

    if (use_cache) {
        /* No source is given: the cleanup below relies on common_init
         * leaving common->kernel and common->program as NULL in this case. */
        common_init(common, NULL);
        binary = common_read_file(bin_path, &length);
        binary_size = length;
    } else {
        common_init_file(common, source_path);
        errcode_ret = clGetProgramInfo(
            common->program, CL_PROGRAM_BINARY_SIZES,
            sizeof(binary_size), &binary_size, NULL
        );
        common_assert_success(errcode_ret);
        binary = malloc(binary_size);
        assert(NULL != binary);
        /* CL_PROGRAM_BINARIES fills an array of pointers (one per device),
         * so the size passed is that of the pointer array (a single pointer
         * here), not the size of the binary itself. */
        errcode_ret = clGetProgramInfo(
            common->program, CL_PROGRAM_BINARIES,
            sizeof(binary), &binary, NULL
        );
        common_assert_success(errcode_ret);
        /* "wb": the payload is binary, text mode would corrupt it on
         * platforms that translate line endings. */
        f = fopen(bin_path, "wb");
        assert(NULL != f);
        written = fwrite(binary, 1, binary_size, f);
        assert(written == binary_size);
        close_status = fclose(f);
        assert(0 == close_status);
        /* Keep NDEBUG builds free of unused-variable warnings. */
        (void)written;
        (void)close_status;
    }

    program = clCreateProgramWithBinary(
        common->context, 1, &common->device, &binary_size,
        (const unsigned char **)&binary, &binary_status, &errcode_ret
    );
    assert(NULL != program);
    common_assert_success(binary_status);
    common_assert_success(errcode_ret);
    free(binary);
    common_build_program(common, NULL, &program);
    kernel = clCreateKernel(program, "kmain", &errcode_ret);
    assert(NULL != kernel);
    common_assert_success(errcode_ret);

    /* Cleanup kernel and program if they were created from source. */
    common_deinit_kernel_and_program(common->kernel, common->program);
    common->kernel = kernel;
    common->program = program;
}
double common_get_nanos(void) {
struct timespec ts;
timespec_get(&ts, TIME_UTC);

View File

@ -1,9 +1,17 @@
/*
Process an arbitrary input vector with a given shader and print output to stdout.
Sample usage:
echo '1 2 3' | tr ' ' '\n' >vec_io.vec
./prog vec_io.cl vec_io.vec
Output:
2.000000e+00
3.000000e+00
4.000000e+00
Or you can use the default kernel and stdin input:
echo '1 2 3' | tr ' ' '\n' | ./prog
@ -12,6 +20,13 @@ Set global work size and work group size different than defaults (n and 1):
./prog -g 10 -l 5 vec_io.cl vec_io.vec
Generate a binary shader, and then use it (clCreateProgramWithBinary) instead of the CL C:
./prog vec_io.cl vec_io.vec
./prog -b vec_io.c.bin.tmp vec_io.vec
This allows you to modify the binary shader while reverse engineering it.
Generic boilerplate that:
- takes a vector as input either from stdin or from a file, one per line
@ -28,21 +43,25 @@ to parse clinfo and get those values out... hmmm...
#include "common.h"
int main(int argc, char **argv) {
char *cl_source_path;
char *source_path;
cl_float *io;
cl_mem buffer;
Common common;
FILE *input_vector_file;
float f;
int a, global_work_size_given;
int a, global_work_size_given, use_cache;
size_t i, global_work_size, local_work_size, n, nmax, io_sizeof;
/* Treat CLI arguments. */
global_work_size_given = 0;
local_work_size = 1;
use_cache = 0;
for (a = 1; a < argc; ++a) {
if (argv[a][0] == '-') {
switch(argv[a][1]) {
case 'b':
use_cache = 1;
break;
case 'g':
a++;
global_work_size = strtoul(argv[a], NULL, 10);
@ -58,9 +77,9 @@ int main(int argc, char **argv) {
}
}
if (argc > a) {
cl_source_path = argv[a];
source_path = argv[a];
} else {
cl_source_path = "vec_io.cl";
source_path = "vec_io.cl";
}
a++;
if (argc > a) {
@ -86,8 +105,8 @@ int main(int argc, char **argv) {
global_work_size = n;
}
/* Run kernel. */
common_init_file(&common, cl_source_path);
/* Create kernel. */
common_create_kernel_or_use_cache(&common, use_cache, source_path, __FILE__ ".bin.tmp");
buffer = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, io_sizeof, io, NULL);
clSetKernelArg(common.kernel, 0, sizeof(buffer), &buffer);
clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
@ -97,7 +116,7 @@ int main(int argc, char **argv) {
/* Print result. */
for (i = 0; i < n; ++i) {
printf("%f\n", io[i]);
printf("%.6e\n", io[i]);
}
/* Cleanup. */