Rename kernels to kmain since main forbidden...

This commit is contained in:
Ciro Santilli 2017-04-06 09:56:21 +01:00
parent a13d85edf0
commit d9573ed19f
15 changed files with 20 additions and 17 deletions

View File

@ -71,7 +71,7 @@ void common_create_kernel(
) {
if (NULL != source) {
common_create_program(common, source, options, &common->program);
common->kernel = clCreateKernel(common->program, "main", NULL);
common->kernel = clCreateKernel(common->program, "mymain", NULL);
} else {
common->kernel = NULL;
common->program = NULL;

View File

@ -19,7 +19,10 @@ int main(void) {
const char *source =
/* kernel pointer arguments must be __global, __constant, or __local. */
/* https://www.khronos.org/registry/cl/sdk/2.1/docs/man/xhtml/restrictions.html */
"__kernel void main(__global int *out) {\n"
/**/
/* Kernel functions must not be named main: NVIDIA's compiler may accept it,
* but other compilers don't, so use a different name such as kmain. */
"__kernel void kmain(__global int *out) {\n"
" out[0]++;\n"
"}\n";
cl_command_queue command_queue;
@ -37,7 +40,7 @@ int main(void) {
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
clBuildProgram(program, 1, &device, "", NULL, NULL);
kernel = clCreateKernel(program, "main", NULL);
kernel = clCreateKernel(program, "kmain", NULL);
buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int), &input, NULL);
clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
command_queue = clCreateCommandQueue(context, device, 0, NULL);

View File

@ -13,7 +13,7 @@ it is just a clEnqueueNDRangeKernel + get_global_id hello world.
int main(int argc, char **argv) {
const char *source =
"__kernel void main(__global int *io) {\n"
"__kernel void kmain(__global int *io) {\n"
" io[get_global_id(0)]++;\n"
"}\n";
cl_int *io, *expected_output;

View File

@ -3,7 +3,7 @@ API exercise, increment a vector with less global work groups than integers,
which forces us to put a for loop in the kernel.
I don't think we can get the size of each global work group from the kernel,
so we just calculate it on CPU ans pass a sa parameter.
so we just calculate it on CPU and pass as a parameter.
This is how the work will be split:
@ -17,7 +17,7 @@ This is how the work will be split:
int main(void) {
const char *source =
"__kernel void main(uint group_nlems, __global int *out) {\n"
"__kernel void kmain(uint group_nlems, __global int *out) {\n"
" uint i_min = get_global_id(0) * group_nlems;\n"
" uint i_max = i_min + group_nlems;\n"
" for (uint i = i_min; i < i_max; ++i) {\n"

View File

@ -1,4 +1,4 @@
__kernel void main(
__kernel void kmain(
__global float *A,
__global float *B,
__global float *C,

View File

@ -1,4 +1,4 @@
__kernel void main(
__kernel void kmain(
__global const float* restrict A,
__global const float* restrict B,
__global float* restrict C,

View File

@ -1,4 +1,4 @@
__kernel void main(
__kernel void kmain(
__global float* A,
__global float* B,
__global float* C,

View File

@ -1,4 +1,4 @@
__kernel void main(
__kernel void kmain(
__global float* A,
__global float* B,
__global float* C,

View File

@ -1,4 +1,4 @@
__kernel void main(
__kernel void kmain(
__global float* A,
__global float* B,
__global float* C,

View File

@ -1,4 +1,4 @@
__kernel void main(
__kernel void kmain(
__global float* A,
__global float* B,
__global float* C,

View File

@ -13,7 +13,7 @@ In practice, this is often used to pass problem size parameters to the kernel.
int main(void) {
const char *source =
"__kernel void main(int in, __global int *out) {\n"
"__kernel void kmain(int in, __global int *out) {\n"
" out[0] = in + 1;\n"
"}\n";
cl_int in = 1, out;

View File

@ -12,7 +12,7 @@ but this is a bit nicer.
int main(void) {
char options[256];
const char *source =
"__kernel void main(__global int *out) {\n"
"__kernel void kmain(__global int *out) {\n"
" out[0] = X;\n"
"}\n";
cl_int io[] = {0};

View File

@ -12,7 +12,7 @@ TODO: is using them faster than scalars?
int main(void) {
const char *source =
"__kernel void main(__global int2 *out) {\n"
"__kernel void kmain(__global int2 *out) {\n"
" out[get_global_id(0)]++;\n"
"}\n";
cl_int input[] = {0, 1, 2, 3};

View File

@ -1,4 +1,4 @@
__kernel void main(__global uint *out) {
__kernel void kmain(__global uint *out) {
out[0] = get_work_dim();
/* Total number of work items that will run this kernel, not the size of each one...

View File

@ -8,7 +8,7 @@ Allows us to reuse an existing device buffer and kernel, possibly saving allocat
int main(void) {
const char *source =
"__kernel void main(__global int *io) {\n"
"__kernel void kmain(__global int *io) {\n"
" io[get_global_id(0)]++;\n"
"}\n";
cl_int io[2];