Find your OpenCL platform
The following function will locate your OpenCL platform or alert you if one cannot be found:
cl_platform_id getPlatformID()
{
cl_platform_id plat_id;
{
cl_platform_id plat_id;
cl_uint my_err = clGetPlatformIDs(1, &plat_id, NULL);
if(my_err != CL_SUCCESS)
{
printf("Could not find OpenCL platform\n");
plat_id = NULL;
}
return plat_id;
}
{
printf("Could not find OpenCL platform\n");
plat_id = NULL;
}
return plat_id;
}
Find your OpenCL supported GPU
The following function will locate your GPU or alert you if no supported GPU is found on your platform:
// Look up one GPU device on the given OpenCL platform.
//
// plat_id: the platform to search.
//
// Returns the device ID on success. If clGetDeviceIDs fails (for example
// because the platform has no GPU), prints a message and returns NULL.
cl_device_id getGPUID(cl_platform_id plat_id)
{
    cl_device_id dev_id = NULL;     // The ID of the GPU device

    // OpenCL status codes are (negative) cl_int values, not cl_uint.
    cl_int my_err = clGetDeviceIDs( // Get an OpenCL device
        plat_id,                    // The platform ID
        CL_DEVICE_TYPE_GPU,         // Ignore CPUs, etc.
        1,                          // Just one for now
        &dev_id,                    // Place ID here
        NULL                        // Device count is not needed
    );
    if (my_err != CL_SUCCESS) {
        printf("Could not find supported GPU\n");
        dev_id = NULL;
    }
    return dev_id;
}
Square each element of a 2D array with your GPU
The following program uses the above functions to square each element of a 2D array:
int main()
{
int i,j;
int N = 10; // Array Size (NxN
int A_size = sizeof(float)*N*N; // Size in bytes
float *A = malloc(A_size); // Storage for input array
float *B = malloc(A_size); // Storage for output array
for(i=0;i<N;i++) // Initialize input array
for(j=0;j<N;j++)
A[i*N+j] = i+j;
cl_mem A_mem,B_mem; // OpenCL memory objects
cl_uint my_err; // Error catching
cl_device_id device_id; // GPU ID
cl_context context;
cl_command_queue queue;
device_id = getGPUID(getPlatformID());
if(device_id == NULL){
printf("Could not find supported platform or device.\n");
exit(1);
}
context = clCreateContext(0,1,&device_id,NULL,NULL,&my_err);
if(my_err != CL_SUCCESS){
printf("Failed to create context\n");
exit(2);
}
queue = clCreateCommandQueue(context,device_id,0,&my_err);
if(my_err != CL_SUCCESS){
printf("Failed to create command queue\n");
exit(3);
}
A_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,A_size, NULL, &my_err);
if(my_err != CL_SUCCESS){
printf("Failed to create buffer\n");
exit(4);
}
// Load Input Array onto GPU
my_err = clEnqueueWriteBuffer(queue, A_mem, CL_TRUE, 0,A_size, (void *)A,0, NULL, NULL);
if(my_err != CL_SUCCESS){
printf("Failed to enqueue write buffer\n");
exit(5);
}
// Program to run on the GPU
char *program_source = "__kernel void square( __global float * A, int N){" \
"int id0=get_global_id(0);" \
"int id1=get_global_id(1);" \
"A[id0*N+id1] *= A[id0*N+id1];" \
"}\0";
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&program_source,NULL, &my_err);
if(my_err != CL_SUCCESS){
printf("Failed to create program\n");
exit(5);
}
my_err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if(my_err != CL_SUCCESS){
printf("Failed to build program\n");
int build_log_size = sizeof(char)*10000;
char * build_log = malloc(build_log_size);
clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG,build_log_size,build_log,NULL);
printf("%s\n",build_log);
exit(5);
}
cl_kernel kernel = clCreateKernel(program, "square", &my_err);
if(my_err != CL_SUCCESS){
printf("Failed to create kernel\n");
exit(5);
}
my_err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &A_mem);
my_err |= clSetKernelArg(kernel, 1, sizeof(int), &N);
if(my_err != CL_SUCCESS){
printf("Failed to set kernel args\n");
exit(5);
}
// Wait until The GPU data is loaded
clFinish(queue);
// Queue the GPU program
my_err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL,&N, NULL, 0, NULL, NULL);
if(my_err != CL_SUCCESS){
printf("Failed to enqueue kernel\n");
exit(5);
}
// Wait until the GPU program finishes
clFinish(queue);
// Read data from GPU
my_err = clEnqueueReadBuffer(
queue, A_mem, CL_TRUE, 0,
A_size, B, 0, NULL, NULL
);
if(my_err != CL_SUCCESS){
printf("Failed to enqueue read buffer\n");
exit(6);
}
clFinish(queue);
// Print output
for(i=0;i<N;i++){
for(j=0;j<N;j++)
printf("%f ",B[i*N+j]);
printf("\n");
}
// Destory Everything
clReleaseMemObject(A_mem);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
Nature is Infinite in its Intricacy |