Headers & Document
Library
Nvidia
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32\OpenCL.lib
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64\OpenCL.lib
Compile yourself
Binary
Included in AMD or Nvidia Driver
Note
- Nvidia only supports OpenCL 1.2; AMD supports OpenCL 2.x.
- There is no useful debugger comparable to Nvidia Nsight; use printf for debugging.
- Cross-platform intermediate language: SPIR.
Example
g++ -std=c++11 -I../include/ -O3 -c main.cpp
g++ -std=c++11 -L../lib/x64/ -o main.exe main.o -lopencl
main.exe kernel.cl
NVIDIA CUDA
GeForce GTX 750 Ti
hello at idx = 5
0
2
4
3
5
7
6
8
10
9
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // eliminate deprecated warning
#include <CL/cl.hpp>
using namespace cl;
using namespace std;
// Queries the available OpenCL platforms and returns the first one listed.
// When the list comes back empty, prints a message and terminates the
// process with exit status 1.
Platform getPlatform() {
    vector<Platform> available;
    Platform::get(&available);

    if (!available.empty()) {
        return available.front();
    }

    printf("no platform found.\n");
    exit(1);
}
// Lists every device type (CL_DEVICE_TYPE_ALL) on the given platform and
// returns the first entry. When no device is listed, prints a message and
// terminates the process with exit status 1.
Device getDevice(Platform platform) {
    vector<Device> found;
    platform.getDevices(CL_DEVICE_TYPE_ALL, &found);

    if (!found.empty()) {
        return found.front();
    }

    printf("no device found\n");
    exit(1);
}
// Loads the whole file `fileName` (opened in binary mode) into
// `kernelString`, replacing whatever the string held before.
// If the file cannot be opened, prints a message naming the file and
// terminates the process with exit status 1.
void readKernelFile(const string &fileName, string &kernelString) {
    std::ifstream input(fileName, std::ifstream::binary);
    if (input.is_open() == false) {
        printf("open kernel file failed: %s\n", fileName.c_str());
        exit(1);
    }

    // Drain the file's stream buffer in one shot, then hand the text over.
    std::stringstream contents;
    contents << input.rdbuf();
    input.close();

    kernelString = contents.str();
}
int main (int argc, char *argv[]) {
const int vecSize = 10;
// get kernel source code
Program::Sources sources;
string kernelString;
readKernelFile(argv[1], kernelString);
sources.push_back({kernelString.c_str(), kernelString.length()});
// get Platform
Platform platform = getPlatform();
cout << platform.getInfo<CL_PLATFORM_NAME>() << endl;
// get device
Device device = getDevice( platform );
cout << device.getInfo<CL_DEVICE_NAME>() << endl;
// get context
Context context(device);
// build kernel program
Program program(context, sources);
if( program.build({device}) != CL_SUCCESS ){
cout<< " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << endl;
exit(1);
}
// make kernel function
auto vecAdd = make_kernel<Buffer&, Buffer&, Buffer&>(program, "vecAdd");
// command queue
CommandQueue queue(context, device);
// buffer on host
int A[vecSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
int B[vecSize] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0};
int C[vecSize];
// create buffers on the device
Buffer devA(context, CL_MEM_READ_ONLY , sizeof(int)*vecSize);
Buffer devB(context, CL_MEM_READ_ONLY , sizeof(int)*vecSize);
Buffer devC(context, CL_MEM_WRITE_ONLY, sizeof(int)*vecSize);
// copy buffer form host to device
queue.enqueueWriteBuffer(devA, CL_TRUE, 0, sizeof(int)*vecSize, A);
queue.enqueueWriteBuffer(devB, CL_TRUE, 0, sizeof(int)*vecSize, B);
// run kernel
EnqueueArgs kernelDim(queue, vecSize);
vecAdd(kernelDim, devA, devB, devC);
// read buffer from device to host
queue.enqueueReadBuffer(devC, CL_TRUE, 0, sizeof(int)*vecSize, C);
// checkout
for (int i = 0; i < vecSize; ++i) {
printf("%d\n", C[i]);
}
return 0;
}
/*
 * Element-wise integer addition: each work-item reads A and B at its own
 * global id (dimension 0) and stores the sum at the same index of C.
 * The work-item whose id is 5 also prints a message via the device-side
 * printf.
 * No bounds check is performed here; the launch size determines which
 * indices are touched.
 */
__kernel void vecAdd(__global const int* A, __global const int* B, __global int* C) {
    const int gid = get_global_id(0);
    const int sum = A[gid] + B[gid];
    C[gid] = sum;
    if (gid == 5) {
        printf("hello at idx = %d\n", gid);
    }
}
