OpenCL baby steps using MinGW

Headers & Document

Khronos OpenCL Registry

GitHub (C headers)

C++ wrapper (OpenCL 2.x)

C++ wrapper (OpenCL 1.x)

Library

Nvidia

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32\OpenCL.lib

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64\OpenCL.lib

my nvidia develop pack

Compile yourself

OpenCL-ICD-Loader

Binary

Included in AMD or Nvidia Driver

Note

  1. Nvidia only supports OpenCL 1.2; AMD supports OpenCL 2.x.
  2. There is no useful debugger comparable to Nvidia Nsight, so use printf inside the kernel.
  3. SPIR is the cross-platform intermediate language.

 Example

OpenClExample

reference

g++ -std=c++11 -I../include/ -O3 -c main.cpp
g++ -std=c++11 -L../lib/x64/ -o main.exe main.o -lopencl

main.exe kernel.cl
NVIDIA CUDA
GeForce GTX 750 Ti
hello at idx = 5
0
2
4
3
5
7
6
8
10
9

 

#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>

#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // eliminate deprecated warning
#include <CL/cl.hpp>

using namespace cl;
using namespace std;

// Returns the first platform that Platform::get() reports.
// Prints a message and terminates the process with status 1 when the
// returned list is empty.
Platform getPlatform() {
	vector<Platform> found;
	Platform::get( &found );

	// success path first: hand back the head of the list
	if ( !found.empty() ) {
		return found.front();
	}

	printf("no platform found.\n");
	exit(1);
}

// Returns the first device of any type (CL_DEVICE_TYPE_ALL) that the given
// platform reports. Prints a message and terminates the process with
// status 1 when the returned list is empty.
Device getDevice(Platform platform) {
	vector<Device> found;
	platform.getDevices(CL_DEVICE_TYPE_ALL, &found);

	// success path first: hand back the head of the list
	if ( !found.empty() ) {
		return found.front();
	}

	printf("no device found\n");
	exit(1);
}

// Reads the entire file `fileName` (opened in binary mode) into
// `kernelString`, replacing whatever the string held before.
// If the file cannot be opened, prints a message naming the file and
// terminates the process with status 1.
void readKernelFile(const string &fileName, string &kernelString) {
	ifstream input( fileName, ifstream::binary );

	if ( input.is_open() ) {
		// drain the whole stream buffer into an in-memory stream
		stringstream contents;
		contents << input.rdbuf();
		input.close();

		kernelString = contents.str();
		return;
	}

	printf("open kernel file failed: %s\n", fileName.c_str());
	exit(1);
}

int main (int argc, char *argv[]) {
	const int vecSize = 10;

	// get kernel source code
	Program::Sources sources;
	string kernelString;
	readKernelFile(argv[1], kernelString);
	sources.push_back({kernelString.c_str(), kernelString.length()});

	// get Platform
	Platform platform = getPlatform();
	cout << platform.getInfo<CL_PLATFORM_NAME>() << endl;

	// get device
	Device device = getDevice( platform );
	cout << device.getInfo<CL_DEVICE_NAME>() << endl;

	// get context
	Context context(device);

	// build kernel program
	Program program(context, sources);
	if( program.build({device}) != CL_SUCCESS ){
		cout<< " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << endl;
		exit(1);
	}

	// make kernel function
	auto vecAdd = make_kernel<Buffer&, Buffer&, Buffer&>(program, "vecAdd");

	// command queue
	CommandQueue queue(context, device);

	// buffer on host
	int A[vecSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
	int B[vecSize] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0};
	int C[vecSize];

	// create buffers on the device
	Buffer devA(context, CL_MEM_READ_ONLY , sizeof(int)*vecSize);
	Buffer devB(context, CL_MEM_READ_ONLY , sizeof(int)*vecSize);
	Buffer devC(context, CL_MEM_WRITE_ONLY, sizeof(int)*vecSize);

	// copy buffer form host to device
	queue.enqueueWriteBuffer(devA, CL_TRUE, 0, sizeof(int)*vecSize, A);
	queue.enqueueWriteBuffer(devB, CL_TRUE, 0, sizeof(int)*vecSize, B);

	// run kernel
	EnqueueArgs kernelDim(queue, vecSize);
	vecAdd(kernelDim, devA, devB, devC);

	// read buffer from device to host
	queue.enqueueReadBuffer(devC, CL_TRUE, 0, sizeof(int)*vecSize, C);

	// checkout
	for (int i = 0; i < vecSize; ++i) {
		printf("%d\n", C[i]);
	}

	return 0;
}
// OpenCL C kernel source (not part of the C++ host program above it).
// Each work-item takes its global id in dimension 0 as an index and stores
// A[index] + B[index] into C[index]. The work-item whose index is 5 also
// prints a greeting.
__kernel void vecAdd(__global const int* A, __global const int* B, __global int* C) {
	const int gid = get_global_id(0);

	if (gid == 5) {
		printf("hello at idx = %d\n", gid);
	}

	C[gid] = A[gid] + B[gid];
}

 

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *