OpenCL baby steps using MinGW

Headers & Document

Library

Nvidia

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\Win32\OpenCL.lib

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\lib\x64\OpenCL.lib

My Nvidia development pack

Compile yourself

OpenCL-ICD-Loader

Binary

Included in AMD or Nvidia Driver

Note

Nvidia only supports OpenCL 1.2; AMD supports OpenCL 2.x.
There is no useful debugger comparable to Nvidia Nsight, so use printf inside the kernel.
SPIR is the cross-platform intermediate language.

Example

OpenClExample

reference

g++ -std=c++11 -I../include/ -O3 -c main.cpp
g++ -std=c++11 -L../lib/x64/ -o main.exe main.o -lopencl

main.exe kernel.cl
NVIDIA CUDA
GeForce GTX 750 Ti
hello at idx = 5
0
2
4
3
5
7
6
8
10
9

#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>

#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // eliminate deprecated warning
#include <CL/cl.hpp>

using namespace cl;
using namespace std;

// Returns the first entry of the platform list filled in by Platform::get.
// If the list comes back empty, prints a message and terminates the
// process with exit status 1.
Platform getPlatform() {
	vector<Platform> available;
	Platform::get( &available );

	if ( !available.empty() ) {
		return available.front();
	}

	printf("no platform found.\n");
	exit(1);
}

// Returns the first device that `platform` lists for CL_DEVICE_TYPE_ALL.
// If the platform reports no device, prints a message and terminates the
// process with exit status 1.
Device getDevice(Platform platform) {
	vector<Device> found;
	platform.getDevices(CL_DEVICE_TYPE_ALL, &found);

	if ( !found.empty() ) {
		return found.front();
	}

	printf("no device found\n");
	exit(1);
}

// Reads the entire content of the file `fileName` into `kernelString`.
// The file is opened in binary mode, so bytes are copied without any
// newline translation. If the file cannot be opened, an error message is
// printed and the process exits with status 1.
void readKernelFile(const string &fileName, string &kernelString) {
	ifstream source( fileName, ifstream::binary );

	if ( source.is_open() ) {
		// stream the whole file buffer into memory in one go
		stringstream contents;
		contents << source.rdbuf();
		source.close();

		kernelString = contents.str();
		return;
	}

	printf("open kernel file failed: %s\n", fileName.c_str());
	exit(1);
}

int main (int argc, char *argv[]) {
	const int vecSize = 10;

	// get kernel source code
	Program::Sources sources;
	string kernelString;
	readKernelFile(argv[1], kernelString);
	sources.push_back({kernelString.c_str(), kernelString.length()});

	// get Platform
	Platform platform = getPlatform();
	cout << platform.getInfo<CL_PLATFORM_NAME>() << endl;

	// get device
	Device device = getDevice( platform );
	cout << device.getInfo<CL_DEVICE_NAME>() << endl;

	// get context
	Context context(device);

	// build kernel program
	Program program(context, sources);
	if( program.build({device}) != CL_SUCCESS ){
		cout<< " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << endl;
		exit(1);
	}

	// make kernel function
	auto vecAdd = make_kernel<Buffer&, Buffer&, Buffer&>(program, "vecAdd");

	// command queue
	CommandQueue queue(context, device);

	// buffer on host
	int A[vecSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
	int B[vecSize] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0};
	int C[vecSize];

	// create buffers on the device
	Buffer devA(context, CL_MEM_READ_ONLY , sizeof(int)*vecSize);
	Buffer devB(context, CL_MEM_READ_ONLY , sizeof(int)*vecSize);
	Buffer devC(context, CL_MEM_WRITE_ONLY, sizeof(int)*vecSize);

	// copy buffer form host to device
	queue.enqueueWriteBuffer(devA, CL_TRUE, 0, sizeof(int)*vecSize, A);
	queue.enqueueWriteBuffer(devB, CL_TRUE, 0, sizeof(int)*vecSize, B);

	// run kernel
	EnqueueArgs kernelDim(queue, vecSize);
	vecAdd(kernelDim, devA, devB, devC);

	// read buffer from device to host
	queue.enqueueReadBuffer(devC, CL_TRUE, 0, sizeof(int)*vecSize, C);

	// checkout
	for (int i = 0; i < vecSize; ++i) {
		printf("%d\n", C[i]);
	}

	return 0;
}

// Element-wise addition: each work-item handles the element selected by
// its global id in dimension 0 and stores A[i] + B[i] into C[i].
// The work-item whose id is 5 also prints a message from the device.
kernel void vecAdd(global const int* A, global const int* B, global int* C) {
	const int gid = get_global_id(0);

	if (gid == 5) {
		C[gid] = A[gid] + B[gid];
		printf("hello at idx = %d\n", gid);
		return;
	}

	C[gid] = A[gid] + B[gid];
}

Headers & Document

Library

Nvidia

Compile yourself

Binary

Note

Example

發佈留言 取消回覆

發佈留言取消回覆