Stereo Matching using OpenCL

上一篇的 OpenCL 版本,速度快了不少,但是執行時顯卡會 freeze。修改登錄檔中的 TdrLevel,讓 driver 可以跑久一點:

KeyPath   : HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\GraphicsDrivers
KeyValue  : TdrLevel
ValueType : REG_DWORD
ValueData : TdrLevelOff (0) - Detection disabled.
            TdrLevelBugcheck (1) - Bug check on detected timeout, for example, no recovery.
            TdrLevelRecoverVGA (2) - Recover to VGA (not implemented).
            TdrLevelRecover (3) - Recover on timeout. This is the default value.

 

code

 

#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <time.h>

#include <opencv2/opencv.hpp>

#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // eliminate deprecated warning
#include <CL/cl.hpp>

using namespace cv;
using namespace cl;
using namespace std;

// parameter
// Uniform resize factor passed as both fx and fy to resize() for the two input
// images in main() before matching.
float scale     = 1.0;

// Returns the first OpenCL platform reported by Platform::get().
// Prints a message and terminates the process with exit code 1 when the
// platform list comes back empty.
Platform getPlatform() {
	vector<Platform> available;
	Platform::get( &available );
	if ( !available.empty() ) {
		return available.front();
	}

	// nothing to run on: report and bail out
	printf("no platform found.\n");
	exit(1);
}

// Returns the first device of any type (CL_DEVICE_TYPE_ALL) exposed by the
// given platform. Prints a message and terminates the process with exit
// code 1 when the platform lists no devices.
Device getDevice(Platform platform) {
	vector<Device> available;
	platform.getDevices(CL_DEVICE_TYPE_ALL, &available);
	if ( !available.empty() ) {
		return available.front();
	}

	// nothing to run on: report and bail out
	printf("no device found\n");
	exit(1);
}

// Reads the entire file `fileName` (opened in binary mode, so bytes are kept
// as-is) and stores its contents in `kernelString`, replacing whatever the
// string held before. Prints a message and terminates the process with exit
// code 1 if the file cannot be opened.
void readKernelFile(const string &fileName, string &kernelString) {
	ifstream source( fileName, ifstream::binary );
	if ( source.is_open() ) {
		// drain the whole stream buffer in one go
		ostringstream contents;
		contents << source.rdbuf();
		source.close();

		kernelString = contents.str();
		return;
	}

	printf("open kernel file failed: %s\n", fileName.c_str());
	exit(1);
}

int main (int argc, char *argv[]) {
	const char *fileNameL      = argv[1];
	const char *fileNameR      = argv[2];
	const char *fileNameKernel = argv[3];

	// read input image
	Mat1f imgL, imgR;
	imread(fileNameL, 0).convertTo(imgL, CV_32FC1);
	imread(fileNameR, 0).convertTo(imgR, CV_32FC1);

	// scaling down
	resize(imgL, imgL, Size(), scale, scale);
	resize(imgR, imgR, Size(), scale, scale);

	// get kernel source code
	Program::Sources sources;
	string kernelString;
	readKernelFile(fileNameKernel, kernelString);
	sources.push_back({kernelString.c_str(), kernelString.length()});

	// get Platform
	Platform platform = getPlatform();
	cout << platform.getInfo<CL_PLATFORM_NAME>() << endl;

	// get device
	Device device = getDevice( platform );
	cout << device.getInfo<CL_DEVICE_NAME>() << endl;

	// get context
	Context context(device);

	// build kernel program
	Program program(context, sources);
	if( program.build({device}) != CL_SUCCESS ){
		cout<< " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << endl;
		exit(1);
	}

	// command queue
	CommandQueue queue(context, device);

	// host result
	Mat1f disparityL = Mat1f::zeros( imgL.size() );
	Mat1f disparityR = Mat1f::zeros( imgR.size() );

	// create buffers on the device
	Buffer devImgL(context, CL_MEM_READ_ONLY , sizeof(float)*imgL.total());
	Buffer devImgR(context, CL_MEM_READ_ONLY , sizeof(float)*imgR.total());
	Buffer devDispL(context, CL_MEM_WRITE_ONLY, sizeof(float)*disparityL.total());
	Buffer devDispR(context, CL_MEM_WRITE_ONLY, sizeof(float)*disparityR.total());

	// copy buffer form host to device
	queue.enqueueWriteBuffer(devImgL, CL_TRUE, 0, sizeof(float)*imgL.total(), imgL.data);
	queue.enqueueWriteBuffer(devImgR, CL_TRUE, 0, sizeof(float)*imgR.total(), imgR.data);

	// make kernel function
	auto blockMatch = make_kernel<Buffer&, Buffer&, Buffer&>(program, "blockMatch");

	// run kernel
	EnqueueArgs kernelDim(queue, NDRange(imgL.rows, imgL.cols));
	clock_t t0 = clock();
	blockMatch(kernelDim, devImgL, devImgR, devDispL);
	blockMatch(kernelDim, devImgR, devImgL, devDispR);

	// read buffer from device to host
	queue.enqueueReadBuffer(devDispL, CL_TRUE, 0, sizeof(float)*disparityL.total(), disparityL.data);
	queue.enqueueReadBuffer(devDispR, CL_TRUE, 0, sizeof(float)*disparityR.total(), disparityR.data);
	
	clock_t t1 = clock();
	printf("time: %f sec\n", (double)(t1-t0) / CLOCKS_PER_SEC);

	normalize(disparityL, disparityL, 0, 1, NORM_MINMAX);
	normalize(disparityR, disparityR, 0, 1, NORM_MINMAX);
	imshow("disparityL", disparityL);
	imshow("disparityR", disparityR);
	imwrite("disparityL.png", disparityL*255);
	imwrite("disparityR.png", disparityR*255);
	waitKey();

	return 0;
}
// Edge length, in pixels, of the square matching window (patches are
// WIN_SIZE x WIN_SIZE). getPatch copies 2*HALF_WIN_SIZE+1 rows and columns,
// which equals WIN_SIZE only when WIN_SIZE is odd.
#define WIN_SIZE           35
// Total width, in pixels, of the horizontal search interval around the
// reference pixel (before clamping to the image).
#define SEARCH_RANGE       200

// auto parameter (derived from the two values above; integer division)
#define HALF_WIN_SIZE     (WIN_SIZE/2)
#define HALF_SEARCH_RANGE (SEARCH_RANGE/2)

// Copies the square window centred on pixel (cx, cy) of `img` into `patch`.
//   img   - image stored row by row with `cols` floats per row
//   patch - destination; patch[0][0] receives the window's top-left pixel
// The window spans HALF_WIN_SIZE pixels to each side of the centre. No bounds
// checks are done here, so the caller must keep the whole window inside the image.
void getPatch(global const float* img, float patch[WIN_SIZE][WIN_SIZE], int cx, int cy, int cols) {
	const int left = cx - HALF_WIN_SIZE;
	const int top  = cy - HALF_WIN_SIZE;
	const int side = 2*HALF_WIN_SIZE + 1;

	for (int row = 0; row < side; ++row) {
		// first pixel of this window row inside the source image
		global const float* src = img + (top + row)*cols + left;
		for (int col = 0; col < side; ++col) {
			patch[row][col] = src[col];
		}
	}
}

// Returns the sum of absolute differences (SAD) between two patches:
// the sum over every element of |patchL - patchR|, accumulated row by row.
float getSad(float patchL[WIN_SIZE][WIN_SIZE], float patchR[WIN_SIZE][WIN_SIZE]){
	float total = 0;
	for (int row = 0; row < WIN_SIZE; ++row) {
		const float* lhs = patchL[row];
		const float* rhs = patchR[row];
		for (int col = 0; col < WIN_SIZE; ++col) {
			const float diff = lhs[col] - rhs[col];
			total += fabs(diff);
		}
	}
	return total;
}

// Block-matching kernel: one work-item per pixel.
//   imgL      - reference image (row-major floats)
//   imgR      - image that is searched for the best-matching window
//   disparity - output, same layout as the images; receives |cx - bestX| for
//               the reference pixel at (cx, cy)
// Global id/size dimension 0 is the row (cy / rows) and dimension 1 is the
// column (cx / cols), so the global work size must equal the image size
// exactly: `cols` is used as the row stride of all three buffers.
//
// For every candidate column x on the same row within HALF_SEARCH_RANGE of cx
// (clamped so the window stays inside the image) the SAD between the two
// windows is computed; the first candidate with the smallest SAD wins.
// Pixels whose window does not fit inside the image get disparity 0.
void kernel blockMatch(global const float* imgL, global const float* imgR, global float* disparity) {
	int cy   = get_global_id(0);
	int cx   = get_global_id(1);
	int rows = get_global_size(0);
	int cols = get_global_size(1);
	
	// boundary: the window would leave the image, so no match is attempted.
	// Still write a defined value so the output never holds whatever the
	// buffer contained before the launch.
	if ( cx < HALF_WIN_SIZE       ||
	     cy < HALF_WIN_SIZE       ||
	     cx >= cols-HALF_WIN_SIZE ||
	     cy >= rows-HALF_WIN_SIZE ) {
		disparity[cy*cols+cx] = 0.0f;
		return;
	}
	
	// left image patch
	float patchL[WIN_SIZE][WIN_SIZE];
	getPatch(imgL, patchL, cx, cy, cols);
	
	// search range bound (keeps every candidate window fully inside the image)
	int minSearchX = max(cx-HALF_SEARCH_RANGE, HALF_WIN_SIZE);
	int maxSearchX = min(cx+HALF_SEARCH_RANGE, cols-HALF_WIN_SIZE-1);
	
	// right image patch, SAD
	float minSad        = MAXFLOAT; // larger than any finite SAD, so the first candidate always wins
	float bestDisparity = 0;
	for (int x = minSearchX; x <= maxSearchX; ++x) { // search range
		// right image patch
		float patchR[WIN_SIZE][WIN_SIZE];
		getPatch(imgR, patchR, x, cy, cols);
		
		// sad; strict '<' keeps the earliest candidate on ties
		float sad = getSad(patchL, patchR);
		if ( sad < minSad ) {
			minSad        = sad;
			bestDisparity = abs( cx - x );
		}
	}
	
	disparity[cy*cols+cx] = bestDisparity;
}

 

在〈Stereo Matching using OpenCL〉中有 1 則留言

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *