繼上一篇之後,這次是 OpenCL 版本,速度快了不少,但是顯示卡會 freeze……修改 TDR Level 讓 driver 可以跑久一點:
KeyPath   : HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\GraphicsDrivers
KeyValue  : TdrLevel
ValueType : REG_DWORD
ValueData :
  TdrLevelOff (0) - Detection disabled.
  TdrLevelBugcheck (1) - Bug check on detected timeout, for example, no recovery.
  TdrLevelRecoverVGA (2) - Recover to VGA (not implemented).
  TdrLevelRecover (3) - Recover on timeout. This is the default value.
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <time.h>
#include <opencv2/opencv.hpp>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // eliminate deprecated warning
#include <CL/cl.hpp>
using namespace cv;
using namespace cl;
using namespace std;
// Tunable parameter: uniform scale factor applied to both input images.
// main() passes it to resize() as both the horizontal and the vertical factor.
float scale = 1.0;
// Return the first OpenCL platform reported by Platform::get().
// When no platform is reported, print a message and terminate with status 1.
Platform getPlatform() {
    vector<Platform> available;
    Platform::get( &available );
    if ( !available.empty() ) {
        return available.front();
    }
    printf("no platform found.\n");
    exit(1);
}
// Return the first device of any type (CL_DEVICE_TYPE_ALL) on `platform`.
// When the platform reports no device, print a message and terminate with status 1.
Device getDevice(Platform platform) {
    vector<Device> available;
    platform.getDevices(CL_DEVICE_TYPE_ALL, &available);
    if ( !available.empty() ) {
        return available.front();
    }
    printf("no device found\n");
    exit(1);
}
// Read the whole file `fileName` (opened in binary mode, so bytes are not
// translated) into `kernelString`, replacing its previous contents.
// If the file cannot be opened, print a message and terminate with status 1.
void readKernelFile(const string &fileName, string &kernelString) {
    ifstream input( fileName, ifstream::binary );
    if ( input.is_open() ) {
        // Stream the file's buffer into an in-memory string stream in one go.
        stringstream contents;
        contents << input.rdbuf();
        input.close();
        kernelString = contents.str();
        return;
    }
    printf("open kernel file failed: %s\n", fileName.c_str());
    exit(1);
}
int main (int argc, char *argv[]) {
const char *fileNameL = argv[1];
const char *fileNameR = argv[2];
const char *fileNameKernel = argv[3];
// read input image
Mat1f imgL, imgR;
imread(fileNameL, 0).convertTo(imgL, CV_32FC1);
imread(fileNameR, 0).convertTo(imgR, CV_32FC1);
// scaling down
resize(imgL, imgL, Size(), scale, scale);
resize(imgR, imgR, Size(), scale, scale);
// get kernel source code
Program::Sources sources;
string kernelString;
readKernelFile(fileNameKernel, kernelString);
sources.push_back({kernelString.c_str(), kernelString.length()});
// get Platform
Platform platform = getPlatform();
cout << platform.getInfo<CL_PLATFORM_NAME>() << endl;
// get device
Device device = getDevice( platform );
cout << device.getInfo<CL_DEVICE_NAME>() << endl;
// get context
Context context(device);
// build kernel program
Program program(context, sources);
if( program.build({device}) != CL_SUCCESS ){
cout<< " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << endl;
exit(1);
}
// command queue
CommandQueue queue(context, device);
// host result
Mat1f disparityL = Mat1f::zeros( imgL.size() );
Mat1f disparityR = Mat1f::zeros( imgR.size() );
// create buffers on the device
Buffer devImgL(context, CL_MEM_READ_ONLY , sizeof(float)*imgL.total());
Buffer devImgR(context, CL_MEM_READ_ONLY , sizeof(float)*imgR.total());
Buffer devDispL(context, CL_MEM_WRITE_ONLY, sizeof(float)*disparityL.total());
Buffer devDispR(context, CL_MEM_WRITE_ONLY, sizeof(float)*disparityR.total());
// copy buffer form host to device
queue.enqueueWriteBuffer(devImgL, CL_TRUE, 0, sizeof(float)*imgL.total(), imgL.data);
queue.enqueueWriteBuffer(devImgR, CL_TRUE, 0, sizeof(float)*imgR.total(), imgR.data);
// make kernel function
auto blockMatch = make_kernel<Buffer&, Buffer&, Buffer&>(program, "blockMatch");
// run kernel
EnqueueArgs kernelDim(queue, NDRange(imgL.rows, imgL.cols));
clock_t t0 = clock();
blockMatch(kernelDim, devImgL, devImgR, devDispL);
blockMatch(kernelDim, devImgR, devImgL, devDispR);
// read buffer from device to host
queue.enqueueReadBuffer(devDispL, CL_TRUE, 0, sizeof(float)*disparityL.total(), disparityL.data);
queue.enqueueReadBuffer(devDispR, CL_TRUE, 0, sizeof(float)*disparityR.total(), disparityR.data);
clock_t t1 = clock();
printf("time: %f sec\n", (double)(t1-t0) / CLOCKS_PER_SEC);
normalize(disparityL, disparityL, 0, 1, NORM_MINMAX);
normalize(disparityR, disparityR, 0, 1, NORM_MINMAX);
imshow("disparityL", disparityL);
imshow("disparityR", disparityR);
imwrite("disparityL.png", disparityL*255);
imwrite("disparityR.png", disparityR*255);
waitKey();
return 0;
}
// Side length, in pixels, of the square matching window (patches are
// WIN_SIZE x WIN_SIZE). The patch loops in getPatch fill
// 2*HALF_WIN_SIZE+1 entries per axis, which equals WIN_SIZE only when
// WIN_SIZE is odd.
#define WIN_SIZE 35
// Total horizontal search span in pixels; blockMatch searches half of it on
// each side of the current column.
#define SEARCH_RANGE 200
// derived parameters (integer division)
#define HALF_WIN_SIZE (WIN_SIZE/2)
#define HALF_SEARCH_RANGE (SEARCH_RANGE/2)
// Copy the window centred on (cx, cy) from the row-major image `img`
// (row stride `cols`) into `patch`. The window spans HALF_WIN_SIZE pixels on
// every side of the centre. No bounds checking is done here: the caller must
// make sure the whole window lies inside the image.
void getPatch(global const float* img, float patch[WIN_SIZE][WIN_SIZE], int cx, int cy, int cols) {
    for (int dy = -HALF_WIN_SIZE; dy <= HALF_WIN_SIZE; ++dy) {
        // Points at the window's centre column within the current source row.
        global const float* srcRow = img + (cy + dy) * cols + cx;
        for (int dx = -HALF_WIN_SIZE; dx <= HALF_WIN_SIZE; ++dx) {
            patch[dy + HALF_WIN_SIZE][dx + HALF_WIN_SIZE] = srcRow[dx];
        }
    }
}
// Sum of absolute differences (SAD) between two WIN_SIZE x WIN_SIZE patches.
// Elements are accumulated in row-major order.
float getSad(float patchL[WIN_SIZE][WIN_SIZE], float patchR[WIN_SIZE][WIN_SIZE]){
    float total = 0;
    for (int row = 0; row < WIN_SIZE; ++row) {
        const float* rowL = patchL[row];
        const float* rowR = patchR[row];
        for (int col = 0; col < WIN_SIZE; ++col) {
            const float diff = rowL[col] - rowR[col];
            total += fabs(diff);
        }
    }
    return total;
}
// Block-matching stereo kernel, one work-item per pixel.
//
// The global range is expected to be (rows, cols): dimension 0 indexes the
// image row and dimension 1 the column. For the pixel (cx, cy) the kernel
// takes the window around it in `imgL`, slides a window along the same row of
// `imgR` within HALF_SEARCH_RANGE columns on either side, and stores in
// `disparity` the absolute column offset of the candidate with the smallest
// SAD (the first such candidate wins ties).
//
// imgL, imgR : row-major float images of rows*cols elements (read only)
// disparity  : row-major float output of rows*cols elements
void kernel blockMatch(global const float* imgL, global const float* imgR, global float* disparity) {
    int cy = get_global_id(0);
    int cx = get_global_id(1);
    int rows = get_global_size(0);
    int cols = get_global_size(1);

    // Border pixels cannot hold a full window. Write an explicit 0 instead of
    // returning silently: otherwise these output elements keep whatever the
    // device buffer happened to contain, and that garbage reaches the host.
    if ( cx < HALF_WIN_SIZE ||
         cy < HALF_WIN_SIZE ||
         cx >= cols-HALF_WIN_SIZE ||
         cy >= rows-HALF_WIN_SIZE ) {
        disparity[cy*cols+cx] = 0;
        return;
    }

    // left image patch
    float patchL[WIN_SIZE][WIN_SIZE];
    getPatch(imgL, patchL, cx, cy, cols);

    // search range bound, clamped so every candidate window stays inside the row
    int minSearchX = max(cx-HALF_SEARCH_RANGE, HALF_WIN_SIZE);
    int maxSearchX = min(cx+HALF_SEARCH_RANGE, cols-HALF_WIN_SIZE-1);

    // right image patch, SAD
    // FLT_MAX is a proper float sentinel; any real SAD compares below it.
    float minSad = FLT_MAX;
    float bestDisparity = 0;
    for (int x = minSearchX; x <= maxSearchX; ++x) { // search range
        // right image patch
        float patchR[WIN_SIZE][WIN_SIZE];
        getPatch(imgR, patchR, x, cy, cols);
        // sad
        float sad = getSad(patchL, patchR);
        if ( sad < minSad ) {
            minSad = sad;
            bestDisparity = abs( cx - x );
        }
    }
    disparity[cy*cols+cx] = bestDisparity;
}

在〈Stereo Matching using OpenCL〉中有 1 則留言