繼上一篇之後,這次是 OpenCL 版本,速度快了不少,但是執行時顯示卡會 freeze……可以修改 TDR Level,讓 driver 可以跑久一點:
KeyPath   : HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\GraphicsDrivers
KeyValue  : TdrLevel
ValueType : REG_DWORD
ValueData :
  - TdrLevelOff (0) - Detection disabled.
  - TdrLevelBugcheck (1) - Bug check on detected timeout, for example, no recovery.
  - TdrLevelRecoverVGA (2) - Recover to VGA (not implemented).
  - TdrLevelRecover (3) - Recover on timeout. This is the default value.
#include <vector> #include <iostream> #include <fstream> #include <sstream> #include <time.h> #include <opencv2/opencv.hpp> #define CL_USE_DEPRECATED_OPENCL_1_2_APIS // eliminate deprecated warning #include <CL/cl.hpp> using namespace cv; using namespace cl; using namespace std; // parameter float scale = 1.0; Platform getPlatform() { vector<Platform> platforms; Platform::get( &platforms ); if ( platforms.empty() ) { printf("no platform found.\n"); exit(1); } return platforms[0]; } Device getDevice(Platform platform) { vector<Device> devices; platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); if ( devices.empty() ) { printf("no device found\n"); exit(1); } return devices[0]; } void readKernelFile(const string &fileName, string &kernelString) { ifstream ifs( fileName, ifstream::binary ); if ( !ifs.is_open() ) { printf("open kernel file failed: %s\n", fileName.c_str()); exit(1); } stringstream temp; temp << ifs.rdbuf(); ifs.close(); kernelString = temp.str(); } int main (int argc, char *argv[]) { const char *fileNameL = argv[1]; const char *fileNameR = argv[2]; const char *fileNameKernel = argv[3]; // read input image Mat1f imgL, imgR; imread(fileNameL, 0).convertTo(imgL, CV_32FC1); imread(fileNameR, 0).convertTo(imgR, CV_32FC1); // scaling down resize(imgL, imgL, Size(), scale, scale); resize(imgR, imgR, Size(), scale, scale); // get kernel source code Program::Sources sources; string kernelString; readKernelFile(fileNameKernel, kernelString); sources.push_back({kernelString.c_str(), kernelString.length()}); // get Platform Platform platform = getPlatform(); cout << platform.getInfo<CL_PLATFORM_NAME>() << endl; // get device Device device = getDevice( platform ); cout << device.getInfo<CL_DEVICE_NAME>() << endl; // get context Context context(device); // build kernel program Program program(context, sources); if( program.build({device}) != CL_SUCCESS ){ cout<< " Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << endl; exit(1); } // command 
queue CommandQueue queue(context, device); // host result Mat1f disparityL = Mat1f::zeros( imgL.size() ); Mat1f disparityR = Mat1f::zeros( imgR.size() ); // create buffers on the device Buffer devImgL(context, CL_MEM_READ_ONLY , sizeof(float)*imgL.total()); Buffer devImgR(context, CL_MEM_READ_ONLY , sizeof(float)*imgR.total()); Buffer devDispL(context, CL_MEM_WRITE_ONLY, sizeof(float)*disparityL.total()); Buffer devDispR(context, CL_MEM_WRITE_ONLY, sizeof(float)*disparityR.total()); // copy buffer form host to device queue.enqueueWriteBuffer(devImgL, CL_TRUE, 0, sizeof(float)*imgL.total(), imgL.data); queue.enqueueWriteBuffer(devImgR, CL_TRUE, 0, sizeof(float)*imgR.total(), imgR.data); // make kernel function auto blockMatch = make_kernel<Buffer&, Buffer&, Buffer&>(program, "blockMatch"); // run kernel EnqueueArgs kernelDim(queue, NDRange(imgL.rows, imgL.cols)); clock_t t0 = clock(); blockMatch(kernelDim, devImgL, devImgR, devDispL); blockMatch(kernelDim, devImgR, devImgL, devDispR); // read buffer from device to host queue.enqueueReadBuffer(devDispL, CL_TRUE, 0, sizeof(float)*disparityL.total(), disparityL.data); queue.enqueueReadBuffer(devDispR, CL_TRUE, 0, sizeof(float)*disparityR.total(), disparityR.data); clock_t t1 = clock(); printf("time: %f sec\n", (double)(t1-t0) / CLOCKS_PER_SEC); normalize(disparityL, disparityL, 0, 1, NORM_MINMAX); normalize(disparityR, disparityR, 0, 1, NORM_MINMAX); imshow("disparityL", disparityL); imshow("disparityR", disparityR); imwrite("disparityL.png", disparityL*255); imwrite("disparityR.png", disparityR*255); waitKey(); return 0; }
#define WIN_SIZE 35
#define SEARCH_RANGE 200

// derived parameters
#define HALF_WIN_SIZE (WIN_SIZE/2)
#define HALF_SEARCH_RANGE (SEARCH_RANGE/2)

/*
 * Sum of absolute differences between two square windows that share the same
 * centre row `cy`: one centred at column `ax` in `imgA`, the other at column
 * `bx` in `imgB`. Each window spans HALF_WIN_SIZE pixels on every side of its
 * centre. Both images are row-major with `cols` pixels per row.
 *
 * Pixels are read directly from global memory and accumulated row by row,
 * left to right, so no per-work-item WIN_SIZE x WIN_SIZE scratch arrays are
 * needed. The caller must keep both windows inside the image.
 */
float windowSad(global const float* imgA, global const float* imgB,
                int ax, int bx, int cy, int cols)
{
    float sum = 0;
    for (int dy = -HALF_WIN_SIZE; dy <= HALF_WIN_SIZE; ++dy) {
        global const float* rowA = imgA + (cy + dy)*cols + ax;
        global const float* rowB = imgB + (cy + dy)*cols + bx;
        for (int dx = -HALF_WIN_SIZE; dx <= HALF_WIN_SIZE; ++dx) {
            sum += fabs(rowA[dx] - rowB[dx]);
        }
    }
    return sum;
}

/*
 * Block-matching disparity kernel.
 *
 * Launch with a 2-D global range of (rows, cols): dimension 0 is the pixel
 * row, dimension 1 the pixel column. For each pixel of `imgL` the kernel
 * scans the same row of `imgR` within +/- HALF_SEARCH_RANGE columns, picks the
 * column whose window has the smallest SAD (the first one wins on ties), and
 * stores the absolute column offset in `disparity`.
 *
 * Pixels closer than HALF_WIN_SIZE to any image edge cannot hold a full
 * window; their disparity is written as 0.
 */
kernel void blockMatch(global const float* imgL, global const float* imgR, global float* disparity)
{
    const int cy = get_global_id(0);
    const int cx = get_global_id(1);
    const int rows = get_global_size(0);
    const int cols = get_global_size(1);

    // boundary: no full window fits here. Write an explicit 0 so the output
    // buffer never contains uninitialized values.
    if ( cx < HALF_WIN_SIZE || cy < HALF_WIN_SIZE ||
         cx >= cols-HALF_WIN_SIZE || cy >= rows-HALF_WIN_SIZE ) {
        disparity[cy*cols + cx] = 0;
        return;
    }

    // search range bound, clamped so the candidate window stays in the image
    const int minSearchX = max(cx-HALF_SEARCH_RANGE, HALF_WIN_SIZE);
    const int maxSearchX = min(cx+HALF_SEARCH_RANGE, cols-HALF_WIN_SIZE-1);

    // MAXFLOAT: any real SAD value compares smaller than the initial minimum
    float minSad = MAXFLOAT;
    float bestDisparity = 0;
    for (int x = minSearchX; x <= maxSearchX; ++x) {
        const float sad = windowSad(imgL, imgR, cx, x, cy, cols);
        if ( sad < minSad ) {
            minSad = sad;
            bestDisparity = abs( cx - x );
        }
    }

    disparity[cy*cols + cx] = bestDisparity;
}
在〈Stereo Matching using OpenCL〉中有 1 則留言