// Host-side implementation of fastHoughGPU_IFC: stages cluster positions and
// node projections in host buffers, copies them to the GPU and invokes the
// kernels through the wrapper functions (allocateArray, copyArrayToDevice,
// copyArrayFromDevice, setParameterSpace, callRiemannKernel,
// callIntersectKernel), which are defined outside this file.
//
// NOTE(review): this copy of the file lost every piece of text that stood
// between '<' and '>' (system header names, std::vector template arguments,
// parts of three for-loop headers). The loop headers are reconstructed below
// from the surrounding statements. The std::vector template arguments are
// declared in fastHoughGPU_IFC.h and are NOT reconstructed here: the three
// affected signatures are kept exactly as found and must be completed from
// the header before this file compiles.

#include "fastHoughGPU_IFC.h"

// NOTE(review): the original names of these three system headers were lost;
// the ones listed are the standard headers this file uses directly
// (assert, malloc/free, std::vector). Compare with the original include list.
#include <cassert>
#include <cstdlib>
#include <vector>

#include "fastHoughGPU_IFC.cuh"

namespace {

// Writes the X/Y/Z components of `pos` to dst[0..2] if the position passes
// the X() > 0 selection and reports whether it did. Templated so that the
// position type returned by the cluster accessor need not be named here.
template <typename Position>
inline bool appendIfSelected(Position pos, float* dst)
{
  if (!(pos.X() > 0)) {
    return false;
  }
  dst[0] = (float)pos.X();
  dst[1] = (float)pos.Y();
  dst[2] = (float)pos.Z();
  return true;
}

} // namespace


// Sets up the fixed-capacity staging buffers.
//   SCALING : stored in _RIEMANNSCALING and later handed to callRiemannKernel.
//   MAXSIZE : capacity (in nodes) of the projection and vote buffers; each
//             buffer holds 2*MAXSIZE elements.
fastHoughGPU_IFC::fastHoughGPU_IFC(float SCALING, int MAXSIZE)
{
  _initP = false;
  _initC = false;
  _MAXSIZE = MAXSIZE;
  _RIEMANNSCALING = SCALING;
  _CUTX = true;
  _threads = 128;

  // Well-defined state before initClusters() has run: setKernelPars() reads
  // _nClusters, and the destructor / initClusters() free _clusterPos.
  _nClusters = 0;
  _blocks = 1;
  _clusterPos = NULL;

  const size_t projBytes = 2 * (size_t)MAXSIZE * sizeof(float);
  const size_t voteBytes = 2 * (size_t)MAXSIZE * sizeof(uint);

  // host staging buffers
  _p0 = (float*) malloc(projBytes);
  _p1 = (float*) malloc(projBytes);
  _p2 = (float*) malloc(projBytes);
  _p3 = (float*) malloc(projBytes);
  _p4 = (float*) malloc(projBytes);
  _votes = (uint*) malloc(voteBytes);

  // device counterparts
  allocateArray((void**)&_p0_d, projBytes);
  allocateArray((void**)&_p1_d, projBytes);
  allocateArray((void**)&_p2_d, projBytes);
  allocateArray((void**)&_p3_d, projBytes);
  allocateArray((void**)&_p4_d, projBytes);
  // The vote buffer is handed to callIntersectKernel() and read back in
  // testIntersect(), so it needs device storage as well (same size as the
  // host-side _votes buffer).
  allocateArray((void**)&_votes_d, voteBytes);
}


// Releases the host-side buffers owned by this object.
// NOTE(review): assumes the object is never copied (the compiler-generated
// copy would share these pointers) - confirm against the class declaration.
fastHoughGPU_IFC::~fastHoughGPU_IFC()
{
  free(_p0);
  free(_p1);
  free(_p2);
  free(_p3);
  free(_p4);
  free(_votes);
  free(_clusterPos);   // NULL if initClusters() was never called

  //TODO: device buffers are still leaked - need more wrappers for cudaFree()
}


// Uploads the positions of all clusters with X() > 0 and runs the Riemann
// kernel on them. The kernel output stays in _clusterData_d on the GPU and is
// not copied back to the host.
//
// NOTE(review): the template argument of `clist` is missing in this copy of
// the file; restore it from fastHoughGPU_IFC.h. The body assumes elements are
// pointers with a pos() accessor (reconstructed from the surviving
// "...pos();" / "pos.X()" fragments).
void fastHoughGPU_IFC::initClusters(std::vector clist)
{
  const size_t nInput = clist.size();

  // Drop the host buffer of a previous call before allocating a new one.
  // Sized for all input clusters so that the single selection pass below can
  // never write past the end.
  free(_clusterPos);
  _clusterPos = (float*) malloc(3 * nInput * sizeof(float));

  // Select and pack in one pass. The original counted with X() > 0 but only
  // skipped X() < 0 while filling, so a cluster with X() == 0 was written
  // beyond the allocated buffer; one predicate now serves both purposes.
  int size = 0;
  for (size_t i = 0; i < nInput; ++i) {
    if (appendIfSelected(clist[i]->pos(), _clusterPos + 3 * size)) {
      ++size;
    }
  }

  _nClusters = size;
  _blocks = _nClusters / _threads + 1;

  //allocate GPU arrays
  // NOTE(review): device buffers of an earlier call are not released here
  // (no cudaFree() wrapper visible in this file).
  allocateArray((void**)&_clusterPos_d, 3 * size * sizeof(float));
  //resulting parameter space positions:
  allocateArray((void**)&_clusterData_d, 5 * size * sizeof(float));

  copyArrayToDevice(_clusterPos_d, _clusterPos, _nClusters * 3 * sizeof(float));

  //kernel invocation via wrapper:
  callRiemannKernel(_clusterPos_d, _clusterData_d, _nClusters,
                    _RIEMANNSCALING, _threads, _blocks);
  //result resides on the GPU and will not be copied back to host!

  _initC = true;
}


// Sets the 5-dimensional parameter-space limits from two vectors, which must
// both hold exactly 5 entries; otherwise nothing is set.
//
// NOTE(review): the template arguments of `mins` / `maxs` are missing in this
// copy of the file; restore them from fastHoughGPU_IFC.h.
void fastHoughGPU_IFC::initParameterSpace(std::vector mins, std::vector maxs)
{
  assert(mins.size()==5);
  assert(maxs.size()==5);
  // assert() is compiled out with NDEBUG; never read past a short vector.
  if (mins.size() != 5 || maxs.size() != 5) {
    return;
  }

  float lower[5];
  float upper[5];
  for (int i = 0; i < 5; i++) {
    lower[i] = mins[i];
    upper[i] = maxs[i];
  }
  // share the upload path with the pointer overload
  initParameterSpace(lower, upper);
}


// Sets the 5-dimensional parameter-space limits from two arrays of (at least)
// 5 floats each.
void fastHoughGPU_IFC::initParameterSpace(float* mins, float* maxs)
{
  // local copies are what gets handed to the wrapper
  float lower[5];
  float upper[5];
  for (int i = 0; i < 5; i++) {
    lower[i] = mins[i];
    upper[i] = maxs[i];
  }

  //wrapper function call
  setParameterSpace(lower, upper);
  _initP = true;
}


// Copies the five projection pairs of every node to the GPU, runs the
// intersect kernel and copies one vote counter per node back into _votes.
// Does nothing unless both initParameterSpace() and initClusters() have been
// called. THRESHOLD is not used by this function.
//
// NOTE(review): the template argument of `nodes` is missing in this copy of
// the file; restore it from fastHoughGPU_IFC.h.
void fastHoughGPU_IFC::testIntersect(std::vector nodes, int level, int THRESHOLD)
{
  const size_t nNodes = nodes.size();
  assert(nNodes < (size_t)_MAXSIZE);
  // assert() is compiled out with NDEBUG; the staging buffers hold 2*_MAXSIZE
  // floats each, so more than _MAXSIZE nodes would overflow them.
  if (nNodes > (size_t)_MAXSIZE) {
    return;
  }
  if (!(_initP && _initC)) {
    return;
  }

  // pack the (first, second) entry of each projection contiguously per node
  for (size_t n = 0; n < nNodes; ++n) {
    float* p0 = (nodes[n])->getProjection0();
    float* p1 = (nodes[n])->getProjection1();
    float* p2 = (nodes[n])->getProjection2();
    float* p3 = (nodes[n])->getProjection3();
    float* p4 = (nodes[n])->getProjection4();

    const size_t lo = 2 * n;
    const size_t hi = lo + 1;
    _p0[lo] = p0[0];  _p0[hi] = p0[1];
    _p1[lo] = p1[0];  _p1[hi] = p1[1];
    _p2[lo] = p2[0];  _p2[hi] = p2[1];
    _p3[lo] = p3[0];  _p3[hi] = p3[1];
    _p4[lo] = p4[0];  _p4[hi] = p4[1];
  }

  const size_t projBytes = nNodes * 2 * sizeof(float);
  copyArrayToDevice(_p0_d, _p0, projBytes);
  copyArrayToDevice(_p1_d, _p1, projBytes);
  copyArrayToDevice(_p2_d, _p2, projBytes);
  copyArrayToDevice(_p3_d, _p3, projBytes);
  copyArrayToDevice(_p4_d, _p4, projBytes);

  //kernel call
  callIntersectKernel(nNodes, level, _nClusters, _clusterData_d,
                      _p0_d, _p1_d, _p2_d, _p3_d, _p4_d, _votes_d,
                      _threads, _blocks);

  copyArrayFromDevice(_votes, _votes_d, nNodes * sizeof(uint));
}


// Sets the number of threads per block and recomputes the block count for the
// current number of clusters. A value of 0 is ignored (it would divide by
// zero).
void fastHoughGPU_IFC::setKernelPars(uint threads)
{
  if (threads == 0) {
    return;
  }
  _threads = threads;
  _blocks = _nClusters / threads + 1;
}