forked from anuranbaka/OpenDTAM
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Optimizer.cpp
107 lines (91 loc) · 3.35 KB
/
Optimizer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// Free for non-commercial, non-military, and non-critical
// use unless incorporated in OpenCV.
// Inherits OpenCV Licence if in OpenCV.
#include "Optimizer.hpp"
#include "Optimizer.cuh"
#include <opencv2/core/cuda_stream_accessor.hpp>
#include <opencv2/core/core.hpp>
#include <iostream>
using namespace std;
using namespace cv;
using namespace cuda;
static void memZero(GpuMat& in,Stream& cvStream);
void Optimizer::setDefaultParams(){
float off=cv.layers;
thetaStart = 200.0;
thetaMin = 0.1;
thetaStep = .97;
epsilon = .1;
lambda = .01;
}
static void memZero(GpuMat& in,Stream& cvStream){
cudaSafeCall(cudaMemsetAsync(in.data,0,in.rows*in.cols*sizeof(float),cv::cuda::StreamAccessor::getStream(cvStream)));
}
Optimizer::Optimizer(CostVolume& cv) : cv(cv), cvStream(cv.cvStream)
{
//For performance reasons, OpenDTAM only supports multiple of 32 image sizes with cols >= 64
CV_Assert(cv.rows % 32 == 0 && cv.cols % 32 == 0 && cv.cols >= 64);
allocate();
setDefaultParams();
stableDepthEnqueued=haveStableDepth=0;
}
void Optimizer::attach(CostVolume& cv){
this->cv=cv;
cvStream=cv.cvStream;
}
#define FLATALLOC( n) n.create(1,cv.rows*cv.cols, CV_32FC1); n=n.reshape(0,cv.rows);CV_Assert(n.isContinuous())
void Optimizer::allocate(){
FLATALLOC(_a);
FLATALLOC(_d);
}
void Optimizer::initOptimization(){
theta=thetaStart;
initA();
}
void Optimizer::initA() {
cv.loInd.copyTo(_a,cvStream);
}
bool Optimizer::optimizeA(const cv::cuda::GpuMat _d,cv::cuda::GpuMat _a){
using namespace cv::cuda::dtam_optimizer;
localStream = cv::cuda::StreamAccessor::getStream(cvStream);
this->_a=_a;
Mat tmp(cv.rows,cv.cols,CV_32FC1);
bool doneOptimizing = theta <= thetaMin;
int layerStep = cv.rows * cv.cols;
float* d = (float*) _d.data;
float* a = (float*) _a.data;
loadConstants(cv.rows, cv.cols, cv.layers, layerStep, a, d, cv.data, (float*)cv.lo.data,
(float*)cv.hi.data, (float*)cv.loInd.data);
minimizeACaller ( cv.data, a, d, cv.layers, theta,lambda);
theta*=thetaStep;
if (doneOptimizing){
stableDepthReady=Ptr<char>((char*)(new cudaEvent_t));
cudaEventCreate((cudaEvent_t*)(char*)stableDepthReady,cudaEventBlockingSync);
_a.convertTo(stableDepth,CV_32FC1,cv.depthStep,cv.far,cvStream);
// cvStream.enqueueConvert(_a,stableDepth,CV_32FC1,cv.depthStep,cv.far);
cudaEventRecord(*(cudaEvent_t*)(char*)stableDepthReady,localStream);
stableDepthEnqueued = 1;
}
return doneOptimizing;
}
const cv::Mat Optimizer::depthMap(){
//Returns the best available depth map
// Code should not rely on the particular mapping of true
// internal data to true inverse depth, as this may change.
// Currently depth is just a constant multiple of the index, so
// infinite depth is always represented. This is likely to change.
Mat tmp(cv.rows,cv.cols,CV_32FC1);
cv::cuda::Stream str;
if(stableDepthEnqueued){
cudaEventSynchronize(*(cudaEvent_t*)(char*)stableDepthReady);
stableDepth.download(tmp,str);
// str.enqueueDownload(stableDepth,tmp);
str.waitForCompletion();
}else{
_a.download(tmp,str);
// str.enqueueDownload(_a,tmp);
str.waitForCompletion();
tmp = tmp * cv.depthStep + cv.far;
}
return tmp;
}