Skip to content

Commit

Permalink
added cuda support for chambolle parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
ernest-galbrun committed Jul 2, 2014
1 parent afb9b95 commit b66a131
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 34 deletions.
60 changes: 39 additions & 21 deletions modules/cudaoptflow/src/cuda/tvl1flow.cu
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,9 @@ namespace tvl1flow

__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
const PtrStepf grad, const PtrStepf rho_c,
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
PtrStepf u1, PtrStepf u2, PtrStepf error,
const float l_t, const float theta, const bool calcError)
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, const PtrStepf p31, const PtrStepf p32,
PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error,
const float l_t, const float theta, const float gamma, const bool calcError)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
Expand All @@ -223,66 +223,76 @@ namespace tvl1flow
const float I1wyVal = I1wy(y, x);
const float gradVal = grad(y, x);
const float u1OldVal = u1(y, x);
const float u2OldVal = u2(y, x);
const float u2OldVal = u2(y, x);
const float u3OldVal = u3(y, x);

const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal);

// estimate the values of the variable (v1, v2) (thresholding operator TH)

float d1 = 0.0f;
float d2 = 0.0f;
float d3 = 0.0f;

if (rho < -l_t * gradVal)
{
d1 = l_t * I1wxVal;
d2 = l_t * I1wyVal;
d3 = l_t * gamma;
}
else if (rho > l_t * gradVal)
{
d1 = -l_t * I1wxVal;
d2 = -l_t * I1wyVal;
d3 = -l_t * gamma;
}
else if (gradVal > numeric_limits<float>::epsilon())
{
const float fi = -rho / gradVal;
d1 = fi * I1wxVal;
d2 = fi * I1wyVal;
d3 = fi * gamma;
}

const float v1 = u1OldVal + d1;
const float v2 = u2OldVal + d2;
const float v2 = u2OldVal + d2;
const float v3 = u3OldVal + d3;

// compute the divergence of the dual variable (p1, p2)

const float div_p1 = divergence(p11, p12, y, x);
const float div_p2 = divergence(p21, p22, y, x);
const float div_p2 = divergence(p21, p22, y, x);
const float div_p3 = divergence(p31, p32, y, x);

// estimate the values of the optical flow (u1, u2)

const float u1NewVal = v1 + theta * div_p1;
const float u2NewVal = v2 + theta * div_p2;
const float u2NewVal = v2 + theta * div_p2;
const float u3NewVal = v3 + theta * div_p3;

u1(y, x) = u1NewVal;
u2(y, x) = u2NewVal;
u2(y, x) = u2NewVal;
u3(y, x) = u3NewVal;

if (calcError)
{
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
const float n3 = 0;// (u3OldVal - u3NewVal) * (u3OldVal - u3NewVal);
error(y, x) = n1 + n2 + n3;
}
}

void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta, bool calcError)
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
float l_t, float theta, float gamma, bool calcError)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));

estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError);
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );
Expand All @@ -294,7 +304,8 @@ namespace tvl1flow

namespace tvl1flow
{
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut)
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3,
PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
Expand All @@ -308,24 +319,31 @@ namespace tvl1flow
const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);
const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);

const float u3x = u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x);
const float u3y = u3(::min(y + 1, u1.rows - 1), x) - u3(y, x);

const float g1 = ::hypotf(u1x, u1y);
const float g2 = ::hypotf(u2x, u2y);
const float g2 = ::hypotf(u2x, u2y);
const float g3 = ::hypotf(u3x, u3y);

const float ng1 = 1.0f + taut * g1;
const float ng2 = 1.0f + taut * g2;
const float ng2 = 1.0f + taut * g2;
const float ng3 = 1.0f + taut * g3;

p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
p31(y, x) = (p31(y, x) + taut * u3x) / ng3;
p32(y, x) = (p32(y, x) + taut * u3y) / ng3;
}

void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut)
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));

estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut);
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );
Expand Down
38 changes: 25 additions & 13 deletions modules/cudaoptflow/src/tvl1flow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA()
epsilon = 0.01;
iterations = 300;
scaleStep = 0.8;
gamma = 0.0;
useInitialFlow = false;
}

Expand All @@ -80,6 +81,7 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
I1s.resize(nscales);
u1s.resize(nscales);
u2s.resize(nscales);
u3s.resize(nscales);

I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0);
I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);
Expand All @@ -92,6 +94,7 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp

u1s[0] = flowx;
u2s[0] = flowy;
u3s[0].create(I0.size(), CV_32FC1);

I1x_buf.create(I0.size(), CV_32FC1);
I1y_buf.create(I0.size(), CV_32FC1);
Expand All @@ -106,7 +109,9 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
p11_buf.create(I0.size(), CV_32FC1);
p12_buf.create(I0.size(), CV_32FC1);
p21_buf.create(I0.size(), CV_32FC1);
p22_buf.create(I0.size(), CV_32FC1);
p22_buf.create(I0.size(), CV_32FC1);
p31_buf.create(I0.size(), CV_32FC1);
p32_buf.create(I0.size(), CV_32FC1);

diff_buf.create(I0.size(), CV_32FC1);

Expand Down Expand Up @@ -134,20 +139,22 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp
{
u1s[s].create(I0s[s].size(), CV_32FC1);
u2s[s].create(I0s[s].size(), CV_32FC1);
}
}
u3s[s].create(I0s[s].size(), CV_32FC1);
}

if (!useInitialFlow)
{
u1s[nscales-1].setTo(Scalar::all(0));
u2s[nscales-1].setTo(Scalar::all(0));
}
u3s[nscales - 1].setTo(Scalar::all(0));

// pyramidal structure for computing the optical flow
for (int s = nscales - 1; s >= 0; --s)
{
// compute the optical flow at the current scale
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]);
procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s]);

// if this was the last scale, finish now
if (s == 0)
Expand All @@ -157,7 +164,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const Gp

// zoom the optical flow for the next finer scale
cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size());

// scale the optical flow with the appropriate zoom factor
cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
Expand All @@ -171,13 +179,13 @@ namespace tvl1flow
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho);
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta, bool calcError);
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut);
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
float l_t, float theta, float gamma, bool calcError);
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut);
}

void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2)
void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3)
{
using namespace tvl1flow;

Expand All @@ -202,11 +210,15 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G
GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows));
GpuMat p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows));
p11.setTo(Scalar::all(0));
p12.setTo(Scalar::all(0));
p21.setTo(Scalar::all(0));
p22.setTo(Scalar::all(0));
p22.setTo(Scalar::all(0));
p31.setTo(Scalar::all(0));
p32.setTo(Scalar::all(0));

GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows));

Expand All @@ -224,7 +236,7 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G
// some tweaks to make sum operation less frequently
bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon);

estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calcError);
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, gamma, static_cast<float>(theta), calcError);

if (calcError)
{
Expand All @@ -237,7 +249,7 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G
prevError -= scaledEpsilon;
}

estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut);
}
}
}
Expand Down

0 comments on commit b66a131

Please sign in to comment.