MAPSuperresolution
jwetzl committed Jan 2, 2013
1 parent 53d7686 commit f1f9382
Showing 133 changed files with 3,598 additions and 4 deletions.
51 changes: 51 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,51 @@
cmake_minimum_required(VERSION 2.8)
project(Superresolution)

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")

find_package(CUDA REQUIRED)
find_package(FreeImage REQUIRED)
find_package(CudaLBFGS REQUIRED)

include(CheckComputeCapability.cmake)

include_directories(${CUDALBFGS_INCLUDE_DIRS})
include_directories(${FreeImage_INCLUDE_DIRS})

# ---

option(SUPERRES_ERROR_CHECKING "Enable CUDA error checking for the superres project.")
if (SUPERRES_ERROR_CHECKING)
add_definitions(-DSUPERRES_ERROR_CHECKING)
endif()

option(SUPERRES_TIMING "Enable timing for the superres project.")
if (SUPERRES_TIMING)
add_definitions(-DSUPERRES_TIMING)
endif()

option(SUPERRES_STORE_TRANSPOSE "Store transpose of system matrix for faster A^Tx multiplication." ON)

if (SUPERRES_STORE_TRANSPOSE)
add_definitions(-DSUPERRES_STORE_TRANSPOSE)
endif()

# ---

file(GLOB CU_FILES *.cu)
file(GLOB H_FILES *.h)
file(GLOB CPP_FILES *.cpp)

source_group("CUDA Sources" FILES ${CU_FILES})
source_group("CPP Sources" FILES ${CPP_FILES})
source_group("Headers" FILES ${H_FILES})

if (NOT CUDA_cusparse_LIBRARY)
find_library(CUDA_cusparse_LIBRARY NAMES cusparse HINTS /usr/local/cuda/lib)
endif()

cuda_add_executable(superres ${CU_FILES} ${CPP_FILES} ${H_FILES})
target_link_libraries(superres
${CUDALBFGS_LIBRARIES} ${FreeImage_LIBRARY}
${CUDA_cusparse_LIBRARY})
cuda_add_cublas_to_target(superres)
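
The options above only inject preprocessor definitions; the sources then guard optional code with #ifdef, as HuberLaplacian.cu does further down for SUPERRES_TIMING. A small sketch of that pattern follows; the include path and the "exampleSection" label are assumptions for illustration, not part of this commit.

// Sketch (not from this commit): how a -DSUPERRES_TIMING definition is consumed.
#include "CudaLBFGS/timer.h"      // assumed location of the 'timer' helper used below

void timedSection()
{
#ifdef SUPERRES_TIMING
	timer t("exampleSection");    // illustrative measurement label
	t.start();
#endif

	// ... kernel launches and cuBLAS/cuSPARSE calls ...

#ifdef SUPERRES_TIMING
	t.stop();
	t.saveMeasurement();          // persist the timing, as the destructors below do
#endif
}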
33 changes: 33 additions & 0 deletions CheckComputeCapability.cmake
@@ -0,0 +1,33 @@
#############################
# Check for GPUs present and their compute capability
# based on http://stackoverflow.com/questions/2285185/easiest-way-to-test-for-existence-of-cuda-capable-gpu-from-cmake/2297877#2297877 (Christopher Bruns)
if(CUDA_FOUND)
message(STATUS "${CMAKE_SOURCE_DIR}/cuda_compute_capability.c")
try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
${CMAKE_BINARY_DIR}
${CMAKE_SOURCE_DIR}/cuda_compute_capability.c
CMAKE_FLAGS
-DINCLUDE_DIRECTORIES:STRING=${CUDA_TOOLKIT_INCLUDE}
-DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR)
message(STATUS "Compile: ${RUN_OUTPUT_VAR}")
if (COMPILE_RESULT_VAR)
message(STATUS "compiled -> " ${RUN_RESULT_VAR})
else()
message(STATUS "didn't compile")
endif()
# COMPILE_RESULT_VAR is TRUE when compile succeeds
# RUN_RESULT_VAR is zero when a GPU is found
if(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
message(STATUS "worked")
set(CUDA_HAVE_GPU TRUE CACHE BOOL "Whether CUDA-capable GPU is present")
set(CUDA_COMPUTE_CAPABILITY ${RUN_OUTPUT_VAR} CACHE STRING "Compute capability of CUDA-capable GPU present")
set(CUDA_GENERATE_CODE "arch=compute_${CUDA_COMPUTE_CAPABILITY},code=sm_${CUDA_COMPUTE_CAPABILITY}" CACHE STRING "Which GPU architectures to generate code for (each arch/code pair will be passed as --generate-code option to nvcc, separate multiple pairs by ;)")
mark_as_advanced(CUDA_COMPUTE_CAPABILITY CUDA_GENERATE_CODE)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch compute_${CUDA_COMPUTE_CAPABILITY})
else()
message(STATUS "didn't work")
set(CUDA_HAVE_GPU FALSE CACHE BOOL "Whether CUDA-capable GPU is present")
endif()
endif()
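
The try_run above compiles and executes cuda_compute_capability.c, which is not among the files shown in this excerpt. Following the Stack Overflow recipe cited in the header, such a probe is only a few CUDA runtime API calls; the sketch below is an assumption about its shape, not the file from this commit. It prints the capability digits that land in RUN_OUTPUT_VAR and returns 0 only when a usable GPU is found, which is what RUN_RESULT_VAR checks.

/* Sketch of a compute-capability probe (not the actual cuda_compute_capability.c). */
#include <stdio.h>
#include <cuda_runtime_api.h>

int main()
{
	int count = 0;

	/* Non-zero exit code means "no usable GPU" to the CMake script. */
	if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0)
		return 1;

	struct cudaDeviceProp prop;
	if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess)
		return 1;

	/* e.g. "20" for compute capability 2.0 -> CUDA_COMPUTE_CAPABILITY */
	printf("%d%d", prop.major, prop.minor);
	return 0;
}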
10 changes: 10 additions & 0 deletions FindFreeImage.cmake
@@ -0,0 +1,10 @@
find_path(FreeImage_INCLUDE_DIR FreeImage.h HINTS /usr/include /usr/local/include /opt/local/include ${FreeImage_DIR}/include)
find_library(FreeImage_LIBRARY NAMES freeimage HINTS /usr/lib /usr/local/lib /opt/local/lib ${FreeImage_DIR}/lib)

set(FreeImage_INCLUDE_DIRS ${FreeImage_INCLUDE_DIR})
set(FreeImage_LIBRARIES ${FreeImage_LIBRARY})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FreeImage DEFAULT_MSG FreeImage_INCLUDE_DIR FreeImage_LIBRARY)

mark_as_advanced(FreeImage_INCLUDE_DIR FreeImage_LIBRARY)
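
This module only locates the FreeImage headers and library that the project links for image I/O. As a purely hypothetical illustration of the API being searched for: the function names below are real FreeImage calls, but the helper, its error handling, and the normalisation are invented and not taken from this commit.

// Hypothetical example: load an image as 8-bit greyscale and copy it into a float buffer.
#include <FreeImage.h>
#include <vector>

std::vector<float> loadGrey(const char *path, unsigned &width, unsigned &height)
{
	// Error checks omitted for brevity; FreeImage stores scanlines bottom-up,
	// which is ignored here.
	FREE_IMAGE_FORMAT fif = FreeImage_GetFileType(path, 0);
	FIBITMAP *img  = FreeImage_Load(fif, path, 0);
	FIBITMAP *grey = FreeImage_ConvertToGreyscale(img);

	width  = FreeImage_GetWidth(grey);
	height = FreeImage_GetHeight(grey);

	std::vector<float> pixels(width * height);
	for (unsigned y = 0; y < height; ++y)
	{
		BYTE *line = FreeImage_GetScanLine(grey, y);
		for (unsigned x = 0; x < width; ++x)
			pixels[y * width + x] = line[x] / 255.0f;
	}

	FreeImage_Unload(grey);
	FreeImage_Unload(img);
	return pixels;
}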
51 changes: 51 additions & 0 deletions GPUHandles.h
@@ -0,0 +1,51 @@
/**
* ___ _ _ ___ _ __ __ _ ___
* / __| | | | \ /_\ | \/ | /_\ | _ \
* | (__| |_| | |) / _ \ | |\/| |/ _ \| _/
* \___|\___/|___/_/_\_\_|_|__|_/_/_\_\_|_ ___
* / __| | | | _ \ __| _ \___| _ \ __/ __|
* \__ \ |_| | _/ _|| /___| / _|\__ \
* |___/\___/|_| |___|_|_\ |_|_\___|___/
* 2012
*
* by Jens Wetzl ([email protected])
* and Oliver Taubmann ([email protected])
*
* This work is licensed under a Creative Commons
* Attribution 3.0 Unported License. (CC-BY)
* http://creativecommons.org/licenses/by/3.0/
*
**/

#ifndef GPU_HANDLES_H
#define GPU_HANDLES_H

#include "cudalbfgs_error_checking.h"

// A small wrapper for cuBLAS and cuSPARSE states with
// automatic initialization and cleanup.
struct GPUHandles
{
cublasHandle_t cublasHandle;
cusparseHandle_t cusparseHandle;
cusparseMatDescr_t cusparseDescriptor;

GPUHandles()
{
CublasSafeCall ( cublasCreate (&cublasHandle) );
CusparseSafeCall( cusparseCreate (&cusparseHandle) );
CusparseSafeCall( cusparseCreateMatDescr(&cusparseDescriptor) );

CusparseSafeCall( cusparseSetMatType (cusparseDescriptor, CUSPARSE_MATRIX_TYPE_GENERAL) );
CusparseSafeCall( cusparseSetMatIndexBase(cusparseDescriptor, CUSPARSE_INDEX_BASE_ZERO) );
}

~GPUHandles()
{
CublasSafeCall ( cublasDestroy (cublasHandle) );
CusparseSafeCall( cusparseDestroy (cusparseHandle) );
CusparseSafeCall( cusparseDestroyMatDescr(cusparseDescriptor) );
}
};

#endif // GPU_HANDLES_H
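
A minimal usage sketch of GPUHandles: construct it once, pass the contained handles to cuBLAS/cuSPARSE calls, and let the destructor release them. The function, device vector, and scale factor below are invented for illustration.

#include <cublas_v2.h>
#include "GPUHandles.h"

// Hypothetical helper: scale a device vector in place using the wrapped cuBLAS handle.
void scaleOnGPU(float *d_x, int n, float factor)
{
	GPUHandles handles;   // cublasCreate/cusparseCreate run in the constructor

	// d_x <- factor * d_x
	CublasSafeCall( cublasSscal(handles.cublasHandle, n, &factor, d_x, 1) );

	// cublasDestroy/cusparseDestroy run when 'handles' leaves scope
}

In practice a single instance would be created up front and reused, since creating and destroying the handles is comparatively expensive.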
206 changes: 206 additions & 0 deletions HuberLaplacian.cu
@@ -0,0 +1,206 @@
/**
* ___ _ _ ___ _ __ __ _ ___
* / __| | | | \ /_\ | \/ | /_\ | _ \
* | (__| |_| | |) / _ \ | |\/| |/ _ \| _/
* \___|\___/|___/_/_\_\_|_|__|_/_/_\_\_|_ ___
* / __| | | | _ \ __| _ \___| _ \ __/ __|
* \__ \ |_| | _/ _|| /___| / _|\__ \
* |___/\___/|_| |___|_|_\ |_|_\___|___/
* 2012
*
* by Jens Wetzl ([email protected])
* and Oliver Taubmann ([email protected])
*
* This work is licensed under a Creative Commons
* Attribution 3.0 Unported License. (CC-BY)
* http://creativecommons.org/licenses/by/3.0/
*
**/

#include "HuberLaplacian.h"
#include "Reduction.h"

#include "CudaLBFGS/error_checking.h"

namespace gpu_HuberLaplacian
{
__global__ void laplacian(float *dst, const float *src, const size_t width, const size_t height,
const size_t pixelsPerThread);
__global__ void huber(float *x, const size_t width, const size_t height, const float alpha,
const float strength, const size_t pixelsPerThread, float *f);

float *d_tmp;
}

HuberLaplacian::HuberLaplacian(const size_t height, const size_t width,
const float alpha, const float strength)
: cost_function(height * width)
, m_height(height)
, m_width(width)
, m_alpha(alpha)
, m_strength(strength)
{
CudaSafeCall( cudaMalloc(&gpu_HuberLaplacian::d_tmp, m_numDimensions * sizeof(float)) );
CudaSafeCall( cudaMalloc( (void**) &m_reductionArray, width * height * sizeof(float)) );
CudaSafeCall( cudaMalloc( (void**) &m_reductionArray2, 1024 * sizeof(float)) );

#ifdef SUPERRES_TIMING
m_atomic = new timer("priorOther");
m_filter = new timer("priorFilter");
#endif
}

HuberLaplacian::~HuberLaplacian()
{
CudaSafeCall( cudaFree(gpu_HuberLaplacian::d_tmp) );
CudaSafeCall( cudaFree(m_reductionArray) );
CudaSafeCall( cudaFree(m_reductionArray2) );

#ifdef SUPERRES_TIMING
m_atomic->saveMeasurement();
m_filter->saveMeasurement();

delete m_atomic;
delete m_filter;
#endif
}

void HuberLaplacian::f_gradf(const float *d_x, float *d_f, float *d_gradf)
{
using namespace gpu_HuberLaplacian;

dim3 blockDim(512);

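// Launch geometry: each thread handles a run of pixelsPerThread pixels within
// one image column, so round up both the per-column thread count and the grid
// size to cover every pixel.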
const size_t pixelsPerThread = 8;
size_t threadsPerColumn = (m_height % pixelsPerThread == 0) ? (m_height / pixelsPerThread)
: (m_height / pixelsPerThread) + 1;
size_t threads = threadsPerColumn * m_width;

dim3 gridDim = (threads % blockDim.x == 0) ? (threads / blockDim.x)
: (threads / blockDim.x) + 1;

#ifdef SUPERRES_TIMING
m_filter->start();
#endif

// Compute image Laplacian
laplacian<<<gridDim, blockDim>>>(d_tmp, d_x, m_width, m_height, pixelsPerThread);

CudaCheckError();
cudaDeviceSynchronize();

#ifdef SUPERRES_TIMING
m_filter->stop();
m_atomic->start();
#endif

CudaSafeCall( cudaMemset(m_reductionArray, 0, m_width * m_height * sizeof(float)) );

// Compute prior function value and gradient without final filtering
huber<<<gridDim, blockDim>>>(d_tmp, m_width, m_height, m_alpha, m_strength, pixelsPerThread, m_reductionArray);

CudaCheckError();
cudaDeviceSynchronize();

Reduction::sumReduction(m_reductionArray, m_width, m_height, m_width, d_f, m_reductionArray2);

#ifdef SUPERRES_TIMING
m_atomic->stop();
m_filter->start();
#endif

// Compute Laplacian of the gradient
laplacian<<<gridDim, blockDim>>>(d_gradf, d_tmp, m_width, m_height, pixelsPerThread);

CudaCheckError();
cudaDeviceSynchronize();

#ifdef SUPERRES_TIMING
m_filter->stop();
#endif
}


namespace gpu_HuberLaplacian
{

__global__ void laplacian(float *dst, const float *src, const size_t width, const size_t height,
const size_t pixelsPerThread)
{
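// Thread-to-pixel mapping: each thread owns a vertical run of up to
// pixelsPerThread pixels; 'col' is its image column, 'crow' the first row of its run.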
const size_t col = (blockIdx.x * blockDim.x + threadIdx.x) % width;
const size_t crow = (blockIdx.x * blockDim.x + threadIdx.x) / width * pixelsPerThread;

if (col >= width || crow >= height)
return;

const size_t srow = crow + 1;
const size_t erow = min((unsigned int)(crow + pixelsPerThread - 1), (unsigned int)(height - 1));

// First element

const size_t firstIdx = crow * width + col;

dst[firstIdx] = src[firstIdx];

if (crow + 1 < height) dst[firstIdx] -= 0.25f * src[firstIdx + width]; // S
if (crow >= 1) dst[firstIdx] -= 0.25f * src[firstIdx - width]; // N
if (col + 1 < width) dst[firstIdx] -= 0.25f * src[firstIdx + 1]; // E
if (col >= 1) dst[firstIdx] -= 0.25f * src[firstIdx - 1]; // W

// Inner elements

for (size_t row = srow; row < erow; ++row)
{
const size_t cIdx = row * width + col;

// C, S, N (always exist)
dst[cIdx] = src[cIdx] - 0.25f * (src[cIdx + width] + src[cIdx - width]);

if (col + 1 < width) dst[cIdx] -= 0.25f * src[cIdx + 1]; // E
if (col >= 1) dst[cIdx] -= 0.25f * src[cIdx - 1]; // W
}

if (erow <= crow)
return;

// Last element

const size_t lastIdx = erow * width + col;

dst[lastIdx] = src[lastIdx] - 0.25f * src[lastIdx - width]; // C, N

if (erow + 1 < height) dst[lastIdx] -= 0.25f * src[lastIdx + width]; // S
if (col + 1 < width) dst[lastIdx] -= 0.25f * src[lastIdx + 1]; // E
if (col >= 1) dst[lastIdx] -= 0.25f * src[lastIdx - 1]; // W
}

__global__ void huber(float *a, const size_t width, const size_t height, const float alpha,
const float strength, const size_t pixelsPerThread, float *f)
{
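// Each thread applies the pseudo-Huber penalty to its run of Laplacian responses,
// accumulates the per-thread partial sum in colF, and stores it in f for the
// subsequent sum reduction; a[idx] is overwritten with the (scaled) derivative.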
const size_t col = (blockIdx.x * blockDim.x + threadIdx.x) % width;
const size_t crow = (blockIdx.x * blockDim.x + threadIdx.x) / width * pixelsPerThread;

if (col >= width || crow >= height)
return;

const size_t erow = min((unsigned int)(crow + pixelsPerThread), (unsigned int)height);

const float alpha2 = alpha * alpha;

float colF = 0.0f;

for (size_t row = crow; row < erow; ++row)
{
const size_t idx = row * width + col;

// Pseudo-Huber loss function
const float root = sqrtf(1.0f + a[idx]*a[idx] / alpha2);
colF += alpha2 * (root - 1.0f);
a[idx] *= strength / root;
}

colF *= strength;
f[blockIdx.x * blockDim.x + threadIdx.x] = colF;
}

}
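
For reference, the per-pixel math in the huber kernel is the pseudo-Huber penalty phi(a) = alpha^2 * (sqrt(1 + a^2/alpha^2) - 1), whose derivative is a / sqrt(1 + a^2/alpha^2). Below is a small host-side sketch of the same computation; the helper is hypothetical and useful only as a sanity check against the kernel.

// Hypothetical CPU reference for one pixel of the huber kernel above.
// Given the Laplacian response a, it returns the penalty value and
// overwrites a with strength * dphi/da, mirroring "a[idx] *= strength / root".
#include <cmath>

float pseudoHuber(float &a, float alpha, float strength)
{
	const float alpha2 = alpha * alpha;
	const float root   = std::sqrt(1.0f + a * a / alpha2);

	const float value = strength * alpha2 * (root - 1.0f);  // contribution to f
	a *= strength / root;                                   // gradient w.r.t. the Laplacian response

	return value;
}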