Add MAGMA implementations of Torch LAPACK functions
colesbury committed Jun 24, 2015
1 parent 9db9603 commit 7d432a5
Showing 12 changed files with 951 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -1,15 +1,19 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR)
CMAKE_POLICY(VERSION 2.8)

SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})

FIND_PACKAGE(Torch REQUIRED)
FIND_PACKAGE(CUDA 5.5 REQUIRED)
FIND_PACKAGE(MAGMA)

SET(CMAKE_C_FLAGS "-std=c99 -Werror=implicit-function-declaration")

INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})

ADD_SUBDIRECTORY(lib)

INCLUDE_DIRECTORIES(BEFORE "${CMAKE_CURRENT_BINARY_DIR}/lib/THC")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/lib/THC")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/torch")

149 changes: 149 additions & 0 deletions TensorMath.lua
@@ -163,6 +163,56 @@ wrap.types.LongArg = {
   end
}

wrap.types.charoption = {

   -- help string shown for this argument, e.g. "(N|V)"
   helpname = function(arg)
      if arg.values then
         return "(" .. table.concat(arg.values, '|') .. ")"
      end
   end,

   -- C declaration of the argument, plus its one-character default if any
   declare = function(arg)
      local txt = {}
      table.insert(txt, string.format("const char *arg%d = NULL;", arg.i))
      if arg.default then
         table.insert(txt, string.format("char arg%d_default = '%s';", arg.i, arg.default))
      end
      return table.concat(txt, '\n')
   end,

   -- point the argument at its default when the caller omits it
   init = function(arg)
      return string.format("arg%d = &arg%d_default;", arg.i, arg.i)
   end,

   -- accept only the single-character strings listed in arg.values
   check = function(arg, idx)
      local txt = {}
      local txtv = {}
      table.insert(txt, string.format('(arg%d = lua_tostring(L, %d)) && (', arg.i, idx))
      for _,value in ipairs(arg.values) do
         table.insert(txtv, string.format("*arg%d == '%s'", arg.i, value))
      end
      table.insert(txt, table.concat(txtv, ' || '))
      table.insert(txt, ')')
      return table.concat(txt, '')
   end,

   read = function(arg, idx)
   end,

   -- pass the validated char pointer straight through to the C function
   carg = function(arg, idx)
      return string.format('arg%d', arg.i)
   end,

   creturn = function(arg, idx)
   end,

   precall = function(arg)
   end,

   postcall = function(arg)
   end
}
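The new charoption type is what lets the LAPACK wrappers below accept single-character mode flags such as 'N' or 'V'. As an editorial sketch of the Lua-side calling convention (assuming a cutorch build with MAGMA; the symeig binding it relies on is declared further down):

local a = torch.CudaTensor(4, 4):uniform()
local s = a + a:t()                  -- symmetrize so symeig is well defined
local e = torch.symeig(s)            -- defaults: jobz = 'N' (eigenvalues only), uplo = 'U'
local e2, v = torch.symeig(s, 'V')   -- 'V' also computes eigenvectors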

function interface.luaname2wrapname(self, name)
   return string.format('cutorch_CudaTensor_%s', name)
end
@@ -499,6 +549,18 @@ for _,name in ipairs({"min", "max"}) do
             {name="index"}})
end

wrap("tril",
cname("tril"),
{{name=Tensor, default=true, returned=true},
{name=Tensor},
{name="int", default=0}})

wrap("triu",
cname("triu"),
{{name=Tensor, default=true, returned=true},
{name=Tensor},
{name="int", default=0}})

for _,name in ipairs({"log", "log1p", "exp",
"cos", "acos", "cosh",
"sin", "asin", "sinh",
@@ -597,6 +659,93 @@ for _,f in ipairs({{name='exponential'}}) do
             {name=real, default=f.a}})
end

for _,name in ipairs({"gesv","gels"}) do
wrap(name,
cname(name),
{{name=Tensor, returned=true},
{name=Tensor, returned=true},
{name=Tensor},
{name=Tensor}},
cname(name),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor},
{name=Tensor}})
end
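These bindings expose MAGMA's GPU solvers under the usual Torch names; a hedged usage sketch (editorial, assuming MAGMA was detected at build time):

local a = torch.CudaTensor(3, 3):uniform()    -- square system matrix
local b = torch.CudaTensor(3, 2):uniform()    -- two right-hand sides
local x = torch.gesv(b, a)                    -- solves a * x = b

local a2 = torch.CudaTensor(5, 3):uniform()   -- overdetermined system
local b2 = torch.CudaTensor(5, 2):uniform()
local y = torch.gels(b2, a2)                  -- least-squares solution of a2 * y ≈ b2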

wrap("symeig",
cname("syev"),
{{name=Tensor, returned=true},
{name=Tensor, returned=true},
{name=Tensor},
{name='charoption', values={'N', 'V'}, default='N'},
{name='charoption', values={'U', 'L'}, default='U'}},
cname("syev"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor},
{name='charoption', values={'N', 'V'}, default='N'},
{name='charoption', values={'U', 'L'}, default='U'}})

wrap("eig",
cname("geev"),
{{name=Tensor, returned=true},
{name=Tensor, returned=true},
{name=Tensor},
{name='charoption', values={'N', 'V'}, default='N'}},
cname("geev"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor},
{name='charoption', values={'N', 'V'}, default='N'}})
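Usage mirrors symeig, but for general square matrices (illustrative sketch):

local a = torch.CudaTensor(5, 5):uniform()
local e = torch.eig(a)            -- 'N' (default): eigenvalues only, as an n x 2 (real, imaginary) tensor in Torch's convention
local e2, v = torch.eig(a, 'V')   -- 'V' also computes right eigenvectors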

wrap("svd",
cname("gesvd"),
{{name=Tensor, returned=true},
{name=Tensor, returned=true},
{name=Tensor, returned=true},
{name=Tensor},
{name='charoption', values={'A', 'S'}, default='S'}},
cname("gesvd"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor},
{name='charoption', values={'A', 'S'}, default='S'}})
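A hedged SVD sketch; 'S', the default, requests the reduced factorization and 'A' the full one:

local a = torch.CudaTensor(6, 4):uniform()
local u, s, v = torch.svd(a)          -- reduced: a ≈ u * diag(s) * v^T
local u2, s2, v2 = torch.svd(a, 'A')  -- full: u2 is 6 x 6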

wrap("inverse",
cname("getri"),
{{name=Tensor, returned=true},
{name=Tensor}},
cname("getri"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor}})

wrap("potri",
cname("potri"),
{{name=Tensor, returned=true},
{name=Tensor}},
cname("potri"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor}})

wrap("potrf",
cname("potrf"),
{{name=Tensor, returned=true},
{name=Tensor}},
cname("potrf"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor}})
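These single-input routines compose naturally; an editorial sketch, assuming Torch's usual convention that potri takes the Cholesky factor produced by potrf:

local a = torch.CudaTensor(4, 4):uniform()
local spd = a * a:t() + torch.eye(4):cuda()  -- symmetric positive-definite input
local chol = torch.potrf(spd)                -- Cholesky factor of spd
local inv1 = torch.potri(chol)               -- inverse of spd from its Cholesky factor
local inv2 = torch.inverse(spd)              -- general inverse via getri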

wrap("qr",
cname("qr"),
{{name=Tensor, returned=true},
{name=Tensor, returned=true},
{name=Tensor}},
cname("qr"),
{{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor, default=true, returned=true, invisible=true},
{name=Tensor}})
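And a short QR sketch (illustrative):

local a = torch.CudaTensor(5, 3):uniform()
local q, r = torch.qr(a)   -- a = q * r; q has orthonormal columns, r is upper triangular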

wrap("mean",
cname("meanall"),
27 changes: 27 additions & 0 deletions cmake/FindMAGMA.cmake
@@ -0,0 +1,27 @@
# - Find MAGMA library
# This module finds an installed MAGMA library, a matrix algebra library
# similar to LAPACK for GPU and multicore systems
# (see http://icl.cs.utk.edu/magma/).
#
# This module sets the following variables:
# MAGMA_FOUND - set to true if the MAGMA library is found.
# MAGMA_LIBRARIES - list of libraries to link against to use MAGMA
# MAGMA_INCLUDE_DIR - include directory

IF(NOT MAGMA_FOUND)

  include(FindPackageHandleStandardArgs)

  SET(MAGMA_LIBRARIES)
  SET(MAGMA_INCLUDE_DIR)

  FIND_LIBRARY(MAGMA_LIBRARIES magma /usr/local/magma/lib)
  FIND_PATH(MAGMA_INCLUDE_DIR magma.h /usr/local/magma/include)

  IF (MAGMA_LIBRARIES)
    SET(MAGMA_FOUND TRUE)
  ELSE (MAGMA_LIBRARIES)
    SET(MAGMA_FOUND FALSE)
  ENDIF (MAGMA_LIBRARIES)

ENDIF(NOT MAGMA_FOUND)
6 changes: 6 additions & 0 deletions init.c
@@ -770,6 +770,12 @@ int luaopen_libcutorch(lua_State *L)
  luaT_pushudata(L, state->cudaHostAllocator, "torch.Allocator");
  lua_setfield(L, -2, "CudaHostAllocator");

#ifdef USE_MAGMA
  THCMagma_init(state);
  lua_pushboolean(L, 1);
  lua_setfield(L, -2, "magma");
#endif

  cutorch_CudaStorage_init(L);
  cutorch_CudaTensor_init(L);
  cutorch_CudaTensorMath_init(L);
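Because this hunk sets a magma flag on the cutorch table only when THC was compiled with MAGMA, Lua code can feature-test for it; a minimal sketch:

require 'cutorch'
if cutorch.magma then
   print('MAGMA available: torch.gesv, torch.svd, etc. work on CudaTensors')
else
   print('cutorch was built without MAGMA; the new LAPACK routines are unavailable')
end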
29 changes: 24 additions & 5 deletions lib/THC/CMakeLists.txt
@@ -1,4 +1,13 @@
SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../cmake ${CMAKE_MODULE_PATH})

FIND_PACKAGE(Torch)
FIND_PACKAGE(CUDA 5.5 REQUIRED)
FIND_PACKAGE(MAGMA)

IF(MAGMA_FOUND)
  SET(USE_MAGMA 1)
  MESSAGE(STATUS "Compiling with MAGMA support")
ENDIF(MAGMA_FOUND)

IF(APPLE)
  IF(${CUDA_VERSION} LESS 6.0)
@@ -25,6 +34,9 @@ LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_20")

INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
INCLUDE_DIRECTORIES("${CUDA_SDK_ROOT_DIR}/common/inc")
IF(USE_MAGMA)
  INCLUDE_DIRECTORIES(${MAGMA_INCLUDE_DIR})
ENDIF(USE_MAGMA)

IF(NOT THC_INSTALL_BIN_SUBDIR
   OR NOT THC_INSTALL_LIB_SUBDIR
@@ -45,6 +57,8 @@ ELSE()
  SET(THC_INSTALL_CMAKE_SUBDIR ${Torch_INSTALL_CMAKE_SUBDIR})
ENDIF()

INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
CONFIGURE_FILE(THCGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h")

SET(CMAKE_C_FLAGS "-std=c99")
SET(src
@@ -57,11 +71,12 @@ SET(src-cuda
  THCStorageCopy.cu
  THCTensor.cu
  THCTensorCopy.cu
  THCTensorMath.cu
  THCTensorMath2.cu
  THCTensorMathBlas.cu
  THCTensorMathCompare.cu
  THCTensorMathCompareT.cu
  THCTensorMath.cu
  THCTensorMathMagma.cu
  THCTensorMathPairwise.cu
  THCTensorMathPointwise.cu
  THCTensorMathScan.cu
@@ -78,25 +93,29 @@ CUDA_ADD_LIBRARY(THC SHARED ${src} ${src-cuda})
CUDA_ADD_CUBLAS_TO_TARGET(THC)
TARGET_LINK_LIBRARIES(THC TH ${CUDA_curand_LIBRARY})

IF(USE_MAGMA)
  TARGET_LINK_LIBRARIES(THC ${MAGMA_LIBRARIES})
ENDIF(USE_MAGMA)

INSTALL(TARGETS THC
        RUNTIME DESTINATION "${THC_INSTALL_BIN_SUBDIR}"
        LIBRARY DESTINATION "${THC_INSTALL_LIB_SUBDIR}"
        ARCHIVE DESTINATION "${THC_INSTALL_LIB_SUBDIR}")

INSTALL(FILES
          THC.h
          THCGeneral.h
          ${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h
          THCBlas.h
          THCStorage.h
          THCStorageCopy.h
          THCStorageCopy.h
          THCTensor.h
          THCTensorCopy.h
          THCTensorCopy.h
          THCTensorRandom.h
          THCTensorMath.h
          THCTensorConv.h
          THCTensorSort.h
          THCApply.cuh
          THCReduce.cuh
          THCReduceApplyUtils.cuh
          THCAllocator.h
          THCAllocator.h
          DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC")
4 changes: 4 additions & 0 deletions lib/THC/THCGeneral.h → lib/THC/THCGeneral.h.in
@@ -9,6 +9,8 @@
#include "cuda_runtime.h"
#include "cublas_v2.h"

#cmakedefine USE_MAGMA

#ifdef __cplusplus
# define THC_EXTERNC extern "C"
#else
@@ -78,6 +80,8 @@ THC_API void THCudaInit(THCState* state);
THC_API void THCudaShutdown(THCState* state);
THC_API void THCudaEnablePeerToPeerAccess(THCState* state);

THC_API void THCMagma_init(THCState *state);

/* State manipulators and accessors */
THC_API int THCState_getNumDevices(THCState* state);
THC_API void THCState_reserveStreams(THCState* state, int numStreams);
3 changes: 1 addition & 2 deletions lib/THC/THCTensor.c
@@ -67,7 +67,6 @@ void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag

static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self);
static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride);
static void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride);


/* Empty init */
@@ -634,7 +633,7 @@ static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStora
  THCudaTensor_rawResize(state, self, nDimension, size, stride);
}

static void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride)
void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride)
{
  int d;
  int nDimension_;
1 change: 1 addition & 0 deletions lib/THC/THCTensor.h
@@ -77,6 +77,7 @@ THC_API void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long s
THC_API void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_);
THC_API void THCudaTensor_resize4d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_);
THC_API void THCudaTensor_resize5d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_);
THC_API void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride);

THC_API void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src);
THC_API void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
16 changes: 15 additions & 1 deletion lib/THC/THCTensorMath.h
@@ -16,6 +16,8 @@ THC_API void THCudaTensor_add(THCState *state, THCudaTensor *self, THCudaTensor
THC_API void THCudaTensor_mul(THCState *state, THCudaTensor *self, THCudaTensor *src, float value);
THC_API void THCudaTensor_div(THCState *state, THCudaTensor *self, THCudaTensor *src, float value);

THC_API void THCudaTensor_tril(THCState *state, THCudaTensor *self, THCudaTensor *src, long k);
THC_API void THCudaTensor_triu(THCState *state, THCudaTensor *self, THCudaTensor *src, long k);

THC_API void THCudaTensor_cadd(THCState *state, THCudaTensor *self, THCudaTensor *src1, float value, THCudaTensor *src2);
THC_API void THCudaTensor_cmul(THCState *state, THCudaTensor *self, THCudaTensor *src1, THCudaTensor *src2);
@@ -65,7 +67,19 @@ THC_API void THCudaTensor_floor(THCState *state, THCudaTensor *self, THCudaTenso
THC_API void THCudaTensor_abs(THCState *state, THCudaTensor *self, THCudaTensor *src);
THC_API void THCudaTensor_sign(THCState *state, THCudaTensor *self, THCudaTensor *src);
THC_API void THCudaTensor_round(THCState *state, THCudaTensor *self, THCudaTensor *src);
TH_API void THCudaTensor_atan2(THCState *state, THCudaTensor *r_, THCudaTensor *tx, THCudaTensor *ty);
THC_API void THCudaTensor_atan2(THCState *state, THCudaTensor *r_, THCudaTensor *tx, THCudaTensor *ty);

// MAGMA (i.e. CUDA implementation of LAPACK functions)
THC_API void THCudaTensor_gesv(THCState *state, THCudaTensor *rb_, THCudaTensor *ra_, THCudaTensor *b_, THCudaTensor *a_);
THC_API void THCudaTensor_gels(THCState *state, THCudaTensor *rb_, THCudaTensor *ra_, THCudaTensor *b_, THCudaTensor *a_);
THC_API void THCudaTensor_syev(THCState *state, THCudaTensor *re_, THCudaTensor *rv_, THCudaTensor *a_, const char *jobz, const char *uplo);
THC_API void THCudaTensor_geev(THCState *state, THCudaTensor *re_, THCudaTensor *rv_, THCudaTensor *a_, const char *jobvr);
THC_API void THCudaTensor_gesvd(THCState *state, THCudaTensor *ru_, THCudaTensor *rs_, THCudaTensor *rv_, THCudaTensor *a, const char *jobu);
THC_API void THCudaTensor_gesvd2(THCState *state, THCudaTensor *ru_, THCudaTensor *rs_, THCudaTensor *rv_, THCudaTensor *ra_, THCudaTensor *a, const char *jobu);
THC_API void THCudaTensor_getri(THCState *state, THCudaTensor *ra_, THCudaTensor *a);
THC_API void THCudaTensor_potri(THCState *state, THCudaTensor *ra_, THCudaTensor *a);
THC_API void THCudaTensor_potrf(THCState *state, THCudaTensor *ra_, THCudaTensor *a);
THC_API void THCudaTensor_qr(THCState *state, THCudaTensor *rq_, THCudaTensor *rr_, THCudaTensor *a);

THC_API void THCudaTensor_ltValue(THCState *state, THCudaTensor *self_, THCudaTensor *src, float value);
THC_API void THCudaTensor_gtValue(THCState *state, THCudaTensor *self_, THCudaTensor *src, float value);
(Diffs for the remaining 3 changed files are not shown.)
