forked from foges/pogs
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
385 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
# Instructions | ||
# 1. To build with openmp set IFLAGS=-fopenmp | ||
|
||
# C++ Flags | ||
# CXXFLAGS starts with the user-supplied IFLAGS (see Instructions above); the | ||
# -DDEBUG and -Wconversion switches are present but commented out. | ||
CXX=g++ | ||
CXXFLAGS=$(IFLAGS) -g -O3 -Wall -std=c++11 -fPIC #-DDEBUG # -Wconversion | ||
|
||
# CUDA Flags | ||
# nvcc is taken from $(CUDA_HOME)/bin. CUDA_HOME is not assigned in the visible | ||
# part of this file -- presumably it comes from the environment or config.mk. | ||
# -Xcompiler forwards the following -fPIC to the host compiler. | ||
# NOTE(review): -arch=sm_20 is rejected by recent CUDA toolkits -- confirm the | ||
# toolkit version this is meant to build with. | ||
CUXX=$(CUDA_HOME)/bin/nvcc | ||
CUFLAGS=$(IFLAGS) -arch=sm_20 -Xcompiler -fPIC #-DDEBUG | ||
|
||
# Build directory | ||
OBJDIR=build | ||
|
||
# Load R specific config | ||
# Plain `include` (no leading `-`): make stops with an error if config.mk is | ||
# missing and cannot be remade. The variables above use `=`, so anything | ||
# config.mk assigns is still seen when they are expanded in the recipes. | ||
include config.mk | ||
|
||
|
||
# POGS header files. | ||
# Listed as prerequisites of both the CPU and the GPU pogs.o rules, so editing | ||
# any of these headers rebuilds pogs.o. | ||
POGS_HDR=\ | ||
include/interface_defs.h \ | ||
include/pogs.h \ | ||
include/prox_lib.h \ | ||
include/util.h \ | ||
include/matrix/matrix.h \ | ||
include/matrix/matrix_dense.h \ | ||
include/matrix/matrix_sparse.h \ | ||
include/projector/projector_cgls.h \ | ||
include/projector/projector_direct.h | ||
|
||
# CPU Specific headers and object files. | ||
# GSL_HDR is a prerequisite of the explicit $(OBJDIR)/cpu/pogs.o rule only. | ||
GSL_HDR=\ | ||
cpu/include/gsl/cblas.h \ | ||
cpu/include/gsl/gsl_blas.h \ | ||
cpu/include/gsl/gsl_linalg.h \ | ||
cpu/include/gsl/gsl_matrix.h \ | ||
cpu/include/gsl/gsl_rand.h \ | ||
cpu/include/gsl/gsl_spblas.h \ | ||
cpu/include/gsl/gsl_spmat.h \ | ||
cpu/include/gsl/gsl_vector.h | ||
|
||
# CPU_HDR is a prerequisite of the CPU pattern rules (matrix/, projector/ and | ||
# top-level cpu objects). | ||
CPU_HDR=\ | ||
cpu/include/cgls.h \ | ||
cpu/include/equil_helper.h \ | ||
cpu/include/projector_helper.h | ||
# The three object lists below are exactly what the `cpu` target archives. | ||
CPU_MTX_OBJ=\ | ||
$(OBJDIR)/cpu/matrix/matrix_sparse.o \ | ||
$(OBJDIR)/cpu/matrix/matrix_dense.o | ||
CPU_PRJ_OBJ=\ | ||
$(OBJDIR)/cpu/projector/projector_cgls.o \ | ||
$(OBJDIR)/cpu/projector/projector_direct_dense.o | ||
CPU_OBJ=$(OBJDIR)/cpu/pogs.o | ||
|
||
# GPU Specific headers and object files. | ||
# CML_HDR is a prerequisite of the explicit $(OBJDIR)/gpu/pogs.o rule only. | ||
CML_HDR=\ | ||
gpu/include/cml/cblas.h \ | ||
gpu/include/cml/cml_blas.cuh \ | ||
gpu/include/cml/cml_defs.cuh \ | ||
gpu/include/cml/cml_linalg.cuh \ | ||
gpu/include/cml/cml_matrix.cuh \ | ||
gpu/include/cml/cml_rand.cuh \ | ||
gpu/include/cml/cml_spblas.cuh \ | ||
gpu/include/cml/cml_spmat.cuh \ | ||
gpu/include/cml/cml_utils.cuh \ | ||
gpu/include/cml/cml_vector.cuh | ||
|
||
# GPU_HDR is a prerequisite of the GPU pattern rules (matrix/, projector/ and | ||
# top-level gpu objects). | ||
GPU_HDR=\ | ||
gpu/include/cgls.cuh \ | ||
gpu/include/equil_helper.cuh \ | ||
gpu/include/projector_helper.cuh | ||
# The three object lists below feed both the device-link step (pogs_link.o) and | ||
# the archive built by the `gpu` target. | ||
GPU_MTX_OBJ=\ | ||
$(OBJDIR)/gpu/matrix/matrix_dense.o \ | ||
$(OBJDIR)/gpu/matrix/matrix_sparse.o | ||
GPU_PRJ_OBJ=\ | ||
$(OBJDIR)/gpu/projector/projector_cgls.o \ | ||
$(OBJDIR)/gpu/projector/projector_direct_dense.o | ||
GPU_OBJ=$(OBJDIR)/gpu/pogs.o | ||
|
||
|
||
# Set vpath for build | ||
# The pattern rules below name their sources as bare `%.cpp` / `%.cu`; VPATH is | ||
# what lets make find those files inside the cpu/ and gpu/ source directories. | ||
VPATH=cpu cpu/matrix cpu/projector gpu gpu/matrix gpu/projector | ||
|
||
|
||
# Build all | ||
# `cpu` and `gpu` are command names, not files the recipes create. Both are also | ||
# the names of source directories (see VPATH), so they must be declared phony; | ||
# otherwise make treats the existing directory as the target and compares its | ||
# timestamp against the objects. | ||
.PHONY: cpu gpu | ||
|
||
# Archive the CPU objects into $(OBJDIR)/pogs.a. | ||
cpu: $(CPU_OBJ) $(CPU_MTX_OBJ) $(CPU_PRJ_OBJ) | ||
	ar cr $(OBJDIR)/pogs.a $^ | ||
|
||
# Archive the device-link object together with the GPU objects into | ||
# $(OBJDIR)/pogs.a (same archive name as the CPU build). | ||
gpu: $(OBJDIR)/pogs_link.o $(GPU_OBJ) $(GPU_MTX_OBJ) $(GPU_PRJ_OBJ) | ||
	ar cr $(OBJDIR)/pogs.a $^ | ||
|
||
|
||
# Directories | ||
# Output directories for the CPU and GPU objects; the object rules request them | ||
# as order-only prerequisites. `mkdir -p` creates any missing parents, so no | ||
# directory needs another directory as a prerequisite. A directory's timestamp | ||
# changes whenever an entry is added to it, so using one as a normal | ||
# prerequisite makes these recipes re-run for no reason. | ||
$(OBJDIR) \ | ||
$(OBJDIR)/cpu $(OBJDIR)/cpu/matrix $(OBJDIR)/cpu/projector \ | ||
$(OBJDIR)/gpu $(OBJDIR)/gpu/matrix $(OBJDIR)/gpu/projector: | ||
	mkdir -p $@ | ||
|
||
|
||
# POGS CPU objects | ||
# Each rule compiles one source ($<) into one object ($@). The directory after | ||
# `|` is an order-only prerequisite: it must exist, but its timestamp never | ||
# triggers a recompile. | ||
# Explicit rule for pogs.o: depends on the public POGS headers and the GSL headers. | ||
$(OBJDIR)/cpu/pogs.o: cpu/pogs.cpp $(POGS_HDR) $(GSL_HDR) | $(OBJDIR)/cpu | ||
$(CXX) -I include -Icpu/include $< $(CXXFLAGS) -c -o $@ | ||
|
||
# Pattern rules: the bare %.cpp source is located through VPATH. These recipes | ||
# pass $(IFLAGS) explicitly in addition to $(CXXFLAGS). | ||
$(OBJDIR)/cpu/matrix/%.o: %.cpp $(CPU_HDR) | $(OBJDIR)/cpu/matrix | ||
$(CXX) -Iinclude -Icpu/include $< $(CXXFLAGS) $(IFLAGS) -c -o $@ | ||
|
||
$(OBJDIR)/cpu/projector/%.o: %.cpp $(CPU_HDR) | $(OBJDIR)/cpu/projector | ||
$(CXX) -Iinclude -Icpu/include $< $(CXXFLAGS) $(IFLAGS) -c -o $@ | ||
|
||
$(OBJDIR)/cpu/%.o: %.cpp $(CPU_HDR) | $(OBJDIR)/cpu | ||
$(CXX) -Iinclude -Icpu/include $< $(CXXFLAGS) $(IFLAGS) -c -o $@ | ||
|
||
# POGS GPU objects | ||
# Device-link step: nvcc -dlink combines the device code of all GPU objects into | ||
# pogs_link.o, which the `gpu` target archives alongside those objects. | ||
$(OBJDIR)/pogs_link.o: $(GPU_OBJ) $(GPU_MTX_OBJ) $(GPU_PRJ_OBJ) | $(OBJDIR) | ||
$(CUXX) $(CUFLAGS) $^ -dlink -o $@ | ||
|
||
# Explicit rule for pogs.o: depends on the public POGS headers and the CML | ||
# headers. -dc compiles to an object with relocatable device code, as required | ||
# by the -dlink step above. | ||
$(OBJDIR)/gpu/pogs.o: gpu/pogs.cu $(POGS_HDR) $(CML_HDR) | $(OBJDIR)/gpu | ||
$(CUXX) -Iinclude -Igpu/include $< $(CUFLAGS) -dc -o $@ | ||
|
||
# Pattern rules: the bare %.cu source is located through VPATH. These recipes | ||
# pass $(IFLAGS) explicitly in addition to $(CUFLAGS). | ||
$(OBJDIR)/gpu/matrix/%.o: %.cu $(GPU_HDR) | $(OBJDIR)/gpu/matrix | ||
$(CUXX) -Iinclude -Igpu/include $< $(CUFLAGS) $(IFLAGS) -dc -o $@ | ||
|
||
$(OBJDIR)/gpu/projector/%.o: %.cu $(GPU_HDR) | $(OBJDIR)/gpu/projector | ||
$(CUXX) -Iinclude -Igpu/include $< $(CUFLAGS) $(IFLAGS) -dc -o $@ | ||
|
||
$(OBJDIR)/gpu/%.o: %.cu $(GPU_HDR) | $(OBJDIR)/gpu | ||
$(CUXX) -Iinclude -Igpu/include $< $(CUFLAGS) $(IFLAGS) -dc -o $@ | ||
|
||
# Remove build products. Declared phony so a file named `clean` cannot make the | ||
# target look up to date. The build directory is removed through $(OBJDIR) so | ||
# that overriding OBJDIR does not leave stale objects behind. | ||
.PHONY: clean | ||
clean: | ||
	rm -rf pogs.a *.o $(OBJDIR) *.dSYM | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,237 @@ | ||
#include "cpu/include/gsl/cblas.h" | ||
|
||
#define INT int | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
/* | ||
* =========================================================================== | ||
* Prototypes for level 1 BLAS functions (complex are recast as routines) | ||
* =========================================================================== | ||
*/ | ||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
double ddot_(const INT *N, const double *X, const INT *incX, const double *Y, | ||
             const INT *incY); | ||
/* CBLAS-style wrapper: copies the by-value length and strides into INT | ||
 * temporaries and returns whatever ddot_ returns. */ | ||
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, | ||
                  const int incY) { | ||
  INT len = N; | ||
  INT strideX = incX; | ||
  INT strideY = incY; | ||
  return ddot_(&len, X, &strideX, Y, &strideY); | ||
} | ||
|
||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
double dnrm2_(const INT *N, const double *X, const INT *incX); | ||
/* CBLAS-style wrapper: copies the by-value length and stride into INT | ||
 * temporaries and returns whatever dnrm2_ returns. */ | ||
double cblas_dnrm2(const int N, const double *X, const int incX) { | ||
  INT len = N; | ||
  INT stride = incX; | ||
  return dnrm2_(&len, X, &stride); | ||
} | ||
|
||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
double dasum_(const INT *N, const double *X, const INT *incX); | ||
/* CBLAS-style wrapper: copies the by-value length and stride into INT | ||
 * temporaries and returns whatever dasum_ returns. */ | ||
double cblas_dasum(const int N, const double *X, const int incX) { | ||
  INT len = N; | ||
  INT stride = incX; | ||
  return dasum_(&len, X, &stride); | ||
} | ||
|
||
/* | ||
* =========================================================================== | ||
* Prototypes for level 1 BLAS routines | ||
* =========================================================================== | ||
*/ | ||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void daxpy_(const INT *N, const double *alpha, const double *X, const INT *incX, | ||
            double *Y, const INT *incY); | ||
/* CBLAS-style wrapper: copies the by-value length and strides into INT | ||
 * temporaries, passes alpha by address, and forwards to daxpy_ (Y is the only | ||
 * non-const pointer, i.e. the output). */ | ||
void cblas_daxpy(const int N, const double alpha, const double *X, | ||
                 const int incX, double *Y, const int incY) { | ||
  INT len = N; | ||
  INT strideX = incX; | ||
  INT strideY = incY; | ||
  daxpy_(&len, &alpha, X, &strideX, Y, &strideY); | ||
} | ||
|
||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void dscal_(const INT *N, const double *alpha, double *X, const INT *incX); | ||
/* CBLAS-style wrapper: copies the by-value length and stride into INT | ||
 * temporaries, passes alpha by address, and forwards to dscal_ (X is modified | ||
 * in place). */ | ||
void cblas_dscal(const int N, const double alpha, double *X, const int incX) { | ||
  INT len = N; | ||
  INT stride = incX; | ||
  dscal_(&len, &alpha, X, &stride); | ||
} | ||
|
||
/* | ||
* =========================================================================== | ||
* Prototypes for level 2 BLAS | ||
* =========================================================================== | ||
*/ | ||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void dgemv_(const char *trans, const INT *M, const INT *N, const double *alpha, | ||
            const double *A, const INT *lda, const double* X, const INT *incX, | ||
            const double *beta, double *Y, const INT *incY); | ||
/* | ||
 * CBLAS-style wrapper around dgemv_. | ||
 * Column-major input is forwarded unchanged. For any other order the transpose | ||
 * flag is inverted (NoTrans -> 'T', everything else -> 'N') and M and N are | ||
 * exchanged, because dgemv_ itself only takes column-major storage. | ||
 */ | ||
void cblas_dgemv(const enum CBLAS_ORDER order, | ||
                 const enum CBLAS_TRANSPOSE TransA, const int M, const int N, | ||
                 const double alpha, const double *A, const int lda, | ||
                 const double *X, const int incX, const double beta, | ||
                 double *Y, const int incY) { | ||
  INT rows = M, cols = N, ldA = lda, strideX = incX, strideY = incY; | ||
  char op; | ||
|
||
  if (order != CblasColMajor) { | ||
    op = (TransA == CblasNoTrans) ? 'T' : 'N'; | ||
    dgemv_(&op, &cols, &rows, &alpha, A, &ldA, X, &strideX, &beta, Y, | ||
           &strideY); | ||
    return; | ||
  } | ||
|
||
  op = (TransA == CblasNoTrans) ? 'N' : (TransA == CblasTrans) ? 'T' : 'C'; | ||
  dgemv_(&op, &rows, &cols, &alpha, A, &ldA, X, &strideX, &beta, Y, &strideY); | ||
} | ||
|
||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void dtrsv_(const char *uplo, const char *TransA, const char *diag, | ||
            const INT *N, const double *A, const INT *lda, double *X, | ||
            const INT *incX); | ||
/* | ||
 * CBLAS-style wrapper around dtrsv_. | ||
 * Column-major input maps each enum straight to its Fortran character. For any | ||
 * other order the triangle is flipped (Upper <-> Lower) and the transpose flag | ||
 * is inverted (NoTrans -> 'T', everything else -> 'N'). The unit-diagonal flag | ||
 * and the argument list passed to dtrsv_ are the same in both cases. | ||
 */ | ||
void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, | ||
                 const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, | ||
                 const int N, const double *A, const int lda, double *X, | ||
                 const int incX) { | ||
  INT n = N, ldA = lda, stride = incX; | ||
  char tri, op; | ||
  const char unit = (Diag == CblasUnit) ? 'U' : 'N'; | ||
|
||
  if (order == CblasColMajor) { | ||
    tri = (Uplo == CblasUpper) ? 'U' : 'L'; | ||
    op = (TransA == CblasNoTrans) ? 'N' : (TransA == CblasTrans) ? 'T' : 'C'; | ||
  } else { | ||
    tri = (Uplo == CblasUpper) ? 'L' : 'U'; | ||
    op = (TransA == CblasNoTrans) ? 'T' : 'N'; | ||
  } | ||
|
||
  dtrsv_(&tri, &op, &unit, &n, A, &ldA, X, &stride); | ||
} | ||
|
||
/* | ||
* =========================================================================== | ||
* Prototypes for level 3 BLAS | ||
* =========================================================================== | ||
*/ | ||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void dgemm_(const char *TransA, const char *TransB, const INT *M, const INT *N, | ||
const INT *K, const double *alpha, const double *A, const INT *lda, | ||
const double *B, const INT *ldb, const double *beta, double *C, | ||
const INT *ldc); | ||
/* | ||
 * CBLAS-style wrapper around dgemm_. | ||
 * Column-major input is forwarded with the arguments in their original order. | ||
 * For any other order the two operands are exchanged: B (with ldb) is passed | ||
 * first, A (with lda) second, and N is passed before M. | ||
 */ | ||
void cblas_dgemm(const enum CBLAS_ORDER Order, | ||
const enum CBLAS_TRANSPOSE TransA, | ||
const enum CBLAS_TRANSPOSE TransB, const int M, const int N, | ||
const int K, const double alpha, const double *A, | ||
const int lda, const double *B, const int ldb, | ||
const double beta, double *C, const int ldc) { | ||
char TA, TB; | ||
/* Addressable INT copies of the by-value dimensions and leading dimensions. */ | ||
INT M_ = M, N_ = N, K_ = K, lda_ = lda, ldb_ = ldb, ldc_ = ldc; | ||
if (Order == CblasColMajor) { | ||
if (TransA == CblasTrans) TA = 'T'; | ||
else if (TransA == CblasConjTrans) TA = 'C'; | ||
else TA = 'N'; | ||
|
||
if (TransB == CblasTrans) TB = 'T'; | ||
else if (TransB == CblasConjTrans) TB = 'C'; | ||
else TB = 'N'; | ||
|
||
dgemm_(&TA, &TB, &M_, &N_, &K_, &alpha, A, &lda_, B, &ldb_, &beta, C, | ||
&ldc_); | ||
} else { | ||
/* Deliberate cross-assignment: TB is derived from TransA and TA from TransB. | ||
 * The call below passes TA first, next to B, and TB second, next to A, so | ||
 * each flag still accompanies the matrix it was given for. */ | ||
if (TransA == CblasTrans) TB = 'T'; | ||
else if (TransA == CblasConjTrans) TB = 'C'; | ||
else TB = 'N'; | ||
|
||
if (TransB == CblasTrans) TA = 'T'; | ||
else if (TransB == CblasConjTrans) TA = 'C'; | ||
else TA = 'N'; | ||
|
||
dgemm_(&TA, &TB, &N_, &M_, &K_, &alpha, B, &ldb_, A, &lda_, &beta, C, | ||
&ldc_); | ||
} | ||
} | ||
|
||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void dsyrk_(const char *Uplo, const char *Trans, const INT *N, const INT *K, | ||
            const double *alpha, const double *A, const INT *lda, | ||
            const double *beta, double *C, const INT *ldc); | ||
/* | ||
 * CBLAS-style wrapper around dsyrk_. | ||
 * Column-major input maps each enum straight to its Fortran character. For any | ||
 * other order the triangle is flipped (Upper <-> Lower) and the transpose flag | ||
 * is inverted (Trans/ConjTrans -> 'N', everything else -> 'T'). The argument | ||
 * list passed to dsyrk_ is the same in both cases. | ||
 */ | ||
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, | ||
                 const enum CBLAS_TRANSPOSE Trans, const int N, const int K, | ||
                 const double alpha, const double *A, const int lda, | ||
                 const double beta, double *C, const int ldc) { | ||
  INT n = N, k = K, ldA = lda, ldC = ldc; | ||
  char tri, op; | ||
|
||
  if (Order == CblasColMajor) { | ||
    tri = (Uplo == CblasUpper) ? 'U' : 'L'; | ||
    op = (Trans == CblasTrans) ? 'T' : (Trans == CblasConjTrans) ? 'C' : 'N'; | ||
  } else { | ||
    tri = (Uplo == CblasUpper) ? 'L' : 'U'; | ||
    op = (Trans == CblasTrans || Trans == CblasConjTrans) ? 'N' : 'T'; | ||
  } | ||
|
||
  dsyrk_(&tri, &op, &n, &k, &alpha, A, &ldA, &beta, C, &ldC); | ||
} | ||
|
||
/* Fortran BLAS entry point: every argument, scalars included, is an address. */ | ||
void dtrsm_(const char *Side, const char *Uplo, const char *TransA, | ||
            const char *Diag, const INT *M, const INT *N, const double *alpha, | ||
            const double *A, const INT *lda, double *B, const INT *ldb); | ||
/* | ||
 * CBLAS-style wrapper around dtrsm_. | ||
 * The transpose and unit-diagonal flags map to the same Fortran characters for | ||
 * every order. Column-major input also keeps side, triangle and (M, N) as | ||
 * given; any other order flips the side (Right <-> Left), flips the triangle | ||
 * (Upper <-> Lower) and passes N before M. | ||
 */ | ||
void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, | ||
                 const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, | ||
                 const enum CBLAS_DIAG Diag, const int M, const int N, | ||
                 const double alpha, const double *A, const int lda, | ||
                 double *B, const int ldb) { | ||
  INT m = M, n = N, ldA = lda, ldB = ldb; | ||
  const char op = | ||
      (TransA == CblasTrans) ? 'T' : (TransA == CblasConjTrans) ? 'C' : 'N'; | ||
  const char unit = (Diag == CblasUnit) ? 'U' : 'N'; | ||
  char side, tri; | ||
|
||
  if (Order == CblasColMajor) { | ||
    side = (Side == CblasRight) ? 'R' : 'L'; | ||
    tri = (Uplo == CblasUpper) ? 'U' : 'L'; | ||
    dtrsm_(&side, &tri, &op, &unit, &m, &n, &alpha, A, &ldA, B, &ldB); | ||
  } else { | ||
    side = (Side == CblasRight) ? 'L' : 'R'; | ||
    tri = (Uplo == CblasUpper) ? 'L' : 'U'; | ||
    dtrsm_(&side, &tri, &op, &unit, &n, &m, &alpha, A, &ldA, B, &ldB); | ||
  } | ||
} | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
Binary file not shown.