Skip to content

Commit

Permalink
Remove trueFP16 gemms based on MSHADOW_USE_PASCAL=1. (dmlc#283)
Browse files Browse the repository at this point in the history
  • Loading branch information
DickJC123 authored and piiswrong committed Aug 18, 2017
1 parent 1824582 commit e510277
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 18 deletions.
5 changes: 3 additions & 2 deletions make/mshadow.mk
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ else
MSHADOW_CFLAGS+= -DMSHADOW_DIST_PS=0
endif

# Set MSHADOW_USE_PASCAL to one to enable nvidia pascal gpu features.
# Like cublasHgemm
# MSHADOW_USE_PASCAL=1 used to enable true-fp16 gemms. Now, mshadow
# only uses pseudo-fp16 gemms, so this flag will be removed after
# dependent projects no longer reference it.
MSHADOW_CFLAGS += -DMSHADOW_USE_PASCAL=0
19 changes: 3 additions & 16 deletions mshadow/dot_engine-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,14 +430,9 @@ struct BLASEngine<gpu, half::half_t> {
const half::half_t *B, int ldb, half::half_t beta,
half::half_t *C, int ldc) {
#if defined(CUDA_VERSION) && CUDA_VERSION >= 7050
if (
#if MSHADOW_USE_PASCAL == 1
false ||
#endif
stream->dev_id == -1 || (stream->prop.major <= 5 && stream->prop.minor <= 2)) {
// Not PASCAL
float alpha_f = float(alpha); // NOLINT(*)
float beta_f = float(beta); // NOLINT(*)
// Always use pseudo-fp16: fp32 compute with fp16 I/O.
float alpha_f = float(alpha); // NOLINT(*)
float beta_f = float(beta); // NOLINT(*)
#if CUDA_VERSION >= 8000
cublasStatus_t err = cublasSgemmEx(Stream<gpu>::GetBlasHandle(stream),
GetT(transa), GetT(transb), m, n, k, &alpha_f,
Expand All @@ -451,14 +446,6 @@ struct BLASEngine<gpu, half::half_t> {
ldb, &beta_f, C, CUBLAS_DATA_HALF, ldc);
CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas SgemmEx fail";
#endif // CUDA_VERSION >= 8000
} else {
// PASCAL
cublasStatus_t err = cublasHgemm(Stream<gpu>::GetBlasHandle(stream),
GetT(transa), GetT(transb), m, n, k, &alpha.cuhalf_,
&A->cuhalf_, lda, &B->cuhalf_, ldb,
&beta.cuhalf_, &C->cuhalf_, ldc);
CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Cublas Hgemm fail";
}
#else
LOG(FATAL) << "Require CUDA version >= 7.5!";
#endif // defined(CUDA_VERSION) && CUDA_VERSION >= 7050
Expand Down

0 comments on commit e510277

Please sign in to comment.