Remove BUILD_CAFFE2 and build everything (pytorch#8338)

Summary: This completely removes BUILD_CAFFE2 from CMake. setup.py still has a small amount of "full build" logic that enables USE_CUDNN and BUILD_PYTHON, but otherwise everything should now be enabled for PyTorch as well as Caffe2. This gets us a lot closer to full unification.

cc mingzhe09088, pjh5, ezyang, smessmer, Yangqing

Pull Request resolved: pytorch#8338
Reviewed By: mingzhe09088
Differential Revision: D9600513
Pulled By: orionr
fbshipit-source-id: 9f6ca49df35b920d3439dcec56e7b26ad4768b7d
juhofuriosa · Aug 31, 2018 · 6508db7
1 parent a2a584f
commit 6508db7
Show file tree

Hide file tree

Showing 35 changed files with 520 additions and 503 deletions.
diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh
@@ -218,13 +218,21 @@ if [[ -z "$INTEGRATED" ]]; then
 
 else
 
+  # sccache will be stuck if all cores are used for compiling
+  # see https://github.com/pytorch/pytorch/pull/7361
+  if [[ -n "${SCCACHE}" ]]; then
+    export MAX_JOBS=`expr $(nproc) - 1`
+  fi
+
   FULL_CAFFE2=1 python setup.py install --user
-  # TODO: I'm not sure why this is necessary
+
+  # This is to save test binaries for testing
   cp -r torch/lib/tmp_install $INSTALL_PREFIX
 
-fi
+  ls $INSTALL_PREFIX
 
-report_compile_cache_stats
+  report_compile_cache_stats
+fi
 
 
 ###############################################################################

diff --git a/.jenkins/pytorch/common.sh b/.jenkins/pytorch/common.sh
@@ -112,8 +112,7 @@ else
   exit 1
 fi
 
-if [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-xenial-cuda9-cudnn7-py3 ]] || \
-   [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-trusty-py3.6-gcc7* ]]; then
+if [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-trusty-py3.6-gcc7* ]]; then
   BUILD_TEST_LIBTORCH=1
 else
   BUILD_TEST_LIBTORCH=0

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -54,7 +54,6 @@ endif()
 # cmake/Summary.cmake so that the summary prints out the option values.
 include(CMakeDependentOption)
 option(BUILD_TORCH "Build Torch" OFF)
-option(BUILD_CAFFE2 "Build Caffe2" ON)
 option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
 option(BUILD_ATEN_MOBILE "Build ATen for Android and iOS" OFF)
 option(BUILD_BINARY "Build C++ binaries" ON)
@@ -68,9 +67,7 @@ cmake_dependent_option(
 cmake_dependent_option(
     CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON
     "NOT BUILD_SHARED_LIBS" OFF)
-cmake_dependent_option(
-    BUILD_TEST "Build Caffe2 C++ test binaries (need gtest and gbenchmark)" OFF
-    "BUILD_CAFFE2" OFF)
+option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
 cmake_dependent_option(
     INSTALL_TEST "Install test binaries if BUILD_TEST is on" OFF
     "BUILD_TEST" OFF)
@@ -83,32 +80,16 @@ cmake_dependent_option(
     USE_CUDNN "Use cuDNN" ON
     "USE_CUDA" OFF)
 option(USE_FFMPEG "Use ffmpeg" OFF)
-cmake_dependent_option(
-    USE_GFLAGS "Use GFLAGS" ON
-    "BUILD_CAFFE2" OFF)
-cmake_dependent_option(
-    USE_GLOG "Use GLOG" ON
-    "BUILD_CAFFE2" OFF)
-cmake_dependent_option(
-    USE_GLOO "Use Gloo" ON
-    "BUILD_CAFFE2" OFF)
+option(USE_GFLAGS "Use GFLAGS" ON)
+option(USE_GLOG "Use GLOG" ON)
+option(USE_GLOO "Use Gloo" ON)
 option(USE_GLOO_IBVERBS "Use Gloo IB verbs for distributed support" OFF)
-cmake_dependent_option(
-    USE_LEVELDB "Use LEVELDB" ON
-    "BUILD_CAFFE2" OFF)
+option(USE_LEVELDB "Use LEVELDB" ON)
 option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
-cmake_dependent_option(
-    USE_LMDB "Use LMDB" ON
-    "BUILD_CAFFE2" OFF)
-cmake_dependent_option(
-    USE_METAL "Use Metal for iOS build" ON
-    "BUILD_CAFFE2" OFF)
-cmake_dependent_option(
-    USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON
-    "BUILD_CAFFE2" OFF)
-cmake_dependent_option(
-    USE_MPI "Use MPI" ON
-    "BUILD_CAFFE2" OFF)
+option(USE_LMDB "Use LMDB" ON)
+option(USE_METAL "Use Metal for iOS build" ON)
+option(USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON)
+option(USE_MPI "Use MPI" ON)
 option(USE_NATIVE_ARCH "Use -march=native" OFF)
 option(USE_NCCL "Use NCCL" ON)
 option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF)
@@ -121,9 +102,7 @@ cmake_dependent_option(
     "USE_CUDA" OFF)
 option(USE_OBSERVERS "Use observers module." OFF)
 option(USE_OPENCL "Use OpenCL" OFF)
-cmake_dependent_option(
-    USE_OPENCV "Use OpenCV" ON
-    "BUILD_CAFFE2" OFF)
+option(USE_OPENCV "Use OpenCV" ON)
 option(USE_OPENMP "Use OpenMP for parallel code" OFF)
 option(USE_PROF "Use profiling" OFF)
 option(USE_REDIS "Use Redis" OFF)
@@ -133,12 +112,8 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
 option(USE_ZMQ "Use ZMQ" OFF)
 option(USE_ZSTD "Use ZSTD" OFF)
 option(USE_MKLDNN "Use MKLDNN" OFF)
-cmake_dependent_option(
-  USE_IDEEP "Use IDEEP interface in MKL BLAS" ON
-    "BUILD_CAFFE2" OFF)
-cmake_dependent_option(
-  USE_MKLML "Use MKLML interface in MKL BLAS" ON
-    "BUILD_CAFFE2" OFF)
+option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
+option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
 option(USE_DISTRIBUTED "Use THD (distributed)" OFF)
 
 # Used when building Caffe2 through setup.py
@@ -218,6 +193,9 @@ if(NOT MSVC)
   if (CMAKE_COMPILER_IS_GNUCXX AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0))
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
   endif()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pedantic")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-decls")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=old-style-cast")
   # These flags are not available in GCC-4.8.5. Set only when using clang.
   # Compared against https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/Option-Summary.html
   if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
@@ -240,6 +218,10 @@ if(NOT MSVC)
   if ($ENV{WERROR})
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
   endif($ENV{WERROR})
+  if (NOT APPLE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized")
+  endif()
 else()
   foreach(flag_var
       CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
@@ -266,6 +248,17 @@ if (USE_ASAN)
     set (CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_STATIC_LINKER_FLAGS_DEBUG} -fsanitize=address")
 endif()
 
+if (APPLE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++14-extensions")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constexpr-not-const")
+endif()
+
+if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0.0)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
+endif()
+
 if(ANDROID)
   if(CMAKE_COMPILER_IS_GNUCXX)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s")
@@ -402,19 +395,23 @@ else()
 endif()
 
 # ---[ Modules
-if (BUILD_CAFFE2)
-  add_subdirectory(modules)
+# TODO(orionr): Enable all of this for Windows DLL when we
+# can figure out how to get it to build
+if (NOT (MSVC AND BUILD_SHARED_LIBS))
+add_subdirectory(modules)
 endif()
 
 # ---[ Binaries
 # Binaries will be built after the Caffe2 main libraries and the modules
 # are built. For the binaries, they will be linked to the Caffe2 main
 # libraries, as well as all the modules that are built with Caffe2 (the ones
 # built in the previous Modules section above).
-if (BUILD_CAFFE2)
-  if (BUILD_BINARY)
-    add_subdirectory(binaries)
-  endif()
+# TODO(orionr): Enable all of this for Windows DLL when we
+# can figure out how to get it to build
+if (NOT (MSVC AND BUILD_SHARED_LIBS))
+if (BUILD_BINARY)
+  add_subdirectory(binaries)
+endif()
 endif()
 
 include(cmake/Summary.cmake)

diff --git a/aten/src/ATen/core/typeid.h b/aten/src/ATen/core/typeid.h
@@ -404,7 +404,7 @@ inline bool operator!=(const TypeMeta& lhs, const TypeMeta& rhs) noexcept {
 #ifdef _MSC_VER
 #define CAFFE_KNOWN_TYPE(T)                                               \
   template <>                                                             \
-  AT_CORE_API TypeIdentifier TypeMeta::Id<T>() {                          \
+  AT_CORE_EXPORT TypeIdentifier TypeMeta::Id<T>() {                       \
     static const TypeIdentifier type_id = TypeIdentifier::createTypeId(); \
     static TypeNameRegisterer<T> registerer(type_id, #T);                 \
     return type_id;                                                       \

diff --git a/aten/src/ATen/cuda/CUDAStream.h b/aten/src/ATen/cuda/CUDAStream.h
@@ -5,7 +5,7 @@
 
 #include "cuda_runtime_api.h"
 
-#include <ATen/core/ATenGeneral.h>
+#include <ATen/cuda/ATenCUDAGeneral.h>
 
 /*
 * A CUDAStream interface. See CUDAStream.cpp for implementation details.
@@ -17,7 +17,7 @@
 
 /*
 * Stream pool note.
-* 
+*
 * A CUDAStream is an abstraction of an actual cuStream on the GPU. CUDAStreams
 * are backed by cuStreams, but they use several pools to minimize the costs
 * associated with creating, retaining, and destroying cuStreams.
@@ -27,14 +27,14 @@
 * The first pool contains only the default stream. When the default stream
 * is requested it's returned.
 *
-* The second pool is the "low priority" or "default priority" streams. In 
+* The second pool is the "low priority" or "default priority" streams. In
 * HIP builds there is no distinction between streams in this pool and streams
-* in the third pool (below). There are 32 of these streams per device, and 
+* in the third pool (below). There are 32 of these streams per device, and
 * when a stream is requested one of these streams is returned round-robin.
 * That is, the first stream requested is at index 0, the second at index 1...
 * to index 31, then index 0 again.
 *
-* This means that if 33 low priority streams are requested, the first and 
+* This means that if 33 low priority streams are requested, the first and
 * last streams requested are actually the same stream (under the covers)
 * and kernels enqueued on them cannot run concurrently.
 *
@@ -46,7 +46,7 @@
 * many longer-lived streams are required in performance critical scenarios
 * then the functionality here may need to be extended to allow, for example,
 * "reserving" a subset of the pool so that other streams do not accidentally
-* overlap the performance critical streams. 
+* overlap the performance critical streams.
 */
 
 struct CUDAStreamInternals;
@@ -59,19 +59,19 @@ struct CUDAEvent;
 namespace detail {
 
 // Pointer-based API (for internal use)
-AT_API CUDAStreamInternals* CUDAStream_getDefaultStream(int64_t device = -1);
+AT_CUDA_API CUDAStreamInternals* CUDAStream_getDefaultStream(int64_t device = -1);
 
-AT_API CUDAStreamInternals* CUDAStream_createStream(
+AT_CUDA_API CUDAStreamInternals* CUDAStream_createStream(
   const bool isHighPriority = false
 , int64_t device = -1);
 
-AT_API CUDAStreamInternals* CUDAStream_getCurrentStream(int64_t device = -1);
+AT_CUDA_API CUDAStreamInternals* CUDAStream_getCurrentStream(int64_t device = -1);
 
-AT_API void CUDAStream_setStream(CUDAStreamInternals* internals);
-AT_API void CUDAStream_uncheckedSetStream(CUDAStreamInternals* internals);
+AT_CUDA_API void CUDAStream_setStream(CUDAStreamInternals* internals);
+AT_CUDA_API void CUDAStream_uncheckedSetStream(CUDAStreamInternals* internals);
 
-AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
-AT_API int64_t CUDAStream_device(CUDAStreamInternals*);
+AT_CUDA_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
+AT_CUDA_API int64_t CUDAStream_device(CUDAStreamInternals*);
 
 } // namespace detail
 
@@ -81,7 +81,7 @@ struct CUDAStream {
 
   // Constructors
   CUDAStream() = default;
-  /* implicit */ CUDAStream(CUDAStreamInternals* internals_in) 
+  /* implicit */ CUDAStream(CUDAStreamInternals* internals_in)
   : internals_{internals_in} { }
 
   // Returns true if the CUDAStream is not null.