forked from huggingface/text-generation-inference
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(fp8): use fbgemm kernels and load fp8 weights directly (huggingf…
…ace#2248) * feat(fp8): add support for fbgemm * allow loading fp8 weights directly * update outlines * fix makefile * build fbgemm * avoid circular import and fix dockerfile * add default dtype * refactored weights loader * fix auto conversion * fix quantization config parsing * force new nccl on install * missing get_weights implementation * increase timeout
- Loading branch information
1 parent
e5c1d6d
commit 53ec0b7
Showing
23 changed files
with
736 additions
and
109 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
fbgemm_commit := 9cf0429b726931cfab72b8264730bea682f32fca | ||
|
||
build-fbgemm: | ||
chmod +x fix_torch90a.sh && ./fix_torch90a.sh && \ | ||
git clone https://github.com/pytorch/FBGEMM.git fbgemm && \ | ||
cp fbgemm_remove_unused.patch fbgemm && \ | ||
cd fbgemm && git fetch && git checkout $(fbgemm_commit) && git apply fbgemm_remove_unused.patch && \ | ||
git submodule update --init --recursive && \ | ||
cd fbgemm_gpu && \ | ||
pip install -r requirements.txt && \ | ||
CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py --package_variant genai build | ||
|
||
install-fbgemm: build-fbgemm | ||
cd fbgemm/fbgemm_gpu && \ | ||
CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py --package_variant genai install |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,306 @@ | ||
diff --git a/fbgemm_gpu/CMakeLists.txt b/fbgemm_gpu/CMakeLists.txt | ||
index 2244ea6f..96265a48 100644 | ||
--- a/fbgemm_gpu/CMakeLists.txt | ||
+++ b/fbgemm_gpu/CMakeLists.txt | ||
@@ -94,14 +94,14 @@ endif() | ||
# Build Experimental Modules | ||
################################################################################ | ||
|
||
-if(NOT FBGEMM_CPU_ONLY AND NOT USE_ROCM) | ||
- # TODO: Figure out NCCL/RCCL integration with ROCm | ||
- add_subdirectory(experimental/example) | ||
-endif() | ||
- | ||
-if(NOT FBGEMM_CPU_ONLY) | ||
- add_subdirectory(experimental/gemm) | ||
-endif() | ||
+# if(NOT FBGEMM_CPU_ONLY AND NOT USE_ROCM) | ||
+# # TODO: Figure out NCCL/RCCL integration with ROCm | ||
+# add_subdirectory(experimental/example) | ||
+# endif() | ||
+ | ||
+# if(NOT FBGEMM_CPU_ONLY) | ||
+# add_subdirectory(experimental/gemm) | ||
+# endif() | ||
|
||
if(NOT FBGEMM_CPU_ONLY AND NOT USE_ROCM) | ||
# CUTLASS currently doesn't build on ROCm and CK hasnt yet been added: | ||
diff --git a/fbgemm_gpu/FbgemmGpu.cmake b/fbgemm_gpu/FbgemmGpu.cmake | ||
index c56773fe..0c0d349e 100644 | ||
--- a/fbgemm_gpu/FbgemmGpu.cmake | ||
+++ b/fbgemm_gpu/FbgemmGpu.cmake | ||
@@ -446,53 +446,55 @@ set_source_files_properties(${fbgemm_sources} | ||
################################################################################ | ||
|
||
set(fbgemm_gpu_sources_static_cpu | ||
- codegen/training/forward/embedding_forward_split_cpu.cpp | ||
- codegen/inference/embedding_forward_quantized_host_cpu.cpp | ||
- codegen/training/backward/embedding_backward_dense_host_cpu.cpp | ||
- codegen/utils/embedding_bounds_check_host_cpu.cpp | ||
- src/merge_pooled_embedding_ops/merge_pooled_embedding_ops_cpu.cpp | ||
- src/permute_multi_embedding_ops/permute_multi_embedding_function.cpp | ||
- src/permute_multi_embedding_ops/permute_multi_embedding_ops_cpu.cpp | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_function.cpp | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_cpu.cpp | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_cpu.cpp | ||
- src/jagged_tensor_ops/jagged_tensor_ops_autograd.cpp | ||
- src/jagged_tensor_ops/jagged_tensor_ops_meta.cpp | ||
- src/jagged_tensor_ops/jagged_tensor_ops_cpu.cpp | ||
- src/input_combine_ops/input_combine_cpu.cpp | ||
- src/layout_transform_ops/layout_transform_ops_cpu.cpp | ||
+ # codegen/training/forward/embedding_forward_split_cpu.cpp | ||
+ # codegen/inference/embedding_forward_quantized_host_cpu.cpp | ||
+ # codegen/training/backward/embedding_backward_dense_host_cpu.cpp | ||
+ # codegen/utils/embedding_bounds_check_host_cpu.cpp | ||
+ # src/merge_pooled_embedding_ops/merge_pooled_embedding_ops_cpu.cpp | ||
+ # src/permute_multi_embedding_ops/permute_multi_embedding_function.cpp | ||
+ # src/permute_multi_embedding_ops/permute_multi_embedding_ops_cpu.cpp | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_function.cpp | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_cpu.cpp | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_cpu.cpp | ||
+ # src/jagged_tensor_ops/jagged_tensor_ops_autograd.cpp | ||
+ # src/jagged_tensor_ops/jagged_tensor_ops_meta.cpp | ||
+ # src/jagged_tensor_ops/jagged_tensor_ops_cpu.cpp | ||
+ # src/input_combine_ops/input_combine_cpu.cpp | ||
+ # src/layout_transform_ops/layout_transform_ops_cpu.cpp | ||
src/quantize_ops/quantize_ops_cpu.cpp | ||
src/quantize_ops/quantize_ops_meta.cpp | ||
- src/sparse_ops/sparse_ops_cpu.cpp | ||
- src/sparse_ops/sparse_ops_meta.cpp | ||
- src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp | ||
- src/split_embeddings_cache/linearize_cache_indices.cpp | ||
- src/split_embeddings_cache/lfu_cache_populate_byte.cpp | ||
- src/split_embeddings_cache/lru_cache_populate_byte.cpp | ||
- src/split_embeddings_cache/lxu_cache.cpp | ||
- src/split_embeddings_cache/split_embeddings_cache_ops.cpp | ||
- codegen/training/index_select/batch_index_select_dim0_ops.cpp | ||
- codegen/training/index_select/batch_index_select_dim0_cpu_host.cpp) | ||
+ # src/sparse_ops/sparse_ops_cpu.cpp | ||
+ # src/sparse_ops/sparse_ops_meta.cpp | ||
+ # src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp | ||
+ # src/split_embeddings_cache/linearize_cache_indices.cpp | ||
+ # src/split_embeddings_cache/lfu_cache_populate_byte.cpp | ||
+ # src/split_embeddings_cache/lru_cache_populate_byte.cpp | ||
+ # src/split_embeddings_cache/lxu_cache.cpp | ||
+ # src/split_embeddings_cache/split_embeddings_cache_ops.cpp | ||
+ # codegen/training/index_select/batch_index_select_dim0_ops.cpp | ||
+ # codegen/training/index_select/batch_index_select_dim0_cpu_host.cpp) | ||
+) | ||
|
||
if(NOT FBGEMM_CPU_ONLY) | ||
list(APPEND fbgemm_gpu_sources_static_cpu | ||
- codegen/inference/embedding_forward_quantized_host.cpp | ||
- codegen/utils/embedding_bounds_check_host.cpp | ||
- src/intraining_embedding_pruning_ops/intraining_embedding_pruning_gpu.cpp | ||
- src/layout_transform_ops/layout_transform_ops_gpu.cpp | ||
- src/memory_utils/memory_utils.cpp | ||
- src/memory_utils/memory_utils_ops.cpp | ||
- src/memory_utils/memory_utils_ops_cpu.cpp | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_gpu.cpp | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_gpu.cpp | ||
+ # codegen/inference/embedding_forward_quantized_host.cpp | ||
+ # codegen/utils/embedding_bounds_check_host.cpp | ||
+ # src/intraining_embedding_pruning_ops/intraining_embedding_pruning_gpu.cpp | ||
+ # src/layout_transform_ops/layout_transform_ops_gpu.cpp | ||
+ # src/memory_utils/memory_utils.cpp | ||
+ # src/memory_utils/memory_utils_ops.cpp | ||
+ # src/memory_utils/memory_utils_ops_cpu.cpp | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_gpu.cpp | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_gpu.cpp | ||
src/quantize_ops/quantize_ops_gpu.cpp | ||
- src/sparse_ops/sparse_ops_gpu.cpp | ||
- src/split_embeddings_utils/split_embeddings_utils.cpp | ||
- src/split_embeddings_cache/split_embeddings_cache_ops.cu | ||
- src/metric_ops/metric_ops_host.cpp | ||
- src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp | ||
- src/input_combine_ops/input_combine_gpu.cpp | ||
- codegen/training/index_select/batch_index_select_dim0_host.cpp) | ||
+ # src/sparse_ops/sparse_ops_gpu.cpp | ||
+ # src/split_embeddings_utils/split_embeddings_utils.cpp | ||
+ # src/split_embeddings_cache/split_embeddings_cache_ops.cu | ||
+ # src/metric_ops/metric_ops_host.cpp | ||
+ # src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp | ||
+ # src/input_combine_ops/input_combine_gpu.cpp | ||
+ # codegen/training/index_select/batch_index_select_dim0_host.cpp) | ||
+ ) | ||
|
||
if(NVML_LIB_PATH OR USE_ROCM) | ||
message(STATUS "Adding merge_pooled_embeddings sources") | ||
@@ -516,36 +518,36 @@ endif() | ||
|
||
if(NOT FBGEMM_CPU_ONLY) | ||
set(fbgemm_gpu_sources_static_gpu | ||
- codegen/utils/embedding_bounds_check.cu | ||
- codegen/inference/embedding_forward_quantized_split_lookup.cu | ||
- src/embedding_inplace_ops/embedding_inplace_update.cu | ||
- src/histogram_binning_calibration_ops.cu | ||
- src/input_combine_ops/input_combine.cu | ||
- src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu | ||
- src/memory_utils/memory_utils.cu | ||
- src/memory_utils/memory_utils_ops.cu | ||
- src/jagged_tensor_ops/batched_dense_vec_jagged_2d_mul_backward.cu | ||
- src/jagged_tensor_ops/batched_dense_vec_jagged_2d_mul_forward.cu | ||
- src/jagged_tensor_ops/dense_to_jagged_forward.cu | ||
- src/jagged_tensor_ops/jagged_dense_bmm_forward.cu | ||
- src/jagged_tensor_ops/jagged_dense_dense_elementwise_add_jagged_output_forward.cu | ||
- src/jagged_tensor_ops/jagged_dense_elementwise_mul_backward.cu | ||
- src/jagged_tensor_ops/jagged_dense_elementwise_mul_forward.cu | ||
- src/jagged_tensor_ops/jagged_index_add_2d_forward.cu | ||
- src/jagged_tensor_ops/jagged_index_select_2d_forward.cu | ||
- src/jagged_tensor_ops/jagged_jagged_bmm_forward.cu | ||
- src/jagged_tensor_ops/jagged_softmax_backward.cu | ||
- src/jagged_tensor_ops/jagged_softmax_forward.cu | ||
- src/jagged_tensor_ops/jagged_tensor_ops.cu | ||
- src/jagged_tensor_ops/jagged_to_padded_dense_backward.cu | ||
- src/jagged_tensor_ops/jagged_to_padded_dense_forward.cu | ||
- src/jagged_tensor_ops/jagged_unique_indices.cu | ||
- src/jagged_tensor_ops/keyed_jagged_index_select_dim1.cu | ||
- src/layout_transform_ops/layout_transform_ops.cu | ||
- src/metric_ops/metric_ops.cu | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split.cu | ||
- src/permute_pooled_embedding_ops/permute_pooled_embedding_ops.cu | ||
- src/permute_multi_embedding_ops/permute_multi_embedding_ops.cu | ||
+ # codegen/utils/embedding_bounds_check.cu | ||
+ # codegen/inference/embedding_forward_quantized_split_lookup.cu | ||
+ # src/embedding_inplace_ops/embedding_inplace_update.cu | ||
+ # src/histogram_binning_calibration_ops.cu | ||
+ # src/input_combine_ops/input_combine.cu | ||
+ # src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu | ||
+ # src/memory_utils/memory_utils.cu | ||
+ # src/memory_utils/memory_utils_ops.cu | ||
+ # src/jagged_tensor_ops/batched_dense_vec_jagged_2d_mul_backward.cu | ||
+ # src/jagged_tensor_ops/batched_dense_vec_jagged_2d_mul_forward.cu | ||
+ # src/jagged_tensor_ops/dense_to_jagged_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_dense_bmm_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_dense_dense_elementwise_add_jagged_output_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_dense_elementwise_mul_backward.cu | ||
+ # src/jagged_tensor_ops/jagged_dense_elementwise_mul_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_index_add_2d_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_index_select_2d_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_jagged_bmm_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_softmax_backward.cu | ||
+ # src/jagged_tensor_ops/jagged_softmax_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_tensor_ops.cu | ||
+ # src/jagged_tensor_ops/jagged_to_padded_dense_backward.cu | ||
+ # src/jagged_tensor_ops/jagged_to_padded_dense_forward.cu | ||
+ # src/jagged_tensor_ops/jagged_unique_indices.cu | ||
+ # src/jagged_tensor_ops/keyed_jagged_index_select_dim1.cu | ||
+ # src/layout_transform_ops/layout_transform_ops.cu | ||
+ # src/metric_ops/metric_ops.cu | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split.cu | ||
+ # src/permute_pooled_embedding_ops/permute_pooled_embedding_ops.cu | ||
+ # src/permute_multi_embedding_ops/permute_multi_embedding_ops.cu | ||
src/quantize_ops/quantize_bfloat16.cu | ||
src/quantize_ops/quantize_fp8_rowwise.cu | ||
src/quantize_ops/quantize_fused_8bit_rowwise.cu | ||
@@ -554,39 +556,40 @@ if(NOT FBGEMM_CPU_ONLY) | ||
src/quantize_ops/quantize_msfp.cu | ||
src/quantize_ops/quantize_padded_fp8_rowwise.cu | ||
src/quantize_ops/quantize_mx.cu | ||
- src/sparse_ops/sparse_async_cumsum.cu | ||
- src/sparse_ops/sparse_block_bucketize_features.cu | ||
- src/sparse_ops/sparse_bucketize_features.cu | ||
- src/sparse_ops/sparse_batched_unary_embeddings.cu | ||
- src/sparse_ops/sparse_compute_frequency_sequence.cu | ||
- src/sparse_ops/sparse_expand_into_jagged_permute.cu | ||
- src/sparse_ops/sparse_group_index.cu | ||
- src/sparse_ops/sparse_index_add.cu | ||
- src/sparse_ops/sparse_index_select.cu | ||
- src/sparse_ops/sparse_invert_permute.cu | ||
- src/sparse_ops/sparse_pack_segments_backward.cu | ||
- src/sparse_ops/sparse_pack_segments_forward.cu | ||
- src/sparse_ops/sparse_permute_1d.cu | ||
- src/sparse_ops/sparse_permute_2d.cu | ||
- src/sparse_ops/sparse_permute102.cu | ||
- src/sparse_ops/sparse_permute_embeddings.cu | ||
- src/sparse_ops/sparse_range.cu | ||
- src/sparse_ops/sparse_reorder_batched_ad.cu | ||
- src/sparse_ops/sparse_segment_sum_csr.cu | ||
- src/sparse_ops/sparse_zipf.cu | ||
- src/split_embeddings_cache/lfu_cache_find.cu | ||
- src/split_embeddings_cache/lfu_cache_populate.cu | ||
- src/split_embeddings_cache/lfu_cache_populate_byte.cu | ||
- src/split_embeddings_cache/lru_cache_find.cu | ||
- src/split_embeddings_cache/lru_cache_populate.cu | ||
- src/split_embeddings_cache/lru_cache_populate_byte.cu | ||
- src/split_embeddings_cache/lxu_cache.cu | ||
- src/split_embeddings_cache/linearize_cache_indices.cu | ||
- src/split_embeddings_cache/reset_weight_momentum.cu | ||
- src/split_embeddings_utils/generate_vbe_metadata.cu | ||
- src/split_embeddings_utils/get_infos_metadata.cu | ||
- src/split_embeddings_utils/radix_sort_pairs.cu | ||
- src/split_embeddings_utils/transpose_embedding_input.cu) | ||
+ # src/sparse_ops/sparse_async_cumsum.cu | ||
+ # src/sparse_ops/sparse_block_bucketize_features.cu | ||
+ # src/sparse_ops/sparse_bucketize_features.cu | ||
+ # src/sparse_ops/sparse_batched_unary_embeddings.cu | ||
+ # src/sparse_ops/sparse_compute_frequency_sequence.cu | ||
+ # src/sparse_ops/sparse_expand_into_jagged_permute.cu | ||
+ # src/sparse_ops/sparse_group_index.cu | ||
+ # src/sparse_ops/sparse_index_add.cu | ||
+ # src/sparse_ops/sparse_index_select.cu | ||
+ # src/sparse_ops/sparse_invert_permute.cu | ||
+ # src/sparse_ops/sparse_pack_segments_backward.cu | ||
+ # src/sparse_ops/sparse_pack_segments_forward.cu | ||
+ # src/sparse_ops/sparse_permute_1d.cu | ||
+ # src/sparse_ops/sparse_permute_2d.cu | ||
+ # src/sparse_ops/sparse_permute102.cu | ||
+ # src/sparse_ops/sparse_permute_embeddings.cu | ||
+ # src/sparse_ops/sparse_range.cu | ||
+ # src/sparse_ops/sparse_reorder_batched_ad.cu | ||
+ # src/sparse_ops/sparse_segment_sum_csr.cu | ||
+ # src/sparse_ops/sparse_zipf.cu | ||
+ # src/split_embeddings_cache/lfu_cache_find.cu | ||
+ # src/split_embeddings_cache/lfu_cache_populate.cu | ||
+ # src/split_embeddings_cache/lfu_cache_populate_byte.cu | ||
+ # src/split_embeddings_cache/lru_cache_find.cu | ||
+ # src/split_embeddings_cache/lru_cache_populate.cu | ||
+ # src/split_embeddings_cache/lru_cache_populate_byte.cu | ||
+ # src/split_embeddings_cache/lxu_cache.cu | ||
+ # src/split_embeddings_cache/linearize_cache_indices.cu | ||
+ # src/split_embeddings_cache/reset_weight_momentum.cu | ||
+ # src/split_embeddings_utils/generate_vbe_metadata.cu | ||
+ # src/split_embeddings_utils/get_infos_metadata.cu | ||
+ # src/split_embeddings_utils/radix_sort_pairs.cu | ||
+ # src/split_embeddings_utils/transpose_embedding_input.cu) | ||
+ ) | ||
|
||
set_source_files_properties(${fbgemm_gpu_sources_static_gpu} | ||
PROPERTIES COMPILE_OPTIONS | ||
diff --git a/fbgemm_gpu/experimental/gen_ai/CMakeLists.txt b/fbgemm_gpu/experimental/gen_ai/CMakeLists.txt | ||
index 01f1d6ab..a6b8d7a8 100644 | ||
--- a/fbgemm_gpu/experimental/gen_ai/CMakeLists.txt | ||
+++ b/fbgemm_gpu/experimental/gen_ai/CMakeLists.txt | ||
@@ -25,23 +25,24 @@ set(fbgemm_sources_include_directories | ||
${THIRDPARTY}/json/include | ||
${NCCL_INCLUDE_DIRS}) | ||
|
||
-set(attention_ops_sources | ||
- src/attention/attention.cpp | ||
- src/attention/gqa_attn_splitk.cu) | ||
+# set(attention_ops_sources | ||
+# src/attention/attention.cpp | ||
+# src/attention/gqa_attn_splitk.cu) | ||
|
||
set(quantize_ops_sources | ||
src/quantize/cutlass_extensions.cu | ||
src/quantize/quantize.cu | ||
src/quantize/quantize.cpp) | ||
|
||
-set(comm_ops_sources | ||
- src/comm/car.cu | ||
- src/comm/car.cpp) | ||
+# set(comm_ops_sources | ||
+# src/comm/car.cu | ||
+# src/comm/car.cpp) | ||
|
||
set(experimental_gen_ai_cpp_source_files | ||
- ${attention_ops_sources} | ||
+ # ${attention_ops_sources} | ||
${quantize_ops_sources} | ||
- ${comm_ops_sources}) | ||
+ # ${comm_ops_sources} | ||
+) | ||
|
||
set_source_files_properties(${experimental_gen_ai_cpp_source_files} | ||
PROPERTIES INCLUDE_DIRECTORIES |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash | ||
|
||
# This script is required to patch torch < 2.4 | ||
# It adds the 90a cuda target (H100) | ||
# This target is required to build FBGEMM kernels | ||
|
||
torch_cuda_arch=$(python -c "import torch; print(torch.__file__)" | sed 's/\/__init__.py//; s|$|/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake|') | ||
|
||
sed -i '189s/\[0-9]\\\\\.\[0-9](/[0-9]\\\\.[0-9]a?(/' $torch_cuda_arch | ||
sed -i '245s/\[0-9()]+\+"/[0-9()]+a?"/' $torch_cuda_arch | ||
sed -i '246s/\[0-9]+\+"/[0-9]+a?"/' $torch_cuda_arch |
Oops, something went wrong.