forked from microsoft/mscclpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Port python tests to mscclpp. Please run `mpirun -tag-output -np 8 pytest ./python/test/test_mscclpp.py -x` to start pytest --------- Co-authored-by: Saeed Maleki <[email protected]> Co-authored-by: Changho Hwang <[email protected]> Co-authored-by: Saeed Maleki <[email protected]>
- Loading branch information
1 parent
3df18d2
commit 858e381
Showing
39 changed files
with
1,186 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ dist/ | |
__pycache__ | ||
.*.swp | ||
.idea/ | ||
*.so |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,17 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
# Locate Python (3.8 or newer): the interpreter plus the pieces needed to
# build extension modules.
find_package(
  Python 3.8
  COMPONENTS Interpreter Development.Module
  REQUIRED
)

# Fetch nanobind at tag v1.4.0 and make it available to this build.
include(FetchContent)
FetchContent_Declare(
  nanobind
  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
  GIT_TAG v1.4.0
)
FetchContent_MakeAvailable(nanobind)

# Descend into the binding module and the test extension directories.
add_subdirectory(mscclpp)
add_subdirectory(test)
|
||
# pylib-copy: copy the built extension modules into the Python source tree
# (mscclpp/ and test/_cpp/).
#
# The module paths are taken from $<TARGET_FILE:...> instead of a hard-coded
# "cpython-38-x86_64-linux-gnu.so" file name, so the copy keeps working for
# whatever Python version and platform find_package(Python) selected.
add_custom_target(pylib-copy)

# The copy commands need the modules to exist, so build them first.
add_dependencies(pylib-copy mscclpp_py mscclpp_py_test)

add_custom_command(
  TARGET pylib-copy POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
          $<TARGET_FILE:mscclpp_py>
          "${CMAKE_CURRENT_SOURCE_DIR}/mscclpp"
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
          $<TARGET_FILE:mscclpp_py_test>
          "${CMAKE_CURRENT_SOURCE_DIR}/test/_cpp"
  COMMAND ${CMAKE_COMMAND} -E echo "Copy python libraries"
  VERBATIM
)
|
||
# Collect every .cpp below this directory; CONFIGURE_DEPENDS asks the build
# to re-check the glob so added/removed files are picked up.
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp)

# Build the nanobind extension target and emit it under the name "_mscclpp".
nanobind_add_module(mscclpp_py ${SOURCES})
set_property(TARGET mscclpp_py PROPERTY OUTPUT_NAME _mscclpp)

# Link against the static mscclpp library and expose the CUDA toolkit headers.
target_link_libraries(
  mscclpp_py
  PRIVATE mscclpp_static
)
target_include_directories(
  mscclpp_py
  PRIVATE ${CUDAToolkit_INCLUDE_DIRS}
)

# Install the module at the root of the install prefix.
install(
  TARGETS mscclpp_py
  LIBRARY DESTINATION .
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
# Locate Python (3.8 or newer): the interpreter plus the pieces needed to
# build extension modules.
find_package(
  Python 3.8
  COMPONENTS Interpreter Development.Module
  REQUIRED
)

# Fetch nanobind at tag v1.4.0 and make it available to this build.
include(FetchContent)
FetchContent_Declare(
  nanobind
  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
  GIT_TAG v1.4.0
)
FetchContent_MakeAvailable(nanobind)
|
||
# Gather all .cpp files under this directory (re-globbed by the build because
# of CONFIGURE_DEPENDS).
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp)

# nanobind extension target; the produced module is named "_mscclpp".
nanobind_add_module(mscclpp_py ${SOURCES})
set_property(TARGET mscclpp_py PROPERTY OUTPUT_NAME _mscclpp)

# Build requirements: the static mscclpp library and the CUDA toolkit headers.
target_link_libraries(
  mscclpp_py
  PRIVATE mscclpp_static
)
target_include_directories(
  mscclpp_py
  PRIVATE ${CUDAToolkit_INCLUDE_DIRS}
)

# Install the module at the root of the install prefix.
install(
  TARGETS mscclpp_py
  LIBRARY DESTINATION .
)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
# Locate Python (3.8 or newer): the interpreter plus the pieces needed to
# build extension modules.
find_package(
  Python 3.8
  COMPONENTS Interpreter Development.Module
  REQUIRED
)

# Fetch nanobind at tag v1.4.0 and make it available to this build.
include(FetchContent)
FetchContent_Declare(
  nanobind
  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
  GIT_TAG v1.4.0
)
FetchContent_MakeAvailable(nanobind)
|
||
# Gather all .cpp files under this directory (re-globbed by the build because
# of CONFIGURE_DEPENDS).
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp)

# nanobind extension used by the Python tests; the produced module is named
# "_ext".
nanobind_add_module(mscclpp_py_test ${SOURCES})
set_property(TARGET mscclpp_py_test PROPERTY OUTPUT_NAME _ext)

# Build requirements: the static mscclpp library and the CUDA toolkit headers.
target_link_libraries(
  mscclpp_py_test
  PRIVATE mscclpp_static
)
target_include_directories(
  mscclpp_py_test
  PRIVATE ${CUDAToolkit_INCLUDE_DIRS}
)
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#include <cuda.h> | ||
#include <nanobind/nanobind.h> | ||
#include <nanobind/stl/shared_ptr.h> | ||
#include <nanobind/stl/vector.h> | ||
|
||
#include <iostream> | ||
#include <memory> | ||
#include <mscclpp/core.hpp> | ||
#include <mscclpp/cuda_utils.hpp> | ||
#include <mscclpp/fifo.hpp> | ||
#include <mscclpp/numa.hpp> | ||
#include <mscclpp/proxy.hpp> | ||
#include <mscclpp/semaphore.hpp> | ||
#include <vector> | ||
|
||
namespace nb = nanobind; | ||
|
||
class MyProxyService { | ||
private: | ||
int deviceNumaNode_; | ||
int my_rank_, nranks_, dataSize_; | ||
std::vector<std::shared_ptr<mscclpp::Connection>> connections_; | ||
std::vector<std::shared_ptr<mscclpp::RegisteredMemory>> allRegMem_; | ||
std::vector<std::shared_ptr<mscclpp::Host2DeviceSemaphore>> semaphores_; | ||
mscclpp::Proxy proxy_; | ||
|
||
public: | ||
MyProxyService(int my_rank, int nranks, int dataSize, std::vector<std::shared_ptr<mscclpp::Connection>> conns, | ||
std::vector<std::shared_ptr<mscclpp::RegisteredMemory>> allRegMem, | ||
std::vector<std::shared_ptr<mscclpp::Host2DeviceSemaphore>> semaphores) | ||
: my_rank_(my_rank), | ||
nranks_(nranks), | ||
dataSize_(dataSize), | ||
connections_(conns), | ||
allRegMem_(allRegMem), | ||
semaphores_(semaphores), | ||
proxy_([&](mscclpp::ProxyTrigger triggerRaw) { return handleTrigger(triggerRaw); }, [&]() { bindThread(); }) { | ||
int cudaDevice; | ||
cudaGetDevice(&cudaDevice); | ||
deviceNumaNode_ = mscclpp::getDeviceNumaNode(cudaDevice); | ||
} | ||
|
||
void bindThread() { | ||
if (deviceNumaNode_ >= 0) { | ||
mscclpp::numaBind(deviceNumaNode_); | ||
} | ||
} | ||
|
||
mscclpp::ProxyHandlerResult handleTrigger(mscclpp::ProxyTrigger) { | ||
int dataSizePerRank = dataSize_ / nranks_; | ||
for (int r = 1; r < nranks_; ++r) { | ||
int nghr = (my_rank_ + r) % nranks_; | ||
connections_[nghr]->write(*allRegMem_[nghr], my_rank_ * (uint64_t)dataSizePerRank, *allRegMem_[my_rank_], | ||
my_rank_ * (uint64_t)dataSizePerRank, dataSizePerRank); | ||
semaphores_[nghr]->signal(); | ||
connections_[nghr]->flush(); | ||
} | ||
return mscclpp::ProxyHandlerResult::FlushFifoTailAndContinue; | ||
} | ||
|
||
void start() { proxy_.start(); } | ||
|
||
void stop() { proxy_.stop(); } | ||
|
||
mscclpp::FifoDeviceHandle fifoDeviceHandle() { return proxy_.fifo().deviceHandle(); } | ||
}; | ||
|
||
// Registers MyProxyService on module `m`: its constructor (keyword arguments
// rank, nranks, data_size, conn_vec, reg_mem_vec, h2d_sem_vec) and the methods
// fifo_device_handle, start and stop.
void init_mscclpp_proxy_test_module(nb::module_ &m) {
  using ConnVec = std::vector<std::shared_ptr<mscclpp::Connection>>;
  using RegMemVec = std::vector<std::shared_ptr<mscclpp::RegisteredMemory>>;
  using H2DSemVec = std::vector<std::shared_ptr<mscclpp::Host2DeviceSemaphore>>;

  nb::class_<MyProxyService> service(m, "MyProxyService");
  service.def(nb::init<int, int, int, ConnVec, RegMemVec, H2DSemVec>(), nb::arg("rank"), nb::arg("nranks"),
              nb::arg("data_size"), nb::arg("conn_vec"), nb::arg("reg_mem_vec"), nb::arg("h2d_sem_vec"));
  service.def("fifo_device_handle", &MyProxyService::fifoDeviceHandle);
  service.def("start", &MyProxyService::start);
  service.def("stop", &MyProxyService::stop);
}
|
||
// Entry point of the `_ext` extension module: registers the proxy test bindings.
NB_MODULE(_ext, m) {
  init_mscclpp_proxy_test_module(m);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#include <mscclpp/semaphore_device.hpp> | ||
|
||
// Threads whose index is a rank other than my_rank signal that rank's
// semaphore and then wait on it; all other threads do nothing.
// Be careful with semaphores[my_rank]: it is an invalid semaphore that exists
// only to keep the array indexable by rank.
extern "C" __global__ void __launch_bounds__(1024, 1)
    d2d_semaphore(mscclpp::SmDevice2DeviceSemaphoreDeviceHandle* semaphores, int my_rank, int nranks) {
  const int peer = threadIdx.x;
  if (peer >= nranks || peer == my_rank) {
    return;
  }
  semaphores[peer].signal();
  semaphores[peer].wait();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#include <stdio.h> | ||
|
||
#include "mscclpp/fifo_device.hpp" | ||
|
||
// Pushes one trigger with fst == 123 into the given FIFO. There is no thread
// guard, so every thread that runs this kernel performs a push.
extern "C" __global__ void __launch_bounds__(1024, 1) fifo(mscclpp::FifoDeviceHandle fifo) {
  // Value-initialize so everything other than fst is zero rather than
  // indeterminate when the trigger is handed to push().
  mscclpp::ProxyTrigger trigger{};
  trigger.fst = 123;
  fifo.push(trigger);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT license. | ||
|
||
#include <mscclpp/semaphore_device.hpp> | ||
|
||
// Threads whose index is a rank other than my_rank wait on that rank's
// semaphore; all other threads do nothing.
// Be careful with semaphores[my_rank]: it is an invalid semaphore that exists
// only to keep the array indexable by rank.
extern "C" __global__ void __launch_bounds__(1024, 1)
    h2d_semaphore(mscclpp::Host2DeviceSemaphoreDeviceHandle* semaphores, int my_rank, int nranks) {
  const int peer = threadIdx.x;
  if (peer >= nranks || peer == my_rank) {
    return;
  }
  semaphores[peer].wait();
}
Oops, something went wrong.