Skip to content

Commit

Permalink
Pytest (microsoft#162)
Browse files Browse the repository at this point in the history
Port the Python tests to mscclpp.
To start the tests, run
`mpirun -tag-output -np 8 pytest ./python/test/test_mscclpp.py -x`.

---------

Co-authored-by: Saeed Maleki <[email protected]>
Co-authored-by: Changho Hwang <[email protected]>
Co-authored-by: Saeed Maleki <[email protected]>
  • Loading branch information
4 people authored Sep 1, 2023
1 parent 3df18d2 commit 858e381
Show file tree
Hide file tree
Showing 39 changed files with 1,186 additions and 18 deletions.
20 changes: 20 additions & 0 deletions .azure-pipelines/multi-nodes-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
mkdir build && cd build
MPI_HOME=/usr/local/mpi /tmp/cmake-3.26.4-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
make pylib-copy
workingDirectory: '$(System.DefaultWorkingDirectory)'

- task: DownloadSecureFile@1
Expand Down Expand Up @@ -104,6 +105,25 @@ jobs:
-O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/run_tests.sh mp-ut'
kill $CHILD_PID
# Runs the Python tests inside the `mscclpp-test` container on host mscclpp-it-000000 via parallel-ssh,
# streaming that host's output file to the pipeline log while the tests run.
- task: Bash@3
  name: RunMultiNodePythonTests
  displayName: Run multi-nodes python tests
  inputs:
    targetType: 'inline'
    script: |
      set -e
      HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      rm -rf output/*
      mkdir -p output
      touch output/mscclpp-it-000000
      # Follow the per-host output file in the background so progress shows up live.
      tail -f output/mscclpp-it-000000 &
      CHILD_PID=$!
      # Stop the follower on every exit path: with `set -e`, a failing parallel-ssh
      # aborts the script immediately, so a trailing `kill` would never be reached.
      trap 'kill $CHILD_PID 2>/dev/null || true' EXIT
      parallel-ssh -t 0 -H mscclpp-it-000000 -l azureuser -x "-i ${KeyFilePath}" \
        -O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/run_tests.sh pytests'
- task: AzureCLI@2
name: StopVMSS
displayName: Deallocate VMSS
Expand Down
17 changes: 17 additions & 0 deletions .azure-pipelines/ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,20 @@ jobs:
mpirun -tag-output -np 4 ./build/test/mp_unit_tests
mpirun -tag-output -np 8 ./build/test/mp_unit_tests
workingDirectory: '$(System.DefaultWorkingDirectory)'

# Copies the built Python extension modules into the source tree (`make pylib-copy`), installs
# requirements_cu11.txt when the container image name contains 'cuda11' (requirements_cu12.txt otherwise),
# then runs python/test/test_mscclpp.py with pytest on 8 MPI ranks, stopping at the first failure (-x).
- task: Bash@3
name: PyTests
displayName: Run pytests
inputs:
targetType: 'inline'
script: |
set -e
export PATH=/usr/local/mpi/bin:$PATH
cd build && make pylib-copy
if [[ '$(containerImage)' == *'cuda11'* ]]; then
pip3 install -r ../python/test/requirements_cu11.txt
else
pip3 install -r ../python/test/requirements_cu12.txt
fi
mpirun -tag-output -np 8 ~/.local/bin/pytest ../python/test/test_mscclpp.py -x
workingDirectory: '$(System.DefaultWorkingDirectory)'
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ dist/
__pycache__
.*.swp
.idea/
*.so
2 changes: 1 addition & 1 deletion include/mscclpp/core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ class RegisteredMemory {
/// Get a pointer to the memory block.
///
/// @return A pointer to the memory block.
void* data();
void* data() const;

/// Get the size of the memory block.
///
Expand Down
2 changes: 2 additions & 0 deletions include/mscclpp/poll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#ifdef __CUDACC__

#include <cstdint>

extern __device__ void __assert_fail(const char *__assertion, const char *__file, unsigned int __line,
const char *__function) __THROW;

Expand Down
2 changes: 1 addition & 1 deletion include/mscclpp/sm_channel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ namespace mscclpp {
struct SmChannel {
private:
std::shared_ptr<SmDevice2DeviceSemaphore> semaphore_;
RegisteredMemory dst_;
void* src_;
void* dst_;
void* getPacketBuffer_;

public:
Expand Down
23 changes: 13 additions & 10 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Require a Python 3.8-compatible installation with the interpreter and the
# extension-module development component; configuration fails if it is missing.
find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED)
# Fetch nanobind (pinned to tag v1.4.0) at configure time and add it to the build.
include(FetchContent)
FetchContent_Declare(nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind.git GIT_TAG v1.4.0)
FetchContent_MakeAvailable(nanobind)
# Each subdirectory carries its own CMakeLists.txt.
add_subdirectory(mscclpp)
add_subdirectory(test)

# `pylib-copy` copies the built Python extension modules next to the Python
# sources (python/mscclpp and python/test/_cpp). It is not part of the default
# build; run it explicitly, e.g. `make pylib-copy`.
add_custom_target(pylib-copy)

# Build both extension modules before copying, so the target also works on a
# fresh build tree and never copies a stale artifact.
add_dependencies(pylib-copy mscclpp_py mscclpp_py_test)

# $<TARGET_FILE:...> expands to the real path of each module, so the copy does
# not depend on a particular Python version or platform tag in the file name.
add_custom_command(TARGET pylib-copy POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
          $<TARGET_FILE:mscclpp_py>
          ${CMAKE_CURRENT_SOURCE_DIR}/mscclpp
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
          $<TARGET_FILE:mscclpp_py_test>
          ${CMAKE_CURRENT_SOURCE_DIR}/test/_cpp
  COMMAND ${CMAKE_COMMAND} -E echo "Copy python libraries"
  VERBATIM
)

# Collect every .cpp file below this directory; CONFIGURE_DEPENDS re-checks the glob at build time.
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp)
# Build the nanobind extension module; the produced file is named `_mscclpp`.
nanobind_add_module(mscclpp_py ${SOURCES})
set_target_properties(mscclpp_py PROPERTIES OUTPUT_NAME _mscclpp)
target_link_libraries(mscclpp_py PRIVATE mscclpp_static)
target_include_directories(mscclpp_py PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
# Install the module at the root of the install prefix.
install(TARGETS mscclpp_py LIBRARY DESTINATION .)
14 changes: 14 additions & 0 deletions python/mscclpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# A Python 3.8-compatible interpreter and the extension-module development
# files are mandatory for building the bindings.
find_package(
  Python 3.8
  COMPONENTS Interpreter Development.Module
  REQUIRED
)

# nanobind is fetched at configure time, pinned to tag v1.4.0.
include(FetchContent)
FetchContent_Declare(
  nanobind
  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
  GIT_TAG v1.4.0
)
FetchContent_MakeAvailable(nanobind)

# Every .cpp file below this directory is compiled into the `_mscclpp` module.
file(GLOB_RECURSE MSCCLPP_PY_SOURCES CONFIGURE_DEPENDS *.cpp)
nanobind_add_module(mscclpp_py ${MSCCLPP_PY_SOURCES})
set_target_properties(
  mscclpp_py
  PROPERTIES OUTPUT_NAME _mscclpp
)
target_link_libraries(
  mscclpp_py
  PRIVATE mscclpp_static
)
target_include_directories(
  mscclpp_py
  PRIVATE ${CUDAToolkit_INCLUDE_DIRS}
)

# The module is installed at the root of the install prefix.
install(
  TARGETS mscclpp_py
  LIBRARY DESTINATION .
)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
13 changes: 13 additions & 0 deletions python/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# A Python 3.8-compatible interpreter and the extension-module development
# files are mandatory for building the test extension.
find_package(
  Python 3.8
  COMPONENTS Interpreter Development.Module
  REQUIRED
)

# nanobind is fetched at configure time, pinned to tag v1.4.0.
include(FetchContent)
FetchContent_Declare(
  nanobind
  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
  GIT_TAG v1.4.0
)
FetchContent_MakeAvailable(nanobind)

# Every .cpp file below this directory is compiled into the `_ext` module.
file(GLOB_RECURSE MSCCLPP_PY_TEST_SOURCES CONFIGURE_DEPENDS *.cpp)
nanobind_add_module(mscclpp_py_test ${MSCCLPP_PY_TEST_SOURCES})
set_target_properties(
  mscclpp_py_test
  PROPERTIES OUTPUT_NAME _ext
)
target_link_libraries(
  mscclpp_py_test
  PRIVATE mscclpp_static
)
target_include_directories(
  mscclpp_py_test
  PRIVATE ${CUDAToolkit_INCLUDE_DIRS}
)
Empty file added python/test/__init__.py
Empty file.
Empty file added python/test/_cpp/__init__.py
Empty file.
83 changes: 83 additions & 0 deletions python/test/_cpp/proxy_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include <cuda.h>
#include <nanobind/nanobind.h>
#include <nanobind/stl/shared_ptr.h>
#include <nanobind/stl/vector.h>

#include <cstdint>
#include <iostream>
#include <memory>
#include <mscclpp/core.hpp>
#include <mscclpp/cuda_utils.hpp>
#include <mscclpp/fifo.hpp>
#include <mscclpp/numa.hpp>
#include <mscclpp/proxy.hpp>
#include <mscclpp/semaphore.hpp>
#include <stdexcept>
#include <string>
#include <vector>

namespace nb = nanobind;

/// Test-only proxy service that is exposed to Python.
///
/// It owns a mscclpp::Proxy. For every trigger the proxy hands to handleTrigger(), the service issues one
/// write per remote rank, signals that rank's host-to-device semaphore and flushes the connection.
class MyProxyService {
 private:
  // NUMA node of the CUDA device that was current at construction; a negative value disables binding.
  int deviceNumaNode_ = -1;
  int my_rank_, nranks_, dataSize_;
  // All three vectors are indexed by rank.
  std::vector<std::shared_ptr<mscclpp::Connection>> connections_;
  std::vector<std::shared_ptr<mscclpp::RegisteredMemory>> allRegMem_;
  std::vector<std::shared_ptr<mscclpp::Host2DeviceSemaphore>> semaphores_;
  // Declared last: its callbacks refer back to this object.
  mscclpp::Proxy proxy_;

 public:
  /// @param my_rank Rank of the calling process.
  /// @param nranks Total number of ranks; must be positive.
  /// @param dataSize Total size of the exchanged buffer (presumably in bytes -- confirm against the caller);
  ///                 each rank handles dataSize / nranks of it.
  /// @param conns Connections, indexed by rank.
  /// @param allRegMem Registered memories, indexed by rank.
  /// @param semaphores Host-to-device semaphores, indexed by rank.
  /// @throws std::invalid_argument if nranks is not positive.
  /// @throws std::runtime_error if the current CUDA device cannot be queried.
  MyProxyService(int my_rank, int nranks, int dataSize, std::vector<std::shared_ptr<mscclpp::Connection>> conns,
                 std::vector<std::shared_ptr<mscclpp::RegisteredMemory>> allRegMem,
                 std::vector<std::shared_ptr<mscclpp::Host2DeviceSemaphore>> semaphores)
      : my_rank_(my_rank),
        nranks_(nranks),
        dataSize_(dataSize),
        connections_(conns),
        allRegMem_(allRegMem),
        semaphores_(semaphores),
        proxy_([this](mscclpp::ProxyTrigger triggerRaw) { return handleTrigger(triggerRaw); },
               [this]() { bindThread(); }) {
    // handleTrigger() divides by nranks_ and uses it as a modulus; reject values that would crash there.
    if (nranks <= 0) {
      throw std::invalid_argument("MyProxyService: nranks must be positive");
    }
    // Do not feed an uninitialized device id to getDeviceNumaNode() when the query fails.
    int cudaDevice = 0;
    cudaError_t err = cudaGetDevice(&cudaDevice);
    if (err != cudaSuccess) {
      throw std::runtime_error(std::string("MyProxyService: cudaGetDevice failed: ") + cudaGetErrorString(err));
    }
    deviceNumaNode_ = mscclpp::getDeviceNumaNode(cudaDevice);
  }

  /// Binds the calling thread to the device's NUMA node, unless that node is negative.
  void bindThread() {
    if (deviceNumaNode_ >= 0) {
      mscclpp::numaBind(deviceNumaNode_);
    }
  }

  /// Proxy handler; the content of the trigger itself is ignored.
  ///
  /// For every other rank, visited in ring order starting at my_rank + 1, this writes one slice of
  /// dataSize / nranks at offset my_rank * slice, then signals that rank's semaphore and flushes the connection.
  ///
  /// @return Always ProxyHandlerResult::FlushFifoTailAndContinue.
  mscclpp::ProxyHandlerResult handleTrigger(mscclpp::ProxyTrigger) {
    const int dataSizePerRank = dataSize_ / nranks_;
    // Same offset on both sides of the write; widened to 64 bits before multiplying.
    const uint64_t offset = my_rank_ * (uint64_t)dataSizePerRank;
    for (int r = 1; r < nranks_; ++r) {
      const int nghr = (my_rank_ + r) % nranks_;
      // Argument order is presumably (dst, dstOffset, src, srcOffset, size) -- confirm in mscclpp/core.hpp.
      connections_[nghr]->write(*allRegMem_[nghr], offset, *allRegMem_[my_rank_], offset, dataSizePerRank);
      semaphores_[nghr]->signal();
      connections_[nghr]->flush();
    }
    return mscclpp::ProxyHandlerResult::FlushFifoTailAndContinue;
  }

  /// Starts the underlying proxy.
  void start() { proxy_.start(); }

  /// Stops the underlying proxy.
  void stop() { proxy_.stop(); }

  /// Returns the device-side handle of the proxy's FIFO.
  mscclpp::FifoDeviceHandle fifoDeviceHandle() { return proxy_.fifo().deviceHandle(); }
};

// Registers the MyProxyService class, its constructor and its methods on module `m`.
void init_mscclpp_proxy_test_module(nb::module_ &m) {
  using ConnectionVec = std::vector<std::shared_ptr<mscclpp::Connection>>;
  using RegMemVec = std::vector<std::shared_ptr<mscclpp::RegisteredMemory>>;
  using SemaphoreVec = std::vector<std::shared_ptr<mscclpp::Host2DeviceSemaphore>>;

  nb::class_<MyProxyService> service(m, "MyProxyService");

  // Constructor with Python keyword names for every argument.
  service.def(nb::init<int, int, int, ConnectionVec, RegMemVec, SemaphoreVec>(), nb::arg("rank"), nb::arg("nranks"),
              nb::arg("data_size"), nb::arg("conn_vec"), nb::arg("reg_mem_vec"), nb::arg("h2d_sem_vec"));

  service.def("fifo_device_handle", &MyProxyService::fifoDeviceHandle);
  service.def("start", &MyProxyService::start);
  service.def("stop", &MyProxyService::stop);
}

// Entry point of the `_ext` extension module: registers the proxy test bindings on it.
NB_MODULE(_ext, m) { init_mscclpp_proxy_test_module(m); }
15 changes: 15 additions & 0 deletions python/test/d2d_semaphore_test.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include <mscclpp/semaphore_device.hpp>

// Every thread whose index is below nranks and differs from my_rank signals the semaphore at its own index
// and then waits on it. All other threads do nothing.
// Note: semaphores[my_rank] is an invalid semaphore that exists only to keep the indexing simple; never use it.
extern "C" __global__ void __launch_bounds__(1024, 1)
    d2d_semaphore(mscclpp::SmDevice2DeviceSemaphoreDeviceHandle* semaphores, int my_rank, int nranks) {
  const int peer = threadIdx.x;
  if (peer >= nranks || peer == my_rank) return;

  mscclpp::SmDevice2DeviceSemaphoreDeviceHandle& sem = semaphores[peer];
  sem.signal();
  sem.wait();
}
12 changes: 12 additions & 0 deletions python/test/fifo_test.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include <stdio.h>

#include "mscclpp/fifo_device.hpp"

// Pushes one trigger whose `fst` field is 123 into the given FIFO; every launched thread performs its own push.
extern "C" __global__ void __launch_bounds__(1024, 1) fifo(mscclpp::FifoDeviceHandle fifo) {
  // Value-initialize so that no field other than `fst` reaches push() with an indeterminate value.
  mscclpp::ProxyTrigger trigger{};
  trigger.fst = 123;
  fifo.push(trigger);
}
12 changes: 12 additions & 0 deletions python/test/h2d_semaphore_test.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include <mscclpp/semaphore_device.hpp>

// Every thread whose index is below nranks and differs from my_rank waits on the semaphore at its own index.
// All other threads do nothing.
// Note: semaphores[my_rank] is an invalid semaphore that exists only to keep the indexing simple; never use it.
extern "C" __global__ void __launch_bounds__(1024, 1)
    h2d_semaphore(mscclpp::Host2DeviceSemaphoreDeviceHandle* semaphores, int my_rank, int nranks) {
  const int peer = threadIdx.x;
  if (peer >= nranks || peer == my_rank) return;

  semaphores[peer].wait();
}
Loading

0 comments on commit 858e381

Please sign in to comment.