Skip to content

Commit

Permalink
[Core] Make worker_register_timeout_seconds configurable (ray-project…
Browse files Browse the repository at this point in the history
  • Loading branch information
yncxcw authored Jul 7, 2020
1 parent 8f19f1e commit 4ba4110
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 3 deletions.
2 changes: 2 additions & 0 deletions python/ray/includes/ray_config.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ cdef extern from "ray/common/ray_config.h" nogil:

int64_t kill_worker_timeout_milliseconds() const

int64_t worker_register_timeout_seconds() const

int64_t max_time_for_handler_milliseconds() const

int64_t max_time_for_loop() const
Expand Down
7 changes: 6 additions & 1 deletion python/ray/includes/ray_config.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ cdef class Config:

@staticmethod
def raylet_client_connect_timeout_milliseconds():
return RayConfig.instance().raylet_client_connect_timeout_milliseconds()
return (RayConfig.instance()
.raylet_client_connect_timeout_milliseconds())

@staticmethod
def raylet_fetch_timeout_milliseconds():
Expand All @@ -80,6 +81,10 @@ cdef class Config:
def kill_worker_timeout_milliseconds():
return RayConfig.instance().kill_worker_timeout_milliseconds()

@staticmethod
def worker_register_timeout_seconds():
return RayConfig.instance().worker_register_timeout_seconds()

@staticmethod
def max_time_for_handler_milliseconds():
return RayConfig.instance().max_time_for_handler_milliseconds()
Expand Down
4 changes: 4 additions & 0 deletions src/ray/common/ray_config_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ RAY_CONFIG(size_t, raylet_max_active_object_ids, 0)
/// the worker SIGKILL.
RAY_CONFIG(int64_t, kill_worker_timeout_milliseconds, 100)

/// The duration that we wait after the worker is launched before the
/// starting_worker_timeout_callback() is called.
RAY_CONFIG(int64_t, worker_register_timeout_seconds, 30)

/// This is a timeout used to cause failures in the plasma manager and raylet
/// when certain event loop handlers take too long.
RAY_CONFIG(int64_t, max_time_for_handler_milliseconds, 1000)
Expand Down
4 changes: 2 additions & 2 deletions src/ray/raylet/worker_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,9 @@ Process WorkerPool::StartWorkerProcess(const Language &language,

void WorkerPool::MonitorStartingWorkerProcess(const Process &proc,
const Language &language) {
constexpr static size_t worker_register_timeout_seconds = 30;
auto timer = std::make_shared<boost::asio::deadline_timer>(
*io_service_, boost::posix_time::seconds(worker_register_timeout_seconds));
*io_service_, boost::posix_time::seconds(
RayConfig::instance().worker_register_timeout_seconds()));
// Capture the timer shared_ptr by value in the lambda so the timer is not destroyed when this function returns.
timer->async_wait(
[timer, language, proc, this](const boost::system::error_code e) -> void {
Expand Down

0 comments on commit 4ba4110

Please sign in to comment.