Skip to content

Commit

Permalink
coolwsd signal tracking.
Browse files Browse the repository at this point in the history
Add high-level details of what coolwsd is doing to the
activity tracking, so we have more visibility into aborts /
faults - primarily during shutdown.

Signed-off-by: Michael Meeks <[email protected]>
Change-Id: I3824465107f4b5d12dec258445b6cded9a040699
  • Loading branch information
mmeeks committed Sep 6, 2024
1 parent 0bcaa12 commit e1dfd5b
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions wsd/COOLWSD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ void COOLWSD::alertAllUsersInternal(const std::string& msg)
std::lock_guard<std::mutex> docBrokersLock(DocBrokersMutex);

LOG_INF("Alerting all users: [" << msg << ']');
SigUtil::addActivity("alert all users: " + msg);

if (UnitWSD::get().filterAlertAllusers(msg))
return;
Expand Down Expand Up @@ -534,6 +535,10 @@ static bool cleanupChildren()
}
}

if (static_cast<int>(NewChildren.size()) != count)
SigUtil::addActivity("removed " + std::to_string(count - NewChildren.size()) +
" children");

return static_cast<int>(NewChildren.size()) != count;
}

Expand Down Expand Up @@ -610,6 +615,7 @@ static size_t addNewChild(std::shared_ptr<ChildProcess> child)

LOG_TRC("Adding a new child " << pid << " to NewChildren, have " << OutstandingForks
<< " outstanding requests");
SigUtil::addActivity("added child " + std::to_string(pid));
NewChildren.emplace_back(std::move(child));
const size_t count = NewChildren.size();
lock.unlock();
Expand Down Expand Up @@ -1057,6 +1063,7 @@ void ForKitProcWSHandler::handleMessage(const std::vector<char> &data)
{
Admin::instance().addSegFaultCount(count);
LOG_INF(count << " coolkit processes crashed with segmentation fault.");
SigUtil::addActivity("coolkit(s) crashed");
UnitWSD::get().kitSegfault(count);
}
else
Expand Down Expand Up @@ -3028,7 +3035,10 @@ void COOLWSD::initializeSSL()
if (!ssl::Manager::isServerContextInitialized())
LOG_ERR("Failed to initialize Server SSL.");
else
{
LOG_INF("Initialized Server SSL.");
SigUtil::addActivity("initialized SSL");
}
#else
LOG_INF("SSL is unavailable in this build.");
#endif
Expand Down Expand Up @@ -3504,6 +3514,8 @@ bool COOLWSD::createForKit()
{
LOG_INF("Creating new forkit process.");

SigUtil::addActivity("spawning new forkit");

// Creating a new forkit is always a slow process.
ChildSpawnTimeoutMs = CHILD_SPAWN_TIMEOUT_MS;

Expand Down Expand Up @@ -4326,6 +4338,7 @@ int COOLWSD::innerMain()
Util::getVersionInfo(version, hash);
LOG_INF("Coolwsd version details: " << version << " - " << hash << " - id " << Util::getProcessIdentifier() << " - on " << Util::getLinuxVersion());
#endif
SigUtil::addActivity("coolwsd init");

initializeSSL();

Expand Down Expand Up @@ -4475,6 +4488,8 @@ int COOLWSD::innerMain()
/// The main-poll does next to nothing:
SocketPoll mainWait("main");

SigUtil::addActivity("coolwsd accepting connections");

#if !MOBILEAPP
std::cerr << "Ready to accept connections on port " << ClientPortNumber << ".\n" << std::endl;
if (SignalParent)
Expand Down Expand Up @@ -4530,6 +4545,8 @@ int COOLWSD::innerMain()
#endif
#endif

SigUtil::addActivity("coolwsd running");

while (!SigUtil::getShutdownRequestFlag())
{
// This timeout affects the recovery time of prespawned children.
Expand Down Expand Up @@ -4583,6 +4600,8 @@ int COOLWSD::innerMain()

COOLWSD::alertAllUsersInternal("close: shuttingdown");

SigUtil::addActivity("shutting down");

// Lots of polls will stop; stop watching them first.
SocketPoll::PollWatchdog.reset();

Expand Down Expand Up @@ -4618,6 +4637,8 @@ int COOLWSD::innerMain()
}
#endif

SigUtil::addActivity("wait save & close");

// Wait until documents are saved and sessions closed.
// Don't stop the DocBroker, they will exit.
constexpr size_t sleepMs = 200;
Expand Down Expand Up @@ -4666,6 +4687,8 @@ int COOLWSD::innerMain()
DocBrokers.clear();
}

SigUtil::addActivity("save traces");

if (TraceEventFile != NULL)
{
// If we have written any objects to it, it ends with a comma and newline. Back over those.
Expand Down Expand Up @@ -4693,6 +4716,8 @@ int COOLWSD::innerMain()

Server->stopPrisoners();

SigUtil::addActivity("prisoners stopped");

if (UnitWSD::isUnitTesting())
{
Server->stop();
Expand All @@ -4705,6 +4730,8 @@ int COOLWSD::innerMain()
net::AsyncDNS::stopAsyncDNS();
#endif

SigUtil::addActivity("async DNS stopped");

WebServerPoll.reset();

// Terminate child processes
Expand All @@ -4716,6 +4743,8 @@ int COOLWSD::innerMain()

NewChildren.clear();

SigUtil::addActivity("terminated unused children");

#if !MOBILEAPP
if (!Util::isKitInProcess())
{
Expand All @@ -4734,6 +4763,8 @@ int COOLWSD::innerMain()

LOG_INF("Process [coolwsd] finished with exit status: " << returnValue);

SigUtil::addActivity("finished with status " + std::to_string(returnValue));

// At least on centos7, Poco deadlocks while
// cleaning up its SSL context singleton.
Util::forcedExit(returnValue);
Expand Down

0 comments on commit e1dfd5b

Please sign in to comment.