Skip to content

Commit

Permalink
(5/N): Add soft-drain file flag path and increment value to OpenrConf…
Browse files Browse the repository at this point in the history
…ig.thrift

Summary:
As we have discovered, Open/R should support drainer set flag for SOFT_DRAIN operation. This is the change to let Open/R's LinkMonitor apply soft-drain node metric override.

Highlights:
- Introduce `softdrained_flag_path` and the default `softdrained_node_increment` value inside OpenrConfig.thrift;
- Add the logic for `LinkMonitor` to understand and interpret the SOFT_DRAIN file flag potentially set by drainer;
- Add the logic to determine `isSoftdrained` in the config. This is the drainer set part;

Reviewed By: TangoRoxy

Differential Revision: D39003885

fbshipit-source-id: 5ebe83f4be2d98bd1c793f6e85a4f5ac914debc0
  • Loading branch information
xiangxu1121 authored and facebook-github-bot committed Aug 30, 2022
1 parent 1ecd419 commit 5c97327
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 35 deletions.
21 changes: 18 additions & 3 deletions openr/config/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,9 +520,14 @@ class Config {
return *config_.vip_service_config();
}

//
// Drain state
//
/*
* [Drain Status]
*
* Based on configured file flag path, Open/R will determine the node
* is in either:
* - UNDRAINED;
* - DRAINED with either SOFTDRAINED(metric bump) or HARDDRAINED(overloaded);
*/
bool
isAssumeDrained() const {
auto undrainedFlagPath = config_.undrained_flag_path();
Expand All @@ -533,6 +538,16 @@ class Config {
return *config_.assume_drained();
}

bool
isSoftdrainEnabled() const {
return *config_.enable_soft_drain();
}

int64_t
getNodeMetricIncrement() const {
return *config_.softdrained_node_increment();
}

//
// Memory profiling
//
Expand Down
11 changes: 11 additions & 0 deletions openr/config/tests/ConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,15 @@ TEST(ConfigTest, PopulateInternalDb) {
}
}

TEST(ConfigTest, SoftdrainConfigTest) {
auto tConfig = getBasicOpenrConfig();
tConfig.enable_soft_drain() = true;

// no soft-drained flag
auto config = Config(tConfig);
EXPECT_TRUE(config.isSoftdrainEnabled());
}

TEST(ConfigTest, GeneralGetter) {
// config without bgp peering
{
Expand Down Expand Up @@ -762,6 +771,8 @@ TEST(ConfigTest, GeneralGetter) {
EXPECT_FALSE(config.isV4OverV6NexthopEnabled());
// enable_vip_service
EXPECT_FALSE(config.isVipServiceEnabled());
// enable_soft_drain
EXPECT_FALSE(config.isSoftdrainEnabled());

// getSparkConfig
EXPECT_EQ(*tConfig.spark_config(), config.getSparkConfig());
Expand Down
21 changes: 21 additions & 0 deletions openr/if/OpenrConfig.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -776,12 +776,16 @@ struct OpenrConfig {
34: optional ThriftClientConfig thrift_client;

/**
* [Hard-Drain]
*
* If set, will assume node is drained if no drain state
* is found in the persistent store.
*/
35: bool assume_drained = true;

/**
* [Hard-Drain]
*
* Set the file path for undrained_flag. If the file undrained_flag
* found, will set assume_drained to false.
*/
Expand All @@ -797,6 +801,23 @@ struct OpenrConfig {
*/
38: optional MemoryProfilingConfig memory_profiling_config;

/**
* [Soft-Drain]
*
* Set the file path for softdrained_flag. If the file flag is set. Open/R
* will be set SOFT_DRAINED.
*/
39: bool enable_soft_drain = false;

/**
* [Soft-Drain]
*
* Set the default value for soft-drained node increment. Can be adjusted
* accordingly in case we choose different types of adj metric and different
* size of the network.
*/
40: i64 softdrained_node_increment = 100;

/**
* This knob is meant for migrating BGP config routes to Open/R-originated.
* Currently there's no way to choose between two otherwise equivalent configs
Expand Down
47 changes: 37 additions & 10 deletions openr/link-monitor/LinkMonitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,20 @@ void
printLinkMonitorState(openr::thrift::LinkMonitorState const& state) {
// Hard-drain state
XLOG(DBG1) << fmt::format(
"[Hard-Drain] Node Overloaded: {}",
"[Drain Status] Node Overloaded: {}",
(*state.isOverloaded() ? "true" : "false"));
if (not state.overloadedLinks()->empty()) {
XLOG(DBG1) << fmt::format(
"[Hard-Drain] Overloaded Links: {}",
"[Drain Status] Overloaded Links: {}",
folly::join(",", *state.overloadedLinks()));
}

// Soft-drain state
XLOG(DBG1) << fmt::format(
"[Soft-Drain] Node Metric Increment: {}",
"[Drain Status] Node Metric Increment: {}",
*state.nodeMetricIncrementVal());
if (not state.linkMetricIncrementMap()->empty()) {
XLOG(DBG1) << fmt::format("[Soft-Drain] Link Metric Increment:");
XLOG(DBG1) << fmt::format("[Drain Status] Link Metric Increment:");
for (auto const& [key, val] : *state.linkMetricIncrementMap()) {
XLOG(DBG1) << fmt::format("\t{}: {}", key, val);
}
Expand Down Expand Up @@ -173,8 +173,17 @@ LinkMonitor::LinkMonitor(
advertiseIfaceAddrTimer_->scheduleTimeout(
Constants::kMaxDurationLinkDiscovery);

// Create config-store client
/*
* [Config-Store]
*
* Load link-monitor state from previous incarnation. This includes:
* - drain/undrain/softdrain state;
* - link/node overload status;
* - allocated node label;
* - etc.;
*/
XLOG(INFO) << "Loading link-monitor state";

auto state =
configStore_->loadThriftObj<thrift::LinkMonitorState>(kConfigKey).get();
// If assumeDrained is set, we will assume drained if no drain state
Expand All @@ -186,16 +195,34 @@ LinkMonitor::LinkMonitor(
printLinkMonitorState(state_);
} else {
XLOG(INFO) << fmt::format(
"Failed to load link-monitor-state from disk. Setting node as {}",
"[Drain Status] Failed to load link-monitor-state from disk. Setting node as {}",
assumeDrained ? "DRAINED" : "UNDRAINED");
state_.isOverloaded() = assumeDrained;
}

// overrideDrainState provided, use assumeDrained
/*
* [Drain/Undrain/Softdrain Status]
*
* - isOverloaded: this is the HARD-DRAIN status. The Open/R instance will
* be hard-drained if this flag is TRUE;
* - nodeMetricIncrementVal: this is the SOFT_DRAIN value. The Open/R
* instance will add metric to the adj database;
*/
if (overrideDrainState) {
XLOG(INFO) << fmt::format(
"Override node as {}", assumeDrained ? "DRAINED" : "UNDRAINED");
state_.isOverloaded() = assumeDrained;
if (config->isSoftdrainEnabled()) {
const auto nodeInc = config->getNodeMetricIncrement();
state_.nodeMetricIncrementVal() = nodeInc;

XLOG(INFO) << fmt::format(
"[Drain Status] Override node soft-drain increment value: {}",
nodeInc);
} else {
state_.isOverloaded() = assumeDrained;

XLOG(INFO) << fmt::format(
"[Drain Status] Override node as {}",
assumeDrained ? "DRAINED" : "UNDRAINED");
}
}

if (enableSegmentRouting_) {
Expand Down
83 changes: 61 additions & 22 deletions openr/link-monitor/tests/LinkMonitorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ class LinkMonitorTestFixture : public testing::Test {
createPrefixManager();

// start a link monitor
createLinkMonitor();
createLinkMonitor(true /* overrideDrainState */);
}

void
Expand Down Expand Up @@ -2543,6 +2543,45 @@ TEST_F(InitializationTestFixture, AdjacencyUpTest) {
}
}

class DrainStatusTestFixture : public LinkMonitorTestFixture {
public:
thrift::OpenrConfig
createConfig() override {
// disable softdrain
auto tConfig = LinkMonitorTestFixture::createConfig();
tConfig.enable_soft_drain() = true;

return tConfig;
}
};

TEST_F(DrainStatusTestFixture, SoftDrainStatusUponStart) {
{
// Create an interface.
nlEventsInjector->sendLinkEvent("iface_2_1", 100, true);
recvAndReplyIfUpdate();
}

{
// Send neighbor up.
auto neighborEvent = nb2_up_event;
neighborUpdatesQueue.push(NeighborEvents({std::move(neighborEvent)}));
}
{
// Create expected adjacency.
// ATTN: expect node metric increment to be applied.
auto adj_2_1_modified = adj_2_1;
adj_2_1_modified.metric() =
*adj_2_1.metric() + config->getNodeMetricIncrement();

auto adjDb = createAdjDb("node-1", {adj_2_1_modified}, kNodeLabel);
expectedAdjDbs.push(std::move(adjDb));
}

// verify adjDb
checkNextAdjPub("adj:node-1");
}

class MultiAreaTestFixture : public LinkMonitorTestFixture {
public:
std::vector<thrift::AreaConfig>
Expand All @@ -2557,8 +2596,8 @@ class MultiAreaTestFixture : public LinkMonitorTestFixture {

/*
* TODO: T101565435 to track this flaky test under OSS env and re-enable it
*
TEST_F(MultiAreaTestFixture, AreaTest) {
*/
TEST_F(MultiAreaTestFixture, DISABLED_AreaTest) {
// Verify that we receive empty adjacency database in all 3 areas
expectedAdjDbs.push(createAdjDb("node-1", {}, kNodeLabel));
expectedAdjDbs.push(createAdjDb("node-1", {}, kNodeLabel, false, planeArea_));
Expand All @@ -2583,8 +2622,9 @@ TEST_F(MultiAreaTestFixture, AreaTest) {
auto adjDb = createAdjDb("node-1", {adj_2_1}, kNodeLabel);
expectedAdjDbs.push(std::move(adjDb));
{
auto neighborEvent = NeighborEvent(NeighborEventType::NEIGHBOR_UP, nb2);
neighborUpdatesQueue.push(NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
auto neighborEvent = nb2_up_event;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
LOG(INFO) << "Testing neighbor UP event in default area!";

checkNextAdjPub("adj:node-1", kTestingAreaName);
Expand All @@ -2597,10 +2637,10 @@ TEST_F(MultiAreaTestFixture, AreaTest) {
adjDb = createAdjDb("node-1", {adj_3_1}, kNodeLabel, false, planeArea_);
expectedAdjDbs.push(std::move(adjDb));
{
auto cp = nb3;
cp.area() = planeArea_;
auto neighborEvent = NeighborEvent(NeighborEventType::NEIGHBOR_UP, cp);
neighborUpdatesQueue.push(NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
auto neighborEvent = nb3_up_event;
// cp.area() = planeArea_;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
LOG(INFO) << "Testing neighbor UP event in plane area!";

checkNextAdjPub("adj:node-1", planeArea_);
Expand All @@ -2610,8 +2650,9 @@ TEST_F(MultiAreaTestFixture, AreaTest) {
{
auto adjDb = createAdjDb("node-1", {adj_2_1}, kNodeLabel);
expectedAdjDbs.push(std::move(adjDb));
auto neighborEvent = NeighborEvent(NeighborEventType::NEIGHBOR_UP, nb2);
neighborUpdatesQueue.push(NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
auto neighborEvent = nb2_up_event;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
LOG(INFO) << "Testing neighbor UP event!";
checkNextAdjPub("adj:node-1", kTestingAreaName);
checkPeerDump(*adj_2_1.otherNodeName(), peerSpec_2_1);
Expand All @@ -2622,29 +2663,27 @@ TEST_F(MultiAreaTestFixture, AreaTest) {
createAdjDb("node-1", {adj_3_1}, kNodeLabel, false, AreaId{planeArea_});
expectedAdjDbs.push(std::move(adjDb));

auto cp = nb3;
cp.area() = planeArea_;
auto neighborEvent = NeighborEvent(NeighborEventType::NEIGHBOR_UP, cp);
neighborUpdatesQueue.push(NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
auto neighborEvent = nb3_up_event;
// cp.area() = planeArea_;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
LOG(INFO) << "Testing neighbor UP event!";
checkNextAdjPub("adj:node-1", planeArea_);
checkPeerDump(
*adj_3_1.otherNodeName(), peerSpec_3_1, AreaId{planeArea_});
checkPeerDump(*adj_3_1.otherNodeName(), peerSpec_3_1, AreaId{planeArea_});
}
// verify neighbor down in "plane" area
{
auto adjDb = createAdjDb("node-1", {}, kNodeLabel, false, planeArea_);
expectedAdjDbs.push(std::move(adjDb));

auto cp = nb3;
cp.area() = planeArea_;
auto neighborEvent = NeighborEvent(NeighborEventType::NEIGHBOR_DOWN, cp);
neighborUpdatesQueue.push(NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
auto neighborEvent = nb3_down_event;
// cp.area() = planeArea_;
neighborUpdatesQueue.push(
NeighborInitEvent(NeighborEvents({std::move(neighborEvent)})));
LOG(INFO) << "Testing neighbor down event!";
checkNextAdjPub("adj:node-1", planeArea_);
}
}
*/

/**
* Verify that LinkMonitor advertises adjacencies to KvStore after
Expand Down

0 comments on commit 5c97327

Please sign in to comment.