Skip to content

Commit

Permalink
[pick_first] fix test flake (grpc#34098)
Browse files Browse the repository at this point in the history
CNR the flake, but I've changed the test (which is very old) to use some
of our more modern helper functions that have saner timeouts.

Also re-add a `return` statement that was accidentally removed in
grpc#33753, which I noticed while working on this. Its absence doesn't cause
a real problem, but it does cause us to needlessly trigger a duplicate
connection attempt or report a duplicate CONNECTING update in some
cases.
  • Loading branch information
markdroth authored Aug 18, 2023
1 parent d3548a3 commit 72e7914
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// with the first subchannel.
if (!old_state.has_value()) {
subchannel_list()->subchannel(0)->ReactToConnectivityStateLocked();
return;
}
// Ignore any other updates for subchannels we're not currently trying to
// connect to.
Expand Down
32 changes: 18 additions & 14 deletions test/cpp/end2end/client_lb_end2end_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -830,32 +830,36 @@ TEST_F(PickFirstTest, BackOffInitialReconnect) {
args.SetInt(GRPC_ARG_INITIAL_RECONNECT_BACKOFF_MS,
kInitialBackOffMs * grpc_test_slowdown_factor());
const std::vector<int> ports = {grpc_pick_unused_port_or_die()};
const gpr_timespec t0 = gpr_now(GPR_CLOCK_MONOTONIC);
auto response_generator = BuildResolverResponseGenerator();
auto channel = BuildChannel("pick_first", response_generator, args);
auto stub = BuildStub(channel);
response_generator.SetNextResolution(ports);
// The channel won't become connected (there's no server).
ASSERT_FALSE(channel->WaitForConnected(
grpc_timeout_milliseconds_to_deadline(kInitialBackOffMs * 2)));
// Start trying to connect. The channel will report
// TRANSIENT_FAILURE, because the server is not reachable.
const grpc_core::Timestamp t0 = grpc_core::Timestamp::Now();
ASSERT_TRUE(WaitForChannelState(
channel.get(),
[&](grpc_connectivity_state state) {
if (state == GRPC_CHANNEL_TRANSIENT_FAILURE) return true;
EXPECT_THAT(state, ::testing::AnyOf(GRPC_CHANNEL_IDLE,
GRPC_CHANNEL_CONNECTING));
return false;
},
/*try_to_connect=*/true));
// Bring up a server on the chosen port.
StartServers(1, ports);
// Now it will.
ASSERT_TRUE(channel->WaitForConnected(
grpc_timeout_milliseconds_to_deadline(kInitialBackOffMs * 2)));
const gpr_timespec t1 = gpr_now(GPR_CLOCK_MONOTONIC);
const grpc_core::Duration waited =
grpc_core::Duration::FromTimespec(gpr_time_sub(t1, t0));
// Now the channel will become connected.
ASSERT_TRUE(WaitForChannelReady(channel.get()));
// Check how long it took.
const grpc_core::Duration waited = grpc_core::Timestamp::Now() - t0;
gpr_log(GPR_DEBUG, "Waited %" PRId64 " milliseconds", waited.millis());
// We should have waited at least kInitialBackOffMs. We substract one to
// account for test and precision accuracy drift.
EXPECT_GE(waited.millis(),
(kInitialBackOffMs * grpc_test_slowdown_factor()) - 1);
// But not much more.
EXPECT_GT(
gpr_time_cmp(
grpc_timeout_milliseconds_to_deadline(kInitialBackOffMs * 1.10), t1),
0);
EXPECT_LE(waited.millis(),
(kInitialBackOffMs * grpc_test_slowdown_factor()) * 1.1);
}

TEST_F(PickFirstTest, BackOffMinReconnect) {
Expand Down

0 comments on commit 72e7914

Please sign in to comment.