Skip to content

Commit

Permalink
Cancel retry timer on call cancellation (grpc#25890)
Browse files Browse the repository at this point in the history
  • Loading branch information
markdroth authored Apr 9, 2021
1 parent c4b77ca commit d017ce5
Show file tree
Hide file tree
Showing 10 changed files with 400 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,7 @@ add_library(end2end_nosec_tests
test/core/end2end/tests/request_with_payload.cc
test/core/end2end/tests/resource_quota_server.cc
test/core/end2end/tests/retry.cc
test/core/end2end/tests/retry_cancel_during_delay.cc
test/core/end2end/tests/retry_cancellation.cc
test/core/end2end/tests/retry_disabled.cc
test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc
Expand Down Expand Up @@ -1243,6 +1244,7 @@ add_library(end2end_tests
test/core/end2end/tests/request_with_payload.cc
test/core/end2end/tests/resource_quota_server.cc
test/core/end2end/tests/retry.cc
test/core/end2end/tests/retry_cancel_during_delay.cc
test/core/end2end/tests/retry_cancellation.cc
test/core/end2end/tests/retry_disabled.cc
test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc
Expand Down
2 changes: 2 additions & 0 deletions build_autogenerated.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ libs:
- test/core/end2end/tests/request_with_payload.cc
- test/core/end2end/tests/resource_quota_server.cc
- test/core/end2end/tests/retry.cc
- test/core/end2end/tests/retry_cancel_during_delay.cc
- test/core/end2end/tests/retry_cancellation.cc
- test/core/end2end/tests/retry_disabled.cc
- test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc
Expand Down Expand Up @@ -192,6 +193,7 @@ libs:
- test/core/end2end/tests/request_with_payload.cc
- test/core/end2end/tests/resource_quota_server.cc
- test/core/end2end/tests/retry.cc
- test/core/end2end/tests/retry_cancel_during_delay.cc
- test/core/end2end/tests/retry_cancellation.cc
- test/core/end2end/tests/retry_disabled.cc
- test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc
Expand Down
1 change: 1 addition & 0 deletions gRPC-Core.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -2080,6 +2080,7 @@ Pod::Spec.new do |s|
'test/core/end2end/tests/request_with_payload.cc',
'test/core/end2end/tests/resource_quota_server.cc',
'test/core/end2end/tests/retry.cc',
'test/core/end2end/tests/retry_cancel_during_delay.cc',
'test/core/end2end/tests/retry_cancellation.cc',
'test/core/end2end/tests/retry_disabled.cc',
'test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc',
Expand Down
2 changes: 2 additions & 0 deletions grpc.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@
'test/core/end2end/tests/request_with_payload.cc',
'test/core/end2end/tests/resource_quota_server.cc',
'test/core/end2end/tests/retry.cc',
'test/core/end2end/tests/retry_cancel_during_delay.cc',
'test/core/end2end/tests/retry_cancellation.cc',
'test/core/end2end/tests/retry_disabled.cc',
'test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc',
Expand Down Expand Up @@ -339,6 +340,7 @@
'test/core/end2end/tests/request_with_payload.cc',
'test/core/end2end/tests/resource_quota_server.cc',
'test/core/end2end/tests/retry.cc',
'test/core/end2end/tests/retry_cancel_during_delay.cc',
'test/core/end2end/tests/retry_cancellation.cc',
'test/core/end2end/tests/retry_disabled.cc',
'test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc',
Expand Down
98 changes: 84 additions & 14 deletions src/core/ext/filters/client_channel/retry_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ class RetryFilter::CallData {
static void SetPollent(grpc_call_element* elem, grpc_polling_entity* pollent);

private:
class Canceller;

// Pending batches stored in call data.
struct PendingBatch {
// The pending batch. If nullptr, this slot is empty.
Expand Down Expand Up @@ -409,7 +411,7 @@ class RetryFilter::CallData {
void PendingBatchClear(PendingBatch* pending);
void MaybeClearPendingBatch(PendingBatch* pending);
static void FailPendingBatchInCallCombiner(void* arg, grpc_error* error);
// Fails all pending batches. Does NOT yeild call combiner.
// Fails all pending batches. Does NOT yield call combiner.
void PendingBatchesFail(grpc_error* error);
// Returns a pointer to the first pending batch for which predicate(batch)
// returns true, or null if not found.
Expand All @@ -423,16 +425,18 @@ class RetryFilter::CallData {
// Frees cached send_message at index idx.
void FreeCachedSendMessage(size_t idx);
void FreeCachedSendTrailingMetadata();
void FreeAllCachedSendOpData();

// Commits the call so that no further retry attempts will be performed.
void RetryCommit(CallAttempt* call_attempt);

// Starts a retry after appropriate back-off.
void DoRetry(grpc_millis server_pushback_ms);
static void OnRetryTimer(void* arg, grpc_error* error);

RefCountedPtr<ClientChannel::LoadBalancedCall> CreateLoadBalancedCall();

static void CreateCallAttempt(void* arg, grpc_error* error);
void CreateCallAttempt();

// Adds a closure to closures that will execute batch in the call combiner.
void AddClosureForBatch(grpc_transport_stream_op_batch* batch,
Expand Down Expand Up @@ -480,7 +484,9 @@ class RetryFilter::CallData {
bool retry_committed_ : 1;
bool last_attempt_got_server_pushback_ : 1;
int num_attempts_completed_ = 0;
grpc_timer retry_timer_;
Mutex timer_mu_;
Canceller* canceller_ ABSL_GUARDED_BY(timer_mu_);
grpc_timer retry_timer_ ABSL_GUARDED_BY(timer_mu_);
grpc_closure retry_closure_;

// The number of batches containing send ops that are currently in-flight
Expand Down Expand Up @@ -519,6 +525,45 @@ class RetryFilter::CallData {
grpc_metadata_batch send_trailing_metadata_;
};

//
// RetryFilter::CallData::Canceller
//

class RetryFilter::CallData::Canceller {
public:
explicit Canceller(CallData* calld) : calld_(calld) {
GRPC_CALL_STACK_REF(calld_->owning_call_, "RetryCanceller");
GRPC_CLOSURE_INIT(&closure_, &Cancel, this, nullptr);
calld_->call_combiner_->SetNotifyOnCancel(&closure_);
}

private:
static void Cancel(void* arg, grpc_error* error) {
auto* self = static_cast<Canceller*>(arg);
auto* calld = self->calld_;
{
MutexLock lock(&calld->timer_mu_);
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
gpr_log(GPR_INFO,
"calld=%p: cancelling retry timer: error=%s self=%p "
"calld->canceller_=%p",
calld, grpc_error_string(error), self, calld->canceller_);
}
if (calld->canceller_ == self && error != GRPC_ERROR_NONE) {
calld->canceller_ = nullptr; // Checked by OnRetryTimer().
grpc_timer_cancel(&calld->retry_timer_);
calld->FreeAllCachedSendOpData();
GRPC_CALL_COMBINER_STOP(calld->call_combiner_, "Canceller");
}
}
GRPC_CALL_STACK_UNREF(calld->owning_call_, "RetryCanceller");
delete self;
}

CallData* calld_;
grpc_closure closure_;
};

//
// RetryFilter::CallData::CallAttempt
//
Expand Down Expand Up @@ -1694,8 +1739,6 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
call_attempt_->lb_call()->StartTransportStreamOpBatch(batch);
return;
}
// TODO(roth): If retry timer is pending, cancel it. The timer callback
// should be a no-op in this case.
// Fail pending batches.
PendingBatchesFail(GRPC_ERROR_REF(cancel_error));
// Note: This will release the call combiner.
Expand Down Expand Up @@ -1723,13 +1766,12 @@ void RetryFilter::CallData::StartTransportStreamOpBatch(
committed_call_->StartTransportStreamOpBatch(batch);
return;
}
// TODO(roth): If retry timer is pending, return without doing anything.
// We do not yet have a call attempt, so create one.
if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
gpr_log(GPR_INFO, "chand=%p calld=%p: creating call attempt", chand_,
this);
}
CreateCallAttempt(this, GRPC_ERROR_NONE);
CreateCallAttempt();
return;
}
// Send batches to call attempt.
Expand All @@ -1749,11 +1791,9 @@ RetryFilter::CallData::CreateLoadBalancedCall() {
return chand_->client_channel_->CreateLoadBalancedCall(args, pollent_);
}

void RetryFilter::CallData::CreateCallAttempt(void* arg,
grpc_error* /*error*/) {
auto* calld = static_cast<CallData*>(arg);
calld->call_attempt_.reset(calld->arena_->New<CallAttempt>(calld));
calld->call_attempt_->StartRetriableBatches();
void RetryFilter::CallData::CreateCallAttempt() {
call_attempt_.reset(arena_->New<CallAttempt>(this));
call_attempt_->StartRetriableBatches();
// TODO(roth): When implementing hedging, change this to start a timer
// for the next hedging attempt.
}
Expand Down Expand Up @@ -1852,6 +1892,18 @@ void RetryFilter::CallData::FreeCachedSendTrailingMetadata() {
grpc_metadata_batch_destroy(&send_trailing_metadata_);
}

void RetryFilter::CallData::FreeAllCachedSendOpData() {
if (seen_send_initial_metadata_) {
FreeCachedSendInitialMetadata();
}
for (size_t i = 0; i < send_messages_.size(); ++i) {
FreeCachedSendMessage(i);
}
if (seen_send_trailing_metadata_) {
FreeCachedSendTrailingMetadata();
}
}

//
// pending_batches management
//
Expand Down Expand Up @@ -2037,11 +2089,29 @@ void RetryFilter::CallData::DoRetry(grpc_millis server_pushback_ms) {
this, next_attempt_time - ExecCtx::Get()->Now());
}
// Schedule retry after computed delay.
GRPC_CLOSURE_INIT(&retry_closure_, CreateCallAttempt, this, nullptr);
// TODO(roth): Register a call combiner canceller for the timer.
GRPC_CLOSURE_INIT(&retry_closure_, OnRetryTimer, this, nullptr);
GRPC_CALL_STACK_REF(owning_call_, "OnRetryTimer");
MutexLock lock(&timer_mu_);
canceller_ = new Canceller(this);
grpc_timer_init(&retry_timer_, next_attempt_time, &retry_closure_);
}

void RetryFilter::CallData::OnRetryTimer(void* arg, grpc_error* error) {
auto* calld = static_cast<CallData*>(arg);
if (error == GRPC_ERROR_NONE) {
bool start_attempt = false;
{
MutexLock lock(&calld->timer_mu_);
if (calld->canceller_ != nullptr) {
calld->canceller_ = nullptr;
start_attempt = true;
}
}
if (start_attempt) calld->CreateCallAttempt();
}
GRPC_CALL_STACK_UNREF(calld->owning_call_, "OnRetryTimer");
}

} // namespace

const grpc_channel_filter kRetryFilterVtable = {
Expand Down
8 changes: 8 additions & 0 deletions test/core/end2end/end2end_nosec_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ extern void resource_quota_server(grpc_end2end_test_config config);
extern void resource_quota_server_pre_init(void);
extern void retry(grpc_end2end_test_config config);
extern void retry_pre_init(void);
extern void retry_cancel_during_delay(grpc_end2end_test_config config);
extern void retry_cancel_during_delay_pre_init(void);
extern void retry_cancellation(grpc_end2end_test_config config);
extern void retry_cancellation_pre_init(void);
extern void retry_disabled(grpc_end2end_test_config config);
Expand Down Expand Up @@ -241,6 +243,7 @@ void grpc_end2end_tests_pre_init(void) {
request_with_payload_pre_init();
resource_quota_server_pre_init();
retry_pre_init();
retry_cancel_during_delay_pre_init();
retry_cancellation_pre_init();
retry_disabled_pre_init();
retry_exceeds_buffer_size_in_initial_batch_pre_init();
Expand Down Expand Up @@ -330,6 +333,7 @@ void grpc_end2end_tests(int argc, char **argv,
request_with_payload(config);
resource_quota_server(config);
retry(config);
retry_cancel_during_delay(config);
retry_cancellation(config);
retry_disabled(config);
retry_exceeds_buffer_size_in_initial_batch(config);
Expand Down Expand Up @@ -558,6 +562,10 @@ void grpc_end2end_tests(int argc, char **argv,
retry(config);
continue;
}
if (0 == strcmp("retry_cancel_during_delay", argv[i])) {
retry_cancel_during_delay(config);
continue;
}
if (0 == strcmp("retry_cancellation", argv[i])) {
retry_cancellation(config);
continue;
Expand Down
8 changes: 8 additions & 0 deletions test/core/end2end/end2end_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ extern void resource_quota_server(grpc_end2end_test_config config);
extern void resource_quota_server_pre_init(void);
extern void retry(grpc_end2end_test_config config);
extern void retry_pre_init(void);
extern void retry_cancel_during_delay(grpc_end2end_test_config config);
extern void retry_cancel_during_delay_pre_init(void);
extern void retry_cancellation(grpc_end2end_test_config config);
extern void retry_cancellation_pre_init(void);
extern void retry_disabled(grpc_end2end_test_config config);
Expand Down Expand Up @@ -244,6 +246,7 @@ void grpc_end2end_tests_pre_init(void) {
request_with_payload_pre_init();
resource_quota_server_pre_init();
retry_pre_init();
retry_cancel_during_delay_pre_init();
retry_cancellation_pre_init();
retry_disabled_pre_init();
retry_exceeds_buffer_size_in_initial_batch_pre_init();
Expand Down Expand Up @@ -334,6 +337,7 @@ void grpc_end2end_tests(int argc, char **argv,
request_with_payload(config);
resource_quota_server(config);
retry(config);
retry_cancel_during_delay(config);
retry_cancellation(config);
retry_disabled(config);
retry_exceeds_buffer_size_in_initial_batch(config);
Expand Down Expand Up @@ -566,6 +570,10 @@ void grpc_end2end_tests(int argc, char **argv,
retry(config);
continue;
}
if (0 == strcmp("retry_cancel_during_delay", argv[i])) {
retry_cancel_during_delay(config);
continue;
}
if (0 == strcmp("retry_cancellation", argv[i])) {
retry_cancellation(config);
continue;
Expand Down
4 changes: 4 additions & 0 deletions test/core/end2end/generate_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ END2END_TESTS = {
needs_client_channel = True,
proxyable = False,
),
"retry_cancel_during_delay": _test_options(
needs_client_channel = True,
proxyable = False,
),
"retry_disabled": _test_options(needs_client_channel = True, proxyable = False),
"retry_exceeds_buffer_size_in_initial_batch": _test_options(
needs_client_channel = True,
Expand Down
Loading

0 comments on commit d017ce5

Please sign in to comment.