Skip to content

Commit

Permalink
ratelimits: Add a feature-flag which makes key-value implementation a…
Browse files Browse the repository at this point in the history
…uthoritative (letsencrypt#7666)

- Add feature flag `UseKvLimitsForNewOrder`
- Add feature flag `UseKvLimitsForNewAccount`
- Flush all Redis shards before running integration or unit tests, this
avoids false positives between local testing runs

Fixes letsencrypt#7664
Blocked by letsencrypt#7676
  • Loading branch information
beautifulentropy authored Aug 22, 2024
1 parent c7a04e8 commit c9be034
Show file tree
Hide file tree
Showing 12 changed files with 203 additions and 101 deletions.
13 changes: 13 additions & 0 deletions features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,19 @@ type Config struct {
// returned to the Subscriber indicating that the order cannot be processed
// until the paused identifiers are unpaused and the order is resubmitted.
CheckIdentifiersPaused bool

// UseKvLimitsForNewOrder when enabled, causes the key-value rate limiter to
// be the authoritative source of rate limiting information for new-order
// callers and disables the legacy rate limiting checks.
//
// Note: this flag does not disable writes to the certificatesPerName or
// fqdnSets tables at Finalize time.
UseKvLimitsForNewOrder bool

// UseKvLimitsForNewAccount when enabled, causes the key-value rate limiter
// to be the authoritative source of rate limiting information for
// new-account callers and disables the legacy rate limiting checks.
UseKvLimitsForNewAccount bool
}

var fMu = new(sync.RWMutex)
Expand Down
19 changes: 8 additions & 11 deletions ra/ra.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,12 @@ func (ra *RegistrationAuthorityImpl) NewRegistration(ctx context.Context, reques
if err != nil {
return nil, berrors.InternalServerError("failed to unmarshal ip address: %s", err.Error())
}
err = ra.checkRegistrationLimits(ctx, ipAddr)
if err != nil {
return nil, err

if !features.Get().UseKvLimitsForNewAccount {
err = ra.checkRegistrationLimits(ctx, ipAddr)
if err != nil {
return nil, err
}
}

// Check that contacts conform to our expectations.
Expand Down Expand Up @@ -2007,12 +2010,6 @@ func (ra *RegistrationAuthorityImpl) PerformValidation(
if prob != nil {
challenge.Status = core.StatusInvalid
challenge.Error = prob

// TODO(#5545): Spending can be async until key-value rate limits
// are authoritative. This saves us from adding latency to each
// request. Goroutines spun out below will respect a context
// deadline set by the ratelimits package and cannot be prematurely
// canceled by the requester.
go ra.countFailedValidation(vaCtx, authz.RegistrationID, authz.Identifier.Value)
} else {
challenge.Status = core.StatusValid
Expand Down Expand Up @@ -2585,7 +2582,7 @@ func (ra *RegistrationAuthorityImpl) NewOrder(ctx context.Context, req *rapb.New
}

// Renewal orders, indicated by ARI, are exempt from NewOrder rate limits.
if !req.IsARIRenewal {
if !req.IsARIRenewal && !features.Get().UseKvLimitsForNewOrder {
// Check if there is rate limit space for issuing a certificate.
err = ra.checkNewOrderLimits(ctx, newOrder.DnsNames, newOrder.RegistrationID, req.IsRenewal)
if err != nil {
Expand Down Expand Up @@ -2664,7 +2661,7 @@ func (ra *RegistrationAuthorityImpl) NewOrder(ctx context.Context, req *rapb.New
}

// Renewal orders, indicated by ARI, are exempt from NewOrder rate limits.
if len(missingAuthzIdents) > 0 && !req.IsARIRenewal {
if len(missingAuthzIdents) > 0 && !req.IsARIRenewal && !features.Get().UseKvLimitsForNewOrder {
pendingAuthzLimits := ra.rlPolicies.PendingAuthorizationsPerAccount()
if pendingAuthzLimits.Enabled() {
// The order isn't fully authorized we need to check that the client
Expand Down
10 changes: 10 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ UNIT_PACKAGES=()
UNIT_FLAGS=()
FILTER=()

#
# Cleanup Functions
#

function flush_redis() {
go run ./test/boulder-tools/flushredis/main.go
}

#
# Print Functions
#
Expand Down Expand Up @@ -225,6 +233,7 @@ fi
STAGE="unit"
if [[ "${RUN[@]}" =~ "$STAGE" ]] ; then
print_heading "Running Unit Tests"
flush_redis
run_unit_tests
fi

Expand All @@ -234,6 +243,7 @@ fi
STAGE="integration"
if [[ "${RUN[@]}" =~ "$STAGE" ]] ; then
print_heading "Running Integration Tests"
flush_redis
python3 test/integration-test.py --chisel --gotest "${FILTER[@]}"
fi

Expand Down
56 changes: 56 additions & 0 deletions test/boulder-tools/flushredis/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package main

import (
"context"
"fmt"
"os"

"github.com/letsencrypt/boulder/cmd"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
bredis "github.com/letsencrypt/boulder/redis"

"github.com/redis/go-redis/v9"
)

func main() {
rc := bredis.Config{
Username: "unittest-rw",
TLS: cmd.TLSConfig{
CACertFile: "test/certs/ipki/minica.pem",
CertFile: "test/certs/ipki/localhost/cert.pem",
KeyFile: "test/certs/ipki/localhost/key.pem",
},
Lookups: []cmd.ServiceDomain{
{
Service: "redisratelimits",
Domain: "service.consul",
},
},
LookupDNSAuthority: "consul.service.consul",
}
rc.PasswordConfig = cmd.PasswordConfig{
PasswordFile: "test/secrets/ratelimits_redis_password",
}

stats := metrics.NoopRegisterer
log := blog.NewMock()
ring, err := bredis.NewRingFromConfig(rc, stats, log)
if err != nil {
fmt.Printf("while constructing ring client: %v\n", err)
os.Exit(1)
}

err = ring.ForEachShard(context.Background(), func(ctx context.Context, shard *redis.Client) error {
cmd := shard.FlushAll(ctx)
_, err := cmd.Result()
if err != nil {
return err
}
return nil
})
if err != nil {
fmt.Printf("while flushing redis shards: %v\n", err)
os.Exit(1)
}
}
4 changes: 3 additions & 1 deletion test/config-next/ra.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@
},
"features": {
"AsyncFinalize": true,
"CheckRenewalExemptionAtWFE": true
"CheckRenewalExemptionAtWFE": true,
"UseKvLimitsForNewOrder": true,
"UseKvLimitsForNewAccount": true
},
"ctLogs": {
"stagger": "500ms",
Expand Down
4 changes: 4 additions & 0 deletions test/config-next/wfe2-ratelimit-overrides.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
ids:
- id: 127.0.0.1
comment: localhost
- id: 10.77.77.77
comment: test
- id: 10.88.88.88
comment: test
- CertificatesPerDomain:
burst: 1
count: 1
Expand Down
4 changes: 3 additions & 1 deletion test/config-next/wfe2.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@
"ServeRenewalInfo": true,
"TrackReplacementCertificatesARI": true,
"CheckRenewalExemptionAtWFE": true,
"CheckIdentifiersPaused": true
"CheckIdentifiersPaused": true,
"UseKvLimitsForNewOrder": true,
"UseKvLimitsForNewAccount": true
},
"certProfiles": {
"legacy": "The normal profile you know and love",
Expand Down
16 changes: 14 additions & 2 deletions test/integration/otel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ func httpSpan(endpoint string, children ...expectedSpans) expectedSpans {
}
}

func redisPipelineSpan(op, service string, children ...expectedSpans) expectedSpans {
return expectedSpans{
Operation: "redis.pipeline " + op,
Service: service,
Children: children,
}
}

// TestTraces tests that all the expected spans are present and properly connected
func TestTraces(t *testing.T) {
t.Parallel()
Expand All @@ -210,17 +218,19 @@ func TestTraces(t *testing.T) {
{Operation: "/acme/new-nonce", Service: wfe, Children: []expectedSpans{
rpcSpan("nonce.NonceService/Nonce", wfe, "nonce-service")}},
httpSpan("/acme/new-acct",
redisPipelineSpan("get", wfe),
redisPipelineSpan("set", wfe),
rpcSpan("sa.StorageAuthorityReadOnly/KeyBlocked", wfe, sa),
rpcSpan("sa.StorageAuthorityReadOnly/GetRegistrationByKey", wfe, sa),
rpcSpan("ra.RegistrationAuthority/NewRegistration", wfe, ra,
rpcSpan("sa.StorageAuthority/KeyBlocked", ra, sa),
rpcSpan("sa.StorageAuthority/CountRegistrationsByIP", ra, sa),
rpcSpan("sa.StorageAuthority/NewRegistration", ra, sa))),
httpSpan("/acme/new-order",
rpcSpan("sa.StorageAuthorityReadOnly/GetRegistration", wfe, sa),
redisPipelineSpan("get", wfe),
redisPipelineSpan("set", wfe),
rpcSpan("ra.RegistrationAuthority/NewOrder", wfe, ra,
rpcSpan("sa.StorageAuthority/GetOrderForNames", ra, sa),
// 8 ra -> sa rate limit spans omitted here
rpcSpan("sa.StorageAuthority/NewOrderAndAuthzs", ra, sa))),
httpSpan("/acme/authz-v3/",
rpcSpan("sa.StorageAuthorityReadOnly/GetAuthorization2", wfe, sa)),
Expand All @@ -236,8 +246,10 @@ func TestTraces(t *testing.T) {
rpcSpan("sa.StorageAuthority/GetValidOrderAuthorizations2", ra, sa),
rpcSpan("sa.StorageAuthority/SetOrderProcessing", ra, sa),
rpcSpan("ca.CertificateAuthority/IssuePrecertificate", ra, ca),
redisPipelineSpan("get", ra),
rpcSpan("Publisher/SubmitToSingleCTWithResult", ra, "boulder-publisher"),
rpcSpan("ca.CertificateAuthority/IssueCertificateForPrecertificate", ra, ca),
redisPipelineSpan("set", ra),
rpcSpan("sa.StorageAuthority/FinalizeOrder", ra, sa))),
httpSpan("/acme/order/", rpcSpan("sa.StorageAuthorityReadOnly/GetOrder", wfe, sa)),
httpSpan("/acme/cert/", rpcSpan("sa.StorageAuthorityReadOnly/GetCertificate", wfe, sa)),
Expand Down
88 changes: 41 additions & 47 deletions test/integration/ratelimit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,21 @@
package integration

import (
"context"
"crypto/rand"
"encoding/hex"
"fmt"
"os"
"strings"
"testing"

"github.com/jmhodges/clock"

"github.com/letsencrypt/boulder/cmd"
berrors "github.com/letsencrypt/boulder/errors"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/ratelimits"
bredis "github.com/letsencrypt/boulder/redis"
"github.com/letsencrypt/boulder/test"
)

func TestDuplicateFQDNRateLimit(t *testing.T) {
t.Parallel()
domain := random_domain()

// The global rate limit for a duplicate certificates is 2 per 3 hours.
_, err := authAndIssue(nil, nil, []string{domain}, true)
test.AssertNotError(t, err, "Failed to issue first certificate")

Expand All @@ -33,45 +28,44 @@ func TestDuplicateFQDNRateLimit(t *testing.T) {
test.AssertError(t, err, "Somehow managed to issue third certificate")

if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
// Setup rate limiting.
rc := bredis.Config{
Username: "unittest-rw",
TLS: cmd.TLSConfig{
CACertFile: "test/certs/ipki/minica.pem",
CertFile: "test/certs/ipki/localhost/cert.pem",
KeyFile: "test/certs/ipki/localhost/key.pem",
},
Lookups: []cmd.ServiceDomain{
{
Service: "redisratelimits",
Domain: "service.consul",
},
},
LookupDNSAuthority: "consul.service.consul",
}
rc.PasswordConfig = cmd.PasswordConfig{
PasswordFile: "test/secrets/ratelimits_redis_password",
}
// Error should be served from key-value rate limits implementation.
test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3h0m0s")
} else {
// Error should be served from legacy rate limits implementation.
test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3 hours")
}
}

fc := clock.New()
stats := metrics.NoopRegisterer
log := blog.NewMock()
ring, err := bredis.NewRingFromConfig(rc, stats, log)
test.AssertNotError(t, err, "making redis ring client")
source := ratelimits.NewRedisSource(ring.Ring, fc, stats)
test.AssertNotNil(t, source, "source should not be nil")
limiter, err := ratelimits.NewLimiter(fc, source, stats)
test.AssertNotError(t, err, "making limiter")
txnBuilder, err := ratelimits.NewTransactionBuilder("test/config-next/wfe2-ratelimit-defaults.yml", "")
test.AssertNotError(t, err, "making transaction composer")
func TestCertificatesPerDomain(t *testing.T) {
t.Parallel()

// Check that the CertificatesPerFQDNSet limit is reached.
txns, err := txnBuilder.NewOrderLimitTransactions(1, []string{domain}, false)
test.AssertNotError(t, err, "making transaction")
decision, err := limiter.BatchSpend(context.Background(), txns)
test.AssertNotError(t, err, "checking transaction")
err = decision.Result(fc.Now())
test.AssertErrorIs(t, err, berrors.RateLimit)
test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3h0m0s")
randomDomain := random_domain()
randomSubDomain := func() string {
var bytes [3]byte
rand.Read(bytes[:])
return fmt.Sprintf("%s.%s", hex.EncodeToString(bytes[:]), randomDomain)
}

firstSubDomain := randomSubDomain()
_, err := authAndIssue(nil, nil, []string{firstSubDomain}, true)
test.AssertNotError(t, err, "Failed to issue first certificate")

_, err = authAndIssue(nil, nil, []string{randomSubDomain()}, true)
test.AssertNotError(t, err, "Failed to issue second certificate")

_, err = authAndIssue(nil, nil, []string{randomSubDomain()}, true)
test.AssertError(t, err, "Somehow managed to issue third certificate")

if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
// Error should be served from key-value rate limits implementation.
test.AssertContains(t, err.Error(), fmt.Sprintf("too many certificates (2) already issued for %q in the last 2160h0m0s", randomDomain))
} else {
// Error should be served from legacy rate limits implementation.
test.AssertContains(t, err.Error(), fmt.Sprintf("too many certificates already issued for %q", randomDomain))
}

// Issue a certificate for the first subdomain, which should succeed because
// it's a renewal.
_, err = authAndIssue(nil, nil, []string{firstSubDomain}, true)
test.AssertNotError(t, err, "Failed to issue renewal certificate")
}
1 change: 0 additions & 1 deletion test/redis-ratelimits.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ rename-command BGREWRITEAOF ""
rename-command BGSAVE ""
rename-command CONFIG ""
rename-command DEBUG ""
rename-command FLUSHALL ""
rename-command FLUSHDB ""
rename-command KEYS ""
rename-command PEXPIRE ""
Expand Down
12 changes: 7 additions & 5 deletions test/v2_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1559,12 +1559,14 @@ def test_renewal_exemption():
chisel2.expect_problem("urn:ietf:params:acme:error:rateLimited",
lambda: chisel2.auth_and_issue(["mail." + base_domain]))

# TODO(#5545)
# - Phase 2: Once the new rate limits are authoritative in config-next, ensure
# that this test only runs in config.
# - Phase 3: Once the new rate limits are authoritative in config, remove this
# test entirely.
# TODO(#5545) Remove this test once key-value rate limits are authoritative in
# production.
def test_certificates_per_name():
if CONFIG_NEXT:
# This test is replaced by TestCertificatesPerDomain in the Go
# integration tests because key-value rate limits does not support
# override limits of 0.
return
chisel2.expect_problem("urn:ietf:params:acme:error:rateLimited",
lambda: chisel2.auth_and_issue([random_domain() + ".lim.it"]))

Expand Down
Loading

0 comments on commit c9be034

Please sign in to comment.