Skip to content

Commit

Permalink
fly: utilize health-check based routing
Browse files Browse the repository at this point in the history
Fly now routes traffic globally to any machine based on health checks.
Timing out aggressively on health checks is therefore beneficial: it sheds
load and steers traffic away from an unresponsive server.

Since loadavg is already used in-process to tune load shedding, we can also
experiment with higher per-service connection limits (raising each by 200:
hard_limit 575 -> 775, soft_limit 500 -> 700).
  • Loading branch information
ignoramous committed Jun 14, 2023
1 parent fe76cb5 commit f9d4d71
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 22 deletions.
32 changes: 21 additions & 11 deletions fly.tls.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ auto_rollback = true
auto_start_machines = true

[services.concurrency]
hard_limit = 575
soft_limit = 500
hard_limit = 775
soft_limit = 700
type = "connections"

[[services.ports]]
Expand All @@ -40,10 +40,15 @@ auto_rollback = true
port = 8055

[[services.tcp_checks]]
interval = "30s"
timeout = "3s"
# super aggressive interval and timeout because
# health-check routing is handled by fly-proxy
# and it is cross-region
# community.fly.io/t/12997
interval = "5s"
timeout = "1s"
grace_period = "15s"
restart_limit = 3
# restart_limit is unused in appsv2
restart_limit = 0

# DNS over TCP/TLS
[[services]]
Expand All @@ -53,8 +58,8 @@ auto_rollback = true
auto_start_machines = true

[services.concurrency]
hard_limit = 575
soft_limit = 500
hard_limit = 775
soft_limit = 700
type = "connections"

[[services.ports]]
Expand All @@ -68,10 +73,15 @@ auto_rollback = true
port = 10555

[[services.tcp_checks]]
interval = "30s"
timeout = "3s"
# super aggressive interval and timeout because
# health-check routing is handled by fly-proxy
# and it is cross-region
# community.fly.io/t/12997
interval = "5s"
timeout = "1s"
grace_period = "15s"
restart_limit = 3
# restart_limit is unused in appsv2
restart_limit = 0

# community.fly.io/t/5490/3
[checks]
Expand All @@ -82,6 +92,6 @@ auto_rollback = true
method = "get"
path = "/check"
port = 8888
timeout = "3s"
timeout = "2s"
type = "http"

32 changes: 21 additions & 11 deletions fly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,19 @@ kill_timeout = "15s"

[services.concurrency]
type = "connections"
hard_limit = 575
soft_limit = 500
hard_limit = 775
soft_limit = 700

[[services.tcp_checks]]
interval = "30s"
timeout = "3s"
# super aggressive interval and timeout because
# health-check routing is handled by fly-proxy
# and it is cross-region
# community.fly.io/t/12997
interval = "5s"
timeout = "1s"
grace_period = "15s"
restart_limit = 3
    # restart_limit is unused in appsv2
restart_limit = 0

# DNS over TLS
[[services]]
Expand All @@ -53,14 +58,19 @@ kill_timeout = "15s"

[services.concurrency]
type = "connections"
hard_limit = 575
soft_limit = 500
hard_limit = 775
soft_limit = 700

[[services.tcp_checks]]
interval = "30s"
timeout = "3s"
# super aggressive interval and timeout because
# health-check routing is handled by fly-proxy
# and it is cross-region
# community.fly.io/t/12997
interval = "5s"
timeout = "1s"
grace_period = "15s"
restart_limit = 3
# restart_limit is unused in appsv2
restart_limit = 0

# community.fly.io/t/5490/3
[checks]
Expand All @@ -71,5 +81,5 @@ kill_timeout = "15s"
method = "get"
path = "/check"
port = 8888
timeout = "3s"
timeout = "2s"
type = "http"

0 comments on commit f9d4d71

Please sign in to comment.