Skip to content

Commit

Permalink
add ssh connection check (solana-labs#31472)
Browse files: browse the repository at this point in the history
  • Loading branch information
joeaba authored May 4, 2023
1 parent f833dac commit ed4cc52
Showing 1 changed file with 22 additions and 3 deletions.
25 changes: 22 additions & 3 deletions metrics/influx-enterprise/status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,32 @@ check_service() {

# Loop through the servers
for server in "${servers[@]}"; do
# Check if the service is running
if ssh -o StrictHostKeyChecking=no sol@"$server" sudo systemctl is-active "$service" >/dev/null; then
ssh_success=false
ssh_attempts=0
while ! $ssh_success && [ $ssh_attempts -lt 3 ]; do
# Check if the service is running
if ssh -o StrictHostKeyChecking=no sol@"$server" sudo systemctl is-active "$service" >/dev/null 2>&1; then
ssh_success=true
else
ssh_attempts=$((ssh_attempts + 1))
sleep 5
fi
done

if $ssh_success; then
# Service is running
message="The $service service is running on $server."
echo "$message"
else
# Service is not running, try to restart it
# SSH connection failed after retries
message="ERROR: Unable to establish SSH connection to $server after 3 retries."
echo "$message"
curl -H "Content-Type: application/json" -d '{"content":"'"$message"', manual intervention is required."}' "$DISCORD_WEBHOOK"
continue
fi

# Service is not running, try to restart it
if ! $ssh_success; then
message="The $service service is not running on $server. Restarting..."
echo "$message"
curl -H "Content-Type: application/json" -d '{"content":"'"$message"'"}' "$DISCORD_WEBHOOK"
Expand Down

0 comments on commit ed4cc52

Please sign in to comment.