Skip to content

Commit

Permalink
Merge pull request ceph#1269 from ceph/wip-rgw-sync-agent-retry
Browse files Browse the repository at this point in the history
rgw: fix some races with radosgw and radosgw-agent startup

(verified by Orit)
  • Loading branch information
mattbenjamin authored Nov 16, 2016
2 parents ec33331 + 3e68bfd commit 32bc189
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 15 deletions.
2 changes: 0 additions & 2 deletions suites/rgw/singleton/all/radosgw-admin-data-sync.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ tasks:
name: client1-system-user
access key: 1te6NH5mcdcq0Tc5i8i3
secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw
- sleep:
duration: 15
- radosgw-agent:
client.0:
max-entries: 10
Expand Down
2 changes: 0 additions & 2 deletions suites/rgw/singleton/all/radosgw-admin-multi-region.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ tasks:
name: client1-system-user
access key: 1te6NH5mcdcq0Tc5i8i3
secret key: Py4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXw
- sleep:
duration: 5
- radosgw-agent:
client.0:
src: client.0
Expand Down
2 changes: 0 additions & 2 deletions suites/rgw/verify/tasks/rgw_s3tests_multiregion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ tasks:
name: client1-system-user
access key: 0te6NH5mcdcq0Tc5i8i2
secret key: Oy4IOauQoL18Gp2zM7lC1vLmoawgqcYPbYGcWfXv
- sleep:
duration: 5
- radosgw-agent:
client.0:
src: client.0
Expand Down
21 changes: 14 additions & 7 deletions tasks/rgw.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
import errno
import util.rgw as rgw_utils

from requests.packages.urllib3 import PoolManager
from requests.packages.urllib3.util import Retry

from cStringIO import StringIO

from teuthology.orchestra import run
Expand Down Expand Up @@ -353,6 +356,17 @@ def start_rgw(ctx, config, on_client = None, except_client = None):
wait=False,
)

# XXX: add_daemon() doesn't let us wait until radosgw finishes startup
# use a connection pool with retry/backoff to poll each gateway until it starts listening
http = PoolManager(retries=Retry(connect=8, backoff_factor=1))
for client in clients_to_run:
if client == except_client:
continue
host, port = ctx.rgw.role_endpoints[client]
endpoint = 'http://{host}:{port}/'.format(host=host, port=port)
log.info('Polling {client} until it starts accepting connections on {endpoint}'.format(client=client, endpoint=endpoint))
http.request('GET', endpoint)

try:
yield
finally:
Expand Down Expand Up @@ -1018,12 +1032,6 @@ def pull_configuration(ctx, config, regions, role_endpoints, realm, master_clien

yield

@contextlib.contextmanager
def wait_for_master():
    """
    Context manager that pauses for a fixed 20 seconds, then yields.

    Nothing is checked during the pause: it is a plain sleep, not a
    readiness probe, and there is no cleanup after the yield.
    """
    settle_seconds = 20
    log.debug("wait_for_master")
    time.sleep(settle_seconds)
    yield

@contextlib.contextmanager
def task(ctx, config):
"""
Expand Down Expand Up @@ -1282,7 +1290,6 @@ def task(ctx, config):
else:
raise ValueError("frontend must be 'apache' or 'civetweb'")

subtasks.extend([lambda: wait_for_master(),])
subtasks.extend([
lambda: pull_configuration(ctx=ctx,
config=config,
Expand Down
13 changes: 11 additions & 2 deletions tasks/util/rgw.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import json
import requests
from requests.packages.urllib3.util import Retry
from urlparse import urlparse

from teuthology.orchestra.connection import split_user
Expand Down Expand Up @@ -138,13 +139,21 @@ def radosgw_data_log_window(ctx, client):

def radosgw_agent_sync_data(ctx, agent_host, agent_port, full=False):
    """
    POST a data sync request to the radosgw-agent at agent_host:agent_port.

    :param ctx: not used by this function
    :param agent_host: host the radosgw-agent is listening on
    :param agent_port: port the radosgw-agent is listening on
    :param full: request a "full" sync when true, "incremental" otherwise
    :returns: the response object returned by the POST
    """
    log.info('sync agent {h}:{p}'.format(h=agent_host, p=agent_port))
    base_url = 'http://{addr}:{port}/'.format(addr=agent_host,
                                              port=agent_port)
    method = "full" if full else "incremental"
    # use retry with backoff to tolerate slow startup of radosgw-agent
    s = requests.Session()
    s.mount(base_url,
            requests.adapters.HTTPAdapter(
                max_retries=Retry(total=5, backoff_factor=1)))
    # The request must go through the session: a bare requests.post()
    # would bypass the adapter mounted above and get no retries at all.
    return s.post('{base}data/{method}'.format(base=base_url, method=method))

def radosgw_agent_sync_metadata(ctx, agent_host, agent_port, full=False):
    """
    POST a metadata sync request to the radosgw-agent at agent_host:agent_port.

    :param ctx: not used by this function
    :param agent_host: host the radosgw-agent is listening on
    :param agent_port: port the radosgw-agent is listening on
    :param full: request a "full" sync when true, "incremental" otherwise
    :returns: the response object returned by the POST
    """
    log.info('sync agent {h}:{p}'.format(h=agent_host, p=agent_port))
    base_url = 'http://{addr}:{port}/'.format(addr=agent_host,
                                              port=agent_port)
    method = "full" if full else "incremental"
    # use retry with backoff to tolerate slow startup of radosgw-agent
    s = requests.Session()
    s.mount(base_url,
            requests.adapters.HTTPAdapter(
                max_retries=Retry(total=5, backoff_factor=1)))
    # The request must go through the session: a bare requests.post()
    # would bypass the adapter mounted above and get no retries at all.
    return s.post('{base}metadata/{method}'.format(base=base_url,
                                                   method=method))

def radosgw_agent_sync_all(ctx, full=False, data=False):
if ctx.radosgw_agent.procs:
Expand Down

0 comments on commit 32bc189

Please sign in to comment.