Skip to content

Commit

Permalink
Break out the main interval trimming calculation into a new function
Browse files Browse the repository at this point in the history
  • Loading branch information
jbradberry committed Apr 26, 2021
1 parent f4d848e commit 4857c5e
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 50 deletions.
13 changes: 6 additions & 7 deletions awx/main/analytics/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def something(since):
"""


def trivial_slicing(key, since, until):
def trivial_slicing(key, since, until, last_gather):
if since is not None:
return [(since, until)]

Expand All @@ -45,11 +45,11 @@ def trivial_slicing(key, since, until):
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
last_entry = max(last_entries.get(key) or last_gather, horizon)
return [(last_entry, until)]


def four_hour_slicing(key, since, until):
def four_hour_slicing(key, since, until, last_gather):
if since is not None:
last_entry = since
else:
Expand All @@ -58,7 +58,7 @@ def four_hour_slicing(key, since, until):
horizon = until - timedelta(weeks=4)
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
last_entry = max(last_entries.get(key) or settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon, horizon)
last_entry = max(last_entries.get(key) or last_gather, horizon)

start, end = last_entry, None
while start < until:
Expand All @@ -67,15 +67,14 @@ def four_hour_slicing(key, since, until):
start = end


def events_slicing(key, since, until):
def events_slicing(key, since, until, last_gather):
from awx.conf.models import Setting

last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER
last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
horizon = until - timedelta(weeks=4)

lower = since or last_gather or horizon
lower = since or last_gather
if not since and last_entries.get(key):
lower = horizon
pk_values = models.JobEvent.objects.filter(created__gte=lower, created__lte=until).aggregate(Min('pk'), Max('pk'))
Expand Down
95 changes: 52 additions & 43 deletions awx/main/analytics/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,51 @@ def package(target, data, timestamp):
return None


def calculate_collection_interval(since, until):
_now = now()

# Make sure that the endpoints are not in the future.
if until is not None and until > _now:
until = _now
logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
if since is not None and since > _now:
since = _now
logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")

# The value of `until` needs to be concrete, so resolve it. If it wasn't passed in,
# set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
# `since` parameter.
if since is not None:
if until is not None:
if until > since + timedelta(weeks=4):
until = since + timedelta(weeks=4)
logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
else: # until is None
until = min(since + timedelta(weeks=4), _now)
elif until is None:
until = _now

if since and since >= until:
logger.warning("Start of the collection interval is later than the end, ignoring request.")
raise ValueError

# The ultimate beginning of the interval needs to be compared to 4 weeks prior to
# `until`, but we want to keep `since` empty if it wasn't passed in because we use that
# case to know whether to use the bookkeeping settings variables to decide the start of
# the interval.
horizon = until - timedelta(weeks=4)
if since is not None and since < horizon:
since = horizon
logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")

last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
if last_gather < horizon:
last_gather = horizon
logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")

return since, until, last_gather


def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'):
"""
Gather all defined metrics and write them as JSON files in a .tgz
Expand Down Expand Up @@ -148,48 +193,12 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
from awx.main.analytics import collectors
from awx.main.signals import disable_activity_stream

_now = now()

# Make sure that the endpoints are not in the future.
if until is not None and until > _now:
until = _now
logger.warning(f"End of the collection interval is in the future, setting to {_now}.")
if since is not None and since > _now:
since = _now
logger.warning(f"Start of the collection interval is in the future, setting to {_now}.")

# The value of `until` needs to be concrete, so resolve it. If it wasn't passed in,
# set it to `now`, but only if that isn't more than 4 weeks ahead of a passed-in
# `since` parameter.
if since is not None:
if until is not None:
if until > since + timedelta(weeks=4):
until = since + timedelta(weeks=4)
logger.warning(f"End of the collection interval is greater than 4 weeks from start, setting end to {until}.")
else: # until is None
until = min(since + timedelta(weeks=4), _now)
elif until is None:
until = _now

if since and since >= until:
logger.warning("Start of the collection interval is later than the end, ignoring request.")
return None

# The ultimate beginning of the interval needs to be compared to 4 weeks prior to
# `until`, but we want to keep `since` empty if it wasn't passed in because we use that
# case to know whether to use the bookkeeping settings variables to decide the start of
# the interval.
horizon = until - timedelta(weeks=4)
if since is not None and since < horizon:
since = horizon
logger.warning(f"Start of the collection interval is more than 4 weeks prior to {until}, setting to {horizon}.")

logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))

last_gather = settings.AUTOMATION_ANALYTICS_LAST_GATHER or horizon
if last_gather < horizon:
last_gather = horizon
logger.warning(f"Last analytics run was more than 4 weeks prior to {until}, using {horizon} instead.")
try:
since, until, last_gather = calculate_collection_interval(since, until)
except ValueError:
return None

last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)
Expand Down Expand Up @@ -219,7 +228,7 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
key = func.__awx_analytics_key__
filename = f'{key}.json'
try:
last_entry = max(last_entries.get(key) or last_gather, horizon)
last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
json.dumps(results) # throwaway check to see if the data is json-serializable
data[filename] = results
Expand Down Expand Up @@ -251,9 +260,9 @@ def gather(dest=None, module=None, subset=None, since=None, until=None, collecti
# allowed to be None, and will fall back to LAST_ENTRIES[key] or to
# LAST_GATHER (truncated appropriately to match the 4-week limit).
if func.__awx_expensive__:
slices = func.__awx_expensive__(key, since, until)
slices = func.__awx_expensive__(key, since, until, last_gather)
else:
slices = collectors.trivial_slicing(key, since, until)
slices = collectors.trivial_slicing(key, since, until, last_gather)

for start, end in slices:
files = func(start, full_path=gather_dir, until=end)
Expand Down

0 comments on commit 4857c5e

Please sign in to comment.