def _emit_build_age_metric(gcs_path):
  """Emits a metric tracking the age (in hours) of the build at |gcs_path|.

  Best-effort: any failure is logged and swallowed so that metric emission
  can never break build setup.

  Args:
    gcs_path: Path of the build archive whose age is being measured; served
      either by the GCS API or by the local filesystem implementation.
  """
  try:
    # The 'updated' field is expected as a datetime per
    # https://cloud.google.com/storage/docs/json_api/v1/objects#resource
    last_update_time = storage.get(gcs_path).get('updated')
    # TODO(vitorguidi): standardize return type between fs and gcs.
    if isinstance(last_update_time, str):
      # storage.get returns two different types for the updated field:
      # the gcs api returns string, and the local filesystem implementation
      # returns a datetime.datetime object normalized for UTC.
      # The GCS API uses RFC 3339 timestamps with a 'Z' suffix, which
      # datetime.fromisoformat only accepts natively from Python 3.11 on,
      # so normalize it to an explicit UTC offset first.
      last_update_time = datetime.datetime.fromisoformat(
          last_update_time.replace('Z', '+00:00'))
    if last_update_time.tzinfo is None:
      # Guard against naive datetimes so the aware-minus-naive subtraction
      # below cannot raise TypeError; values are presumed to be UTC already.
      last_update_time = last_update_time.replace(
          tzinfo=datetime.timezone.utc)
    now = datetime.datetime.now(datetime.timezone.utc)
    elapsed_time = now - last_update_time
    elapsed_time_in_hours = elapsed_time.total_seconds() / 3600
    # Fuzz targets do not apply for custom builds.
    labels = {
        'job': os.getenv('JOB_NAME'),
        'platform': environment.platform(),
        'task': os.getenv('TASK_NAME'),
    }
    monitoring_metrics.JOB_BUILD_AGE.add(elapsed_time_in_hours, labels)
  except Exception as e:
    logs.error(f'Failed to emit build age metric for {gcs_path}: {e}')
def _get_build_bucket_paths():
  """Returns gcs bucket endpoints that contain the build of interest."""
  paths = []
  for env_var in DEFAULT_BUILD_BUCKET_PATH_ENV_VARS:
    # Not every env var is configured for every job; skip (and note) the
    # missing ones rather than failing.
    candidate = get_bucket_path(env_var)
    if not candidate:
      logs.info('Bucket path not found for %s' % env_var)
      continue
    paths.append(candidate)
  return paths
# Age of the most recent build a job runs against, in hours, as measured at
# build-setup time. Buckets are fixed-width (0.05 h) with 20 finite buckets,
# so anything older falls in the overflow bucket. Labeled by job, platform
# and task (read from the environment at emission time).
JOB_BUILD_AGE = monitor.CumulativeDistributionMetric(
    'job/build_age',
    bucketer=monitor.FixedWidthBucketer(width=0.05, num_finite_buckets=20),
    description=('Distribution of latest build\'s age in hours. '
                 '(grouped by fuzzer/job)'),
    field_spec=[
        monitor.StringField('job'),
        monitor.StringField('platform'),
        monitor.StringField('task'),
    ],
)