Skip to content

Commit

Permalink
Remove time-tests outliers using Inter Quartile Range (IQR) (openvino…
Browse files Browse the repository at this point in the history
…toolkit#5014)

Change number of times to execute binary to aggregate statistics from 3 to 10.
  • Loading branch information
just-sparta authored Apr 2, 2021
1 parent c966aea commit 69a06f5
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 24 deletions.
14 changes: 10 additions & 4 deletions tests/time_tests/scripts/run_timetest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,19 @@
# pylint: disable=redefined-outer-name

import statistics
from pathlib import Path
import tempfile
import subprocess
import logging
import argparse
import sys
import os
from pprint import pprint
import yaml

from pathlib import Path
from pprint import pprint

from test_runner.utils import filter_timetest_result


def run_cmd(args: list, log=None, verbose=True):
""" Run command
Expand Down Expand Up @@ -95,8 +98,11 @@ def run_timetest(args: dict, log=None):
stats = dict((step_name, stats.get(step_name, []) + [duration])
for step_name, duration in raw_data.items())

# Remove outliers
filtered_stats = filter_timetest_result(stats)

# Aggregate results
aggregated_stats = aggregate_stats(stats)
aggregated_stats = aggregate_stats(filtered_stats)
log.debug("Aggregated statistics after full run: {}".format(aggregated_stats))

return 0, aggregated_stats
Expand Down Expand Up @@ -129,7 +135,7 @@ def cli_parser():
type=str,
help='target device to infer on')
parser.add_argument('-niter',
default=3,
default=10,
type=check_positive_int,
help='number of times to execute binary to aggregate statistics of')
parser.add_argument('-s',
Expand Down
4 changes: 2 additions & 2 deletions tests/time_tests/test_runner/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
import shutil
import sys
import tempfile
from pathlib import Path

import pytest
import yaml

from pathlib import Path
from jsonschema import validate, ValidationError

from scripts.run_timetest import check_positive_int
Expand Down
3 changes: 2 additions & 1 deletion tests/time_tests/test_runner/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ pytest==4.0.1
attrs==19.1.0 # required for pytest==4.0.1 to resolve compatibility issues
PyYAML==5.4.1
jsonschema==3.2.0
distro==1.5.0
distro==1.5.0
numpy==1.18.5
51 changes: 34 additions & 17 deletions tests/time_tests/test_runner/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,23 @@
import os
import platform
import sys
from enum import Enum
from pathlib import Path

import distro
import yaml
import numpy as np

from enum import Enum
from pathlib import Path
from pymongo import MongoClient

# constants
DATABASE = 'timetests' # database name for timetests results
DB_COLLECTIONS = ["commit", "nightly", "weekly"]
PRODUCT_NAME = 'dldt' # product name from build manifest

# Cutoff multiplier for outlier removal: values < Q1 - IQR_CUTOFF * IQR or > Q3 + IQR_CUTOFF * IQR are dropped
# https://en.wikipedia.org/wiki/Interquartile_range
IQR_CUTOFF = 1.5


def expand_env_vars(obj):
"""Expand environment variables in provided object."""
Expand All @@ -34,16 +39,14 @@ def expand_env_vars(obj):


def upload_timetest_data(data, db_url, db_collection):
""" Upload timetest data to database
"""
""" Upload timetest data to database."""
client = MongoClient(db_url)
collection = client[DATABASE][db_collection]
collection.replace_one({'_id': data['_id']}, data, upsert=True)


def metadata_from_manifest(manifest: Path):
""" Extract commit metadata from manifest
"""
""" Extract commit metadata from manifest."""
with open(manifest, 'r') as manifest_file:
manifest = yaml.safe_load(manifest_file)
repo_trigger = next(
Expand All @@ -58,11 +61,27 @@ def metadata_from_manifest(manifest: Path):
}


class UnsupportedOsError(Exception):
"""
Exception for unsupported OS type
"""
def calculate_iqr(stats: list):
    """Return the interquartile range of the data together with its quartiles.

    The IQR is the difference between the third quartile (0.75 quantile)
    and the first quartile (0.25 quantile) of ``stats``.

    :param stats: sequence of numeric samples
    :return: tuple ``(iqr, q1, q3)``
    """
    lower_quartile = np.quantile(stats, 0.25)
    upper_quartile = np.quantile(stats, 0.75)
    return upper_quartile - lower_quartile, lower_quartile, upper_quartile


def filter_timetest_result(stats: dict):
    """Identify and remove outliers from time_results.

    For every step, samples lying outside the closed interval
    ``[Q1 - IQR_CUTOFF * IQR, Q3 + IQR_CUTOFF * IQR]`` are dropped.

    :param stats: mapping of step name to a list of measured durations
    :return: new dict with the same keys and the filtered lists of durations
    """
    filtered_stats = {}
    for step_name, time_results in stats.items():
        if len(time_results) == 0:
            # Quantiles are undefined for an empty sample; nothing to filter.
            filtered_stats[step_name] = list(time_results)
            continue
        iqr, q1, q3 = calculate_iqr(time_results)
        cut_off = iqr * IQR_CUTOFF
        lower_fence = q1 - cut_off
        upper_fence = q3 + cut_off
        # Bounds are inclusive: only values strictly beyond the fences are
        # outliers. With strict comparisons a zero IQR (single sample or all
        # samples identical) collapses both fences onto the sample value and
        # every measurement would be discarded.
        filtered_stats[step_name] = [
            x for x in time_results if lower_fence <= x <= upper_fence
        ]
    return filtered_stats


class UnsupportedOsError(Exception):
"""Exception for unsupported OS type."""
def __init__(self, *args, **kwargs):
error_message = f'OS type "{get_os_type()}" is not currently supported'
if args or kwargs:
Expand All @@ -72,9 +91,7 @@ def __init__(self, *args, **kwargs):


class OsType(Enum):
"""
Container for supported os types
"""
"""Container for supported os types."""
WINDOWS = 'Windows'
LINUX = 'Linux'
DARWIN = 'Darwin'
Expand All @@ -91,17 +108,17 @@ def get_os_type():


def os_type_is_windows():
"""Returns True if OS type is Windows. Otherwise returns False"""
"""Returns True if OS type is Windows. Otherwise returns False."""
return get_os_type() == OsType.WINDOWS.value


def os_type_is_linux():
"""Returns True if OS type is Linux. Otherwise returns False"""
"""Returns True if OS type is Linux. Otherwise returns False."""
return get_os_type() == OsType.LINUX.value


def os_type_is_darwin():
"""Returns True if OS type is Darwin. Otherwise returns False"""
"""Returns True if OS type is Darwin. Otherwise returns False."""
return get_os_type() == OsType.DARWIN.value


Expand Down

0 comments on commit 69a06f5

Please sign in to comment.