Skip to content

Commit

Permalink
Merge pull request ceph#43669 from rhcs-dashboard/grafana-unit-tests
Browse files Browse the repository at this point in the history
monitoring/grafana: Grafana query tester

Reviewed-by: Aashish Sharma <[email protected]>
Reviewed-by: Alfonso Martínez <[email protected]>
Reviewed-by: Ernesto Puerta <[email protected]>
Reviewed-by: Pere Diaz Bou <[email protected]>
  • Loading branch information
epuertat authored Nov 16, 2021
2 parents e43a888 + 44d3e4c commit 788b810
Show file tree
Hide file tree
Showing 14 changed files with 571 additions and 3 deletions.
1 change: 1 addition & 0 deletions monitoring/grafana/dashboards/.pylintrc
6 changes: 4 additions & 2 deletions monitoring/grafana/dashboards/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ endif()

if(WITH_GRAFANA)
include(AddCephTest)
add_tox_test(grafana TOX_ENVS grafonnet-check)
add_tox_test(grafana-check TOX_ENVS grafonnet-check)
add_tox_test(grafana-query-test TOX_ENVS promql-query-test)
add_tox_test(grafana-lint TOX_ENVS lint)
set(ver 0.1.0)
set(name grafonnet-lib)
include(ExternalProject)
Expand All @@ -30,7 +32,7 @@ if(WITH_GRAFANA)
${name})
ExternalProject_Get_Property(${name} SOURCE_DIR)
set_property(
TEST run-tox-grafana
TEST run-tox-grafana-check run-tox-grafana-query-test run-tox-grafana-lint
APPEND
PROPERTY ENVIRONMENT
GRAFONNET_PATH=${SOURCE_DIR}/grafonnet)
Expand Down
18 changes: 18 additions & 0 deletions monitoring/grafana/dashboards/requirements-lint.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
attrs==21.2.0
behave==1.2.6
py==1.10.0
pyparsing==2.4.7
PyYAML==6.0
types-PyYAML==6.0.0
typing-extensions==3.10.0.2
termcolor==1.1.0
types-termcolor==1.1.2
dataclasses==0.6
types-dataclasses==0.6.1
six==1.16.0
toml==0.10.2
pylint==2.6.0
isort==5.10.0
mypy==0.910
mypy-extensions==0.4.3
prettytable==2.4.0
187 changes: 187 additions & 0 deletions monitoring/grafana/dashboards/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import re
import subprocess
import sys
import tempfile
from dataclasses import asdict, dataclass, field
from typing import Any, List

import yaml


@dataclass
class InputSeries:
    """A single input time series handed to promtool.

    Both fields default to the empty string so an instance can be created
    first and filled in afterwards.
    """

    # Series selector, e.g. 'bonding_slaves{master="bond0"}'.
    series: str = ''
    # Sample values of the series as one string, e.g. '10 100 230 22'.
    values: str = ''

@dataclass
class ExprSample:
    """One sample that the tested expression is expected to produce."""

    # Label set of the expected sample, e.g. 'bonding_slaves{master="bond0"}'.
    labels: str = ''
    # Expected numeric value; -1 is only the placeholder default.
    value: float = -1

@dataclass
class PromqlExprTest:
    """An expression together with the samples it is expected to yield."""

    # PromQL expression/query under test.
    expr: str = ''
    # Point in time at which the expression is evaluated ('<number><unit>').
    eval_time: str = '1m'
    # Expected output samples; every instance gets its own fresh list.
    exp_samples: List[ExprSample] = field(default_factory=list)

@dataclass
class Test:
    """One test group: the input series plus the expressions run on them."""

    # Interval of the input series ('<number><unit>').
    interval: str = '1m'
    # Series fed into the test; every instance gets its own fresh list.
    input_series: List[InputSeries] = field(default_factory=list)
    # Expression tests evaluated against the input series.
    promql_expr_test: List[PromqlExprTest] = field(default_factory=list)


@dataclass
class TestFile:
    """Top-level document that is serialised to YAML and given to promtool."""

    # Evaluation interval of the whole file ('<number><unit>').
    evaluation_interval: str = '1m'
    # Test groups contained in the file; every instance gets its own list.
    tests: List[Test] = field(default_factory=list)


class PromqlTest:
    """
    Base class to provide prometheus query test capabilities. After setting up
    the query test with its input and expected output, promtool is run on it.
    https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-yml

    The workflow of testing would be something like:

        # add prometheus query to test
        self.set_expression('bonding_slaves > 0')

        # add some prometheus input series
        self.add_series('bonding_slaves{master="bond0"}', '2')
        self.add_series('bonding_slaves{master="bond1"}', '3')
        self.add_series(
            'node_network_receive_bytes{instance="127.0.0.1", device="eth1"}',
            '10 100 230 22')

        # expected output of the query
        self.add_exp_samples('bonding_slaves{master="bond0"}', 2)
        self.add_exp_samples('bonding_slaves{master="bond1"}', 3)

        # at last, always call promtool with:
        self.assertTrue(self.run_promtool())
        # assertTrue means promtool is expected to succeed
    """

    def __init__(self):
        # The YAML test definition is written here and handed to promtool.
        self.test_output_file = tempfile.NamedTemporaryFile('w+')

        # Build the file -> test -> expression test hierarchy once; the
        # setters below mutate these shared objects in place.
        self.test_file = TestFile()
        self.test = Test()
        self.promql_expr_test = PromqlExprTest()
        self.test.promql_expr_test.append(self.promql_expr_test)
        self.test_file.tests.append(self.test)

        # Grafana variable name -> value, substituted by run_promtool().
        self.variables = {}

    def __del__(self):
        # __init__ may have failed before the temporary file was created.
        output_file = getattr(self, 'test_output_file', None)
        if output_file is not None:
            output_file.close()

    def set_evaluation_interval(self, interval: int, unit: str = 'm') -> None:
        """
        Set the evaluation interval of the test file.

        Args:
            interval (int): number of units.
            unit (str): unit type: 'ms', 's', 'm', etc...
        """
        self.test_file.evaluation_interval = f'{interval}{unit}'

    def set_interval(self, interval: int, unit: str = 'm') -> None:
        """
        Set the interval of the input time series.

        Args:
            interval (int): number of units.
            unit (str): unit type: 'ms', 's', 'm', etc...
        """
        self.test.interval = f'{interval}{unit}'

    def set_expression(self, expr: str) -> None:
        """
        Set the prometheus expression/query used to filter data.

        Args:
            expr(str): expression/query.
        """
        self.promql_expr_test.expr = expr

    def add_series(self, series: str, values: str) -> None:
        """
        Add a series to the input.

        Args:
            series(str): Prometheus series.
            Notation: '<metric name>{<label name>=<label value>, ...}'
            values(str): Value of the series.
        """
        input_series = InputSeries(series=series, values=values)
        self.test.input_series.append(input_series)

    def set_eval_time(self, eval_time: int, unit: str = 'm') -> None:
        """
        Set the time when the expression will be evaluated.

        Args:
            eval_time (int): number of units.
            unit (str): unit type: 'ms', 's', 'm', etc...
        """
        self.promql_expr_test.eval_time = f'{eval_time}{unit}'

    def add_exp_samples(self, sample: str, values: Any) -> None:
        """
        Add an expected sample/output of the query given the series/input.

        Args:
            sample(str): Expected sample.
            Notation: '<metric name>{<label name>=<label value>, ...}'
            values(Any): Value of the sample.
        """
        expr_sample = ExprSample(labels=sample, value=values)
        self.promql_expr_test.exp_samples.append(expr_sample)

    def set_variable(self, variable: str, value: str) -> None:
        """
        If a query makes use of grafonnet variables, for example
        '$osd_hosts', you should change this to a real value. The
        substitution itself happens when run_promtool() is called. Example:

            > self.set_expression('bonding_slaves{master="$osd_hosts"} > 0')
            > self.set_variable('osd_hosts', '127.0.0.1')
            > self.run_promtool()
            > print(self.promql_expr_test.expr)
            > bonding_slaves{master="127.0.0.1"} > 0

        Args:
            variable(str): Variable name
            value(str): Value to replace variable with
        """
        self.variables[variable] = value

    def _apply_variables(self) -> None:
        """Replace every '$<variable>' in the expression with its value."""
        for variable, value in self.variables.items():
            # Escape the name so it is matched literally, and refuse to match
            # when more word characters follow: '$osd' must not rewrite the
            # beginning of '$osd_hosts'.
            pattern = r'\$' + re.escape(variable) + r'(?!\w)'
            replacement = str(value)
            # A callable replacement inserts the value verbatim; a plain
            # string would be parsed as a template and fail on values such
            # as 'ceph-\d+'.
            new_expr = re.sub(pattern, lambda _match: replacement,
                              self.promql_expr_test.expr)
            self.set_expression(new_expr)

    def run_promtool(self) -> bool:
        """
        Run promtool to test the query after setting up the input, output
        and extra parameters.

        On failure the generated YAML and promtool's stderr are printed.
        An exception raised because promtool cannot be started at all is
        not handled here.

        Returns:
            bool: True if successful, False otherwise.
        """
        self._apply_variables()

        test_as_dict = asdict(self.test_file)
        # Start from an empty file so that a repeated call does not append a
        # second YAML document to the first one.
        self.test_output_file.seek(0)
        self.test_output_file.truncate()
        yaml.dump(test_as_dict, self.test_output_file)
        self.test_output_file.flush()

        # Argument list instead of a split string: the temporary path may
        # contain whitespace.
        args = ['promtool', 'test', 'rules', self.test_output_file.name]
        try:
            # stderr has to be captured, otherwise CalledProcessError.stderr
            # is always None and nothing useful is reported below.
            subprocess.run(args, check=True, stderr=subprocess.PIPE,
                           universal_newlines=True)
        except subprocess.CalledProcessError as process_error:
            print(yaml.dump(test_as_dict))
            print(process_error.stderr)
            return False
        return True
Empty file.
10 changes: 10 additions & 0 deletions monitoring/grafana/dashboards/tests/features/ceph-cluster.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Feature: Ceph Cluster Dashboard

Scenario: "Test total PG States"
Given the following series:
| metrics | values |
| ceph_pg_total{foo="var"} | 10 100 |
| ceph_pg_total{foo="bar"} | 20 200 |
Then Grafana panel `PG States` with legend `Total` shows:
| metrics | values |
| {} | 300 |
135 changes: 135 additions & 0 deletions monitoring/grafana/dashboards/tests/features/environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# type: ignore[no-redef]
# pylint: disable=E0611,W0613,E0102
import copy

from behave import given, then, when
from prettytable import PrettyTable

from tests import PromqlTest
from tests.util import get_dashboards_data, resolve_time_and_unit


class GlobalContext:
    """State shared by all behave hooks and steps of a test run.

    Holds the dashboards data returned by get_dashboards_data(), the query
    map taken from it, and the PromqlTest of the scenario currently running.
    """

    def __init__(self):
        self.tested_queries_count = 0
        # Replaced with a fresh PromqlTest by reset_promql_test().
        self.promql_expr_test = None
        self.data = get_dashboards_data()
        self.query_map = self.data['queries']

    def reset_promql_test(self):
        """Create a new PromqlTest seeded with the dashboards' variables."""
        self.promql_expr_test = PromqlTest()
        # Shallow copy so that variables set in one scenario do not end up
        # in the shared dashboards data.
        self.promql_expr_test.variables = copy.copy(self.data['variables'])

    def print_query_stats(self):
        """Print a table with the query coverage per file and in total.

        Raises:
            AssertionError: if a file, or the whole run, has no queries
                (the percentage would otherwise divide by zero).
        """
        table = PrettyTable()
        table.field_names = ['Name', 'Queries', 'Tested', 'Cover']

        def percent(part, whole):
            return f'{round((part / whole) * 100, 2)}%'

        def file_name(path):
            return path.rsplit('/', 1)[-1]

        total = 0
        tested = 0
        for path, stat in self.data['stats'].items():
            assert stat['total'], f'no queries found in {path}'
            table.add_row([file_name(path), stat['total'], stat['tested'],
                           percent(stat['tested'], stat['total'])])
            total += stat['total']
            tested += stat['tested']

        assert total, 'no queries found in any dashboard'
        table.add_row(['Total', total, tested, percent(tested, total)])
        print(table)


# Single shared state object used by every hook and step defined below.
global_context = GlobalContext()

# Behave environment hooks (before_scenario / after_scenario / after_all)
# =======================================================================


def before_scenario(context, scenario):
    """Give every scenario its own, freshly initialised PromqlTest."""
    global_context.reset_promql_test()


def after_scenario(context, scenario):
    """Run promtool on what the scenario set up and require it to pass."""
    promtool_succeeded = global_context.promql_expr_test.run_promtool()
    assert promtool_succeeded


def after_all(context):
    """Print the query coverage table once the whole run has finished."""
    global_context.print_query_stats()


@given("the following series")
def step_impl(context):
    """Register every row of the step table as an input series.

    The table is read through its 'metrics' and 'values' columns.
    """
    for table_row in context.table:
        global_context.promql_expr_test.add_series(table_row['metrics'],
                                                   table_row['values'])


@when('evaluation interval is `{interval}`')
def step_impl(context, interval):
    """Set the evaluation interval of the current test from e.g. `1m`.

    Raises:
        ValueError: if resolve_time_and_unit() yields None as the number
            for the given text.
    """
    interval_without_unit, unit = resolve_time_and_unit(interval)
    if interval_without_unit is None:
        # Report the text taken from the scenario; the parsed number is
        # always None in this branch and would be useless in the message.
        raise ValueError(f'Invalid interval time: {interval}. ' +
                         'A valid time looks like "1m" where you have a number plus a unit')
    global_context.promql_expr_test.set_evaluation_interval(interval_without_unit, unit)


@when('interval is `{interval}`')
def step_impl(context, interval):
    """Set the interval of the current test's input series from e.g. `1m`.

    Raises:
        ValueError: if resolve_time_and_unit() yields None as the number
            for the given text.
    """
    interval_without_unit, unit = resolve_time_and_unit(interval)
    if interval_without_unit is None:
        # Report the text taken from the scenario; the parsed number is
        # always None in this branch and would be useless in the message.
        raise ValueError(f'Invalid interval time: {interval}. ' +
                         'A valid time looks like "1m" where you have a number plus a unit')
    global_context.promql_expr_test.set_interval(interval_without_unit, unit)


@when('evaluation time is `{eval_time}`')
def step_impl(context, eval_time):
    """Set the time at which the expression is evaluated, e.g. `1m`.

    Raises:
        ValueError: if resolve_time_and_unit() yields None as the number
            for the given text.
    """
    amount, unit = resolve_time_and_unit(eval_time)
    if amount is not None:
        global_context.promql_expr_test.set_eval_time(amount, unit)
        return
    raise ValueError(
        f'Invalid evalution time: {eval_time}. '
        'A valid time looks like "1m" where you have a number plus a unit')


@when('variable `{variable}` is `{value}`')
def step_impl(context, variable, value):
    """Record the value a Grafana variable takes in the current scenario."""
    promql_test = global_context.promql_expr_test
    promql_test.set_variable(variable, value)


@then('Grafana panel `{panel_name}` with legend `{legend}` shows')
def step_impl(context, panel_name, legend):
    """
    Look up the query of the given panel/legend, make it the expression
    under test and register the step table as its expected samples.

    This step can have an empty legend. As 'behave' doesn't provide a way
    to say it's empty we use EMPTY to mark as empty.

    Raises:
        KeyError: if no query is known for the panel/legend combination.
    """
    if legend == "EMPTY":
        legend = ''
    query_id = panel_name + '-' + legend
    if query_id not in global_context.query_map:
        # The legend is quoted so that an empty one stays visible.
        raise KeyError(f'Query with legend "{legend}" in panel "{panel_name}" '
                       'couldn\'t be found')

    query = global_context.query_map[query_id]
    global_context.promql_expr_test.set_expression(query['query'])
    for row in context.table:
        metric = row['metrics']
        value = row['values']
        global_context.promql_expr_test.add_exp_samples(metric, float(value))
    # NOTE(review): the counter grows on every use of this step, so a query
    # exercised by several scenarios is counted more than once - confirm
    # that this is the intended meaning of "tested".
    global_context.data['stats'][query['path']]['tested'] += 1


@then('query `{query}` produces')
def step_impl(context, query):
    """Test a literal query and expect the samples listed in the table.

    The table is read through its 'metrics' and 'values' columns; each
    value is converted to float before it is registered.
    """
    global_context.promql_expr_test.set_expression(query)
    for table_row in context.table:
        expected_value = float(table_row['values'])
        global_context.promql_expr_test.add_exp_samples(table_row['metrics'],
                                                        expected_value)
Loading

0 comments on commit 788b810

Please sign in to comment.