Skip to content

Commit

Permalink
KUB-64 - Added support for k8s statefulsets (#158)
Browse files Browse the repository at this point in the history
* Added support for k8s statefulsets

Signed-off-by: nileshbhadana <[email protected]>
  • Loading branch information
nileshbhadana authored Jan 24, 2022
1 parent bbc4fab commit b841055
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 7 deletions.
2 changes: 1 addition & 1 deletion kubernetes/tests/basic_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ spec:
components:
playframework:
dimensions:
- service: sample-play-service
- service: sample-play-service
15 changes: 9 additions & 6 deletions kubernetes/tests/metric_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ spec:
documentation: https://github.com/grofers/legend/tree/master/docs
metrics_definition: https://github.com/grofers/legend
tags:
- prod
- infra
- prod
- infra
components:

# Application frameworks
Expand Down Expand Up @@ -52,8 +52,8 @@ spec:
- host: sample-ec2-host
db:
- masters:
- host: sample-mysql-host-master
db_name: sample-db
- host: sample-mysql-host-master
db_name: sample-db
slaves:
- host: sample-mysql-host-slave
db_name: sample-db
Expand Down Expand Up @@ -92,6 +92,9 @@ spec:
platform_k8s_deployment:
dimensions:
- deployment_name: sample-deployment-name
platform_k8s_statefulset:
dimensions:
- statefulset_name: sample-statefulset-name
platform_k8s_ingress:
dimensions:
- namespace: sample-namespace
Expand Down Expand Up @@ -132,7 +135,7 @@ spec:
- job: sample-starlette-service
path: sample-starlette-path

# Applications
# Applications
consul:
dimensions:
- region: sample-aws-region
Expand All @@ -158,4 +161,4 @@ spec:
region: sample-region
filters:
- "err"
- "500"
- "500"
158 changes: 158 additions & 0 deletions legend/metrics_library/metrics/platform_k8s_statefulset_metrics.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Panel definitions for the platform_k8s_statefulset component.
# This file is a Jinja template: each panel loops over `data` and emits its
# targets once per entry, substituting that entry's `statefulset_name`.
# NOTE(review): the selectors in the panels prefix-match on statefulset_name,
# so any workload whose name starts with the same string is included too -
# confirm this is intended.
component: platform_k8s_statefulset
data_source_type: Prometheus
metrics_source: https://github.com/kubernetes/kube-state-metrics
reference: https://blog.freshtracks.io/a-deep-dive-into-kubernetes-metrics-part-3-container-resource-metrics-361c5ee46e66
description: Kubernetes Statefulset
panels:
# CPU usage rate divided by the CPU limit, per container, as a percentage.
# The target tagged ref_no 1 is the one the alert condition refers to.
- title: (U) CPU Utilisation
  type: Graph
  description: current cpu utilisation per container
  targets:
    {% for dimension in data %}
    - metric: round(((sum(rate(container_cpu_usage_seconds_total{container!~"POD", pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (container) / sum(kube_pod_container_resource_limits{pod=~"^{{ dimension.statefulset_name }}.*", resource="cpu", unit="core"}) by (container)) * 100), 0.1)
      legend: '{{ '{{container}}' }}'
      ref_no: 1
    {% endfor %}
  formatY1: percent
  alert_config:
    priority: P3
    message: High CPU Utilisation
    rule:
      for_duration: 5m
      evaluate_every: 1m
    condition_query:
      # Fires when the 5m average of target 1 goes above 80.
      - OR,avg,1,now,5m,gt,80

# Per-container rate of CFS throttled seconds over a 5m window.
- title: (S) CPU Saturation
  type: Graph
  description: Amount of time the container was throttled
  targets:
    {% for dimension in data %}
    - metric: sum(rate(container_cpu_cfs_throttled_seconds_total{container!~"POD", pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (container)
      legend: '{{ '{{container}}' }}'
    {% endfor %}
  formatY1: s

# Working-set memory divided by the memory limit, per container, as a
# percentage. The target tagged ref_no 1 feeds the alert condition.
- title: (U) Memory Utilisation
  type: Graph
  description: Current memory usage per container
  targets:
    {% for dimension in data %}
    - metric: round(((sum(container_memory_working_set_bytes{container!~"POD", pod=~"^{{ dimension.statefulset_name }}.*"}) by (container) / sum(kube_pod_container_resource_limits{pod=~"^{{ dimension.statefulset_name }}.*", resource="memory", unit="byte"}) by (container)) * 100), 0.1)
      legend: '{{ '{{container}}' }}'
      ref_no: 1
    {% endfor %}
  formatY1: percent
  alert_config:
    priority: P3
    message: High Memory Utilisation
    rule:
      for_duration: 5m
      evaluate_every: 1m
    condition_query:
      # Fires when the 5m average of target 1 goes above 80.
      - OR,avg,1,now,5m,gt,80

# Memory headroom per container, in bytes: the memory limit minus the current
# working set. The previous query was the usage/limit ratio, which duplicated
# the Memory Utilisation panel and was rendered with a bytes unit even though
# it is dimensionless.
- title: (S) Memory Saturation
  type: Graph
  description: Amount of available memory from the limit
  targets:
    {% for dimension in data %}
    - metric: (sum(kube_pod_container_resource_limits{pod=~"^{{ dimension.statefulset_name }}.*", resource="memory", unit="byte"}) by (container) - sum(container_memory_working_set_bytes{pod=~"^{{ dimension.statefulset_name }}.*"}) by (container))
      legend: '{{ '{{container}}' }}'
    {% endfor %}
  formatY1: bytes

# Filesystem write and read byte rates over 5m, split by container and device.
- title: (U) Disk Utilisation
  type: Graph
  description: bytes read/written
  targets:
    {% for dimension in data %}
    - metric: sum(rate(container_fs_writes_bytes_total{pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (container,device)
      legend: '{{ '{{container}} {{device}} Writes' }}'
    - metric: sum(rate(container_fs_reads_bytes_total{pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (container,device)
      legend: '{{ '{{container}} {{device}} Reads' }}'
    {% endfor %}
  formatY1: bytes

# Network receive/transmit byte rates over 5m, split by pod and interface.
# The legend includes the interface because the queries group by it; with the
# pod name alone, a pod with several interfaces produced identically
# labelled series.
- title: (U) Network Utilisation
  type: Graph
  description: bytes received/transmitted
  targets:
    {% for dimension in data %}
    - metric: sum(rate(container_network_receive_bytes_total{pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (pod, interface)
      legend: '{{ '{{pod}} {{interface}} rx' }}'
    - metric: sum(rate(container_network_transmit_bytes_total{pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (pod, interface)
      legend: '{{ '{{pod}} {{interface}} tx' }}'
    {% endfor %}

# Receive and transmit error rates over 5m, aggregated per pod.
- title: (E) Network Errors
  type: Graph
  description: Number of network errors
  targets:
    {% for dimension in data %}
    - metric: sum(rate(container_network_receive_errors_total{pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (pod)
      legend: '{{ '{{pod}} rx' }}'
    - metric: sum(rate(container_network_transmit_errors_total{pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (pod)
      legend: '{{ '{{pod}} tx' }}'
    {% endfor %}

# (desired replicas - ready replicas) / desired replicas, as a percentage.
# The target tagged ref_no 1 feeds the P2 alert condition.
- title: (E) Unavailable Replica Percentage
  type: Graph
  description: Percentage of replicas not available in the statefulset
  targets:
    {% for dimension in data %}
    - metric: round((((kube_statefulset_replicas{statefulset=~"^{{ dimension.statefulset_name }}.*"}-kube_statefulset_status_replicas_ready{statefulset=~"^{{ dimension.statefulset_name }}.*"})/kube_statefulset_replicas{statefulset=~"^{{ dimension.statefulset_name }}.*"}) * 100), 1)
      legend: '{{ '{{statefulset}}' }}'
      ref_no: 1
    {% endfor %}
  formatY1: percent
  alert_config:
    priority: P2
    message: High Unavailable Replica Percentage
    rule:
      for_duration: 5m
      evaluate_every: 1m
    condition_query:
      # Fires when the 5m average of target 1 goes above 60.
      - OR,avg,1,now,5m,gt,60

# Number of ready replicas reported for each matching statefulset.
- title: (E) Running replicas
  type: Graph
  description: Running replicas
  targets:
    {% for dimension in data %}
    - metric: kube_statefulset_status_replicas_ready{statefulset=~"^{{ dimension.statefulset_name }}.*"}
      legend: '{{ '{{statefulset}}' }}'
      ref_no: 1
    {% endfor %}

# CPU usage rate divided by the CPU request (not the limit), per container,
# as a percentage.
- title: (U) CPU Utilisation(Request)
  type: Graph
  description: current cpu utilisation per container from the request
  targets:
    {% for dimension in data %}
    - metric: round(((sum(rate(container_cpu_usage_seconds_total{container!~"POD", pod=~"^{{ dimension.statefulset_name }}.*"}[5m])) by (container) / sum(kube_pod_container_resource_requests{pod=~"^{{ dimension.statefulset_name }}.*", resource="cpu", unit="core"}) by (container)) * 100), 0.1)
      legend: '{{ '{{container}}' }}'
      ref_no: 1
    {% endfor %}
  formatY1: percent

# Memory headroom per container relative to the request, in bytes: the memory
# request minus the current working set (negative once usage exceeds the
# request). The previous query was the usage/request ratio, which duplicated
# the Memory Utilisation(Request) panel and was rendered with a bytes unit
# even though it is dimensionless.
- title: (S) Memory Saturation(Request)
  type: Graph
  description: Amount of available memory from the request
  targets:
    {% for dimension in data %}
    - metric: (sum(kube_pod_container_resource_requests{pod=~"^{{ dimension.statefulset_name }}.*", resource="memory", unit="byte"}) by (container) - sum(container_memory_working_set_bytes{pod=~"^{{ dimension.statefulset_name }}.*"}) by (container))
      legend: '{{ '{{container}}' }}'
    {% endfor %}
  formatY1: bytes

# Working-set memory divided by the memory request (not the limit), per
# container, as a percentage. The description now says "from the request" so
# it is distinguishable from the limit-based Memory Utilisation panel, in
# line with the CPU Utilisation(Request) panel.
- title: (U) Memory Utilisation(Request)
  type: Graph
  description: Current memory usage per container from the request
  targets:
    {% for dimension in data %}
    - metric: round(((sum(container_memory_working_set_bytes{container!~"POD", pod=~"^{{ dimension.statefulset_name }}.*"}) by (container) / sum(kube_pod_container_resource_requests{pod=~"^{{ dimension.statefulset_name }}.*", resource="memory", unit="byte"}) by (container)) * 100), 0.1)
      legend: '{{ '{{container}}' }}'
      ref_no: 1
    {% endfor %}
  formatY1: percent
11 changes: 11 additions & 0 deletions legend/metrics_library/metrics_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@
},
}

# Schema for the `platform_k8s_statefulset` component: an optional
# `data_source` string plus a list of dimension dicts, each of which must
# carry a `statefulset_name` string.
platform_k8s_statefulset_schema = {
    "data_source": {"type": "string", "required": False},
    "dimensions": {
        "type": "list",
        # Every list item is a dict describing one statefulset.
        "schema": {
            "type": "dict",
            "schema": {
                "statefulset_name": {"type": "string", "required": True},
            },
        },
    },
}

platform_k8s_cronjob_schema = {
"data_source": {"type": "string", "required": False},
"dimensions": {
Expand Down
6 changes: 6 additions & 0 deletions legend/metrics_library/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
promtail_schema,
celery_schema,
platform_k8s_deployment_schema,
platform_k8s_statefulset_schema,
platform_k8s_ingress_schema,
redis_schema,
redis_elasticache_schema,
Expand Down Expand Up @@ -208,6 +209,11 @@ def md(x, y):
"schema": md(default_panels_schema, platform_k8s_deployment_schema),
"required": False,
},
# Optional `platform_k8s_statefulset` component entry. Its schema is built by
# passing the default panels schema and the statefulset schema to md()
# (presumably a dict merge - confirm against md's definition).
"platform_k8s_statefulset": {
"type": "dict",
"schema": md(default_panels_schema, platform_k8s_statefulset_schema),
"required": False,
},
"platform_k8s_hpa": {
"type": "dict",
"schema": md(default_panels_schema, platform_k8s_hpa_schema),
Expand Down
3 changes: 3 additions & 0 deletions sample_input.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ components:
platform_k8s_deployment:
dimensions:
- deployment_name: sample-deployment-name
platform_k8s_statefulset:
dimensions:
- statefulset_name: sample-statefulset-name
platform_k8s_ingress:
dimensions:
- namespace: sample-namespace
Expand Down

0 comments on commit b841055

Please sign in to comment.