diff --git a/Documentation/user-guides/cluster-monitoring.md b/Documentation/user-guides/cluster-monitoring.md index 450c5cbba43..a507dd0b80f 100644 --- a/Documentation/user-guides/cluster-monitoring.md +++ b/Documentation/user-guides/cluster-monitoring.md @@ -463,6 +463,14 @@ spec: regex: etcd_(debugging|disk|request|server).* sourceLabels: - __name__ + - action: drop + regex: apiserver_admission_controller_admission_latencies_seconds_.* + sourceLabels: + - __name__ + - action: drop + regex: apiserver_admission_step_admission_latencies_seconds_.* + sourceLabels: + - __name__ port: https scheme: https tlsConfig: @@ -499,6 +507,16 @@ spec: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true interval: 30s + metricRelabelings: + - action: drop + regex: container_([a-z_]+); + sourceLabels: + - __name__ + - image + - action: drop + regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + sourceLabels: + - __name__ path: /metrics/cadvisor port: https-metrics scheme: https diff --git a/contrib/kube-prometheus/manifests/prometheus-rules.yaml b/contrib/kube-prometheus/manifests/prometheus-rules.yaml index aaf16ff5675..a4da31028ea 100644 --- a/contrib/kube-prometheus/manifests/prometheus-rules.yaml +++ b/contrib/kube-prometheus/manifests/prometheus-rules.yaml @@ -241,25 +241,25 @@ spec: max by (namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) record: 'node:node_filesystem_avail:' - expr: | - sum(irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m])) + - sum(irate(node_network_transmit_bytes_total{job="node-exporter",device="eth0"}[1m])) + sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) record: :node_net_utilisation:sum_irate - expr: | sum by (node) ( - (irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m]) + - irate(node_network_transmit_bytes_total{job="node-exporter",device="eth0"}[1m])) + (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info: ) record: node:node_net_utilisation:sum_irate - expr: | - sum(irate(node_network_receive_drop_total{job="node-exporter",device="eth0"}[1m])) + - sum(irate(node_network_transmit_drop_total{job="node-exporter",device="eth0"}[1m])) + sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) record: :node_net_saturation:sum_irate - expr: | sum by (node) ( - (irate(node_network_receive_drop_total{job="node-exporter",device="eth0"}[1m]) + - irate(node_network_transmit_drop_total{job="node-exporter",device="eth0"}[1m])) + (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info: ) diff --git a/contrib/kube-prometheus/manifests/prometheus-serviceMonitorApiserver.yaml b/contrib/kube-prometheus/manifests/prometheus-serviceMonitorApiserver.yaml index 6d884a2b8ce..5dea38e4014 100644 --- a/contrib/kube-prometheus/manifests/prometheus-serviceMonitorApiserver.yaml +++ b/contrib/kube-prometheus/manifests/prometheus-serviceMonitorApiserver.yaml @@ -14,6 +14,14 @@ spec: regex: etcd_(debugging|disk|request|server).* sourceLabels: - __name__ + - action: drop + regex: apiserver_admission_controller_admission_latencies_seconds_.* + sourceLabels: + - __name__ + - action: drop + regex: apiserver_admission_step_admission_latencies_seconds_.* + sourceLabels: + - __name__ port: https scheme: https tlsConfig: diff --git a/contrib/kube-prometheus/manifests/prometheus-serviceMonitorKubelet.yaml b/contrib/kube-prometheus/manifests/prometheus-serviceMonitorKubelet.yaml index 97d7f1a17e5..590a5cd4949 100644 --- a/contrib/kube-prometheus/manifests/prometheus-serviceMonitorKubelet.yaml +++ b/contrib/kube-prometheus/manifests/prometheus-serviceMonitorKubelet.yaml @@ -17,6 +17,16 @@ spec: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true interval: 30s + metricRelabelings: + - action: drop + regex: container_([a-z_]+); + sourceLabels: + - __name__ + - image + - action: drop + regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + sourceLabels: + - __name__ path: /metrics/cadvisor port: https-metrics scheme: https