Skip to content

Commit

Permalink
Fix argocd_cluster_connection_status metric (argoproj#7419)
Browse files Browse the repository at this point in the history
* bug: fix argocd_cluster_connection_status metric + docs

Signed-off-by: Leonardo Luz Almeida <[email protected]>
  • Loading branch information
leoluz authored Oct 18, 2021
1 parent 83ff035 commit b5d1433
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 15 deletions.
2 changes: 1 addition & 1 deletion controller/metrics/clustercollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,6 @@ func (c *clusterCollector) Collect(ch chan<- prometheus.Metric) {
cacheAgeSeconds = int(now.Sub(*c.LastCacheSyncTime).Seconds())
}
ch <- prometheus.MustNewConstMetric(descClusterCacheAgeSeconds, prometheus.GaugeValue, float64(cacheAgeSeconds), defaultValues...)
ch <- prometheus.MustNewConstMetric(descClusterConnectionStatus, prometheus.GaugeValue, boolFloat64(c.SyncError != nil), append(defaultValues, c.K8SVersion)...)
ch <- prometheus.MustNewConstMetric(descClusterConnectionStatus, prometheus.GaugeValue, boolFloat64(c.SyncError == nil), append(defaultValues, c.K8SVersion)...)
}
}
104 changes: 104 additions & 0 deletions controller/metrics/clustercollector_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package metrics

import (
"errors"
"testing"

gitopsCache "github.com/argoproj/gitops-engine/pkg/cache"
)

// TestMetricClusterConnectivity checks the argocd_cluster_connection_status
// gauge exposed by the metrics endpoint: the expected sample is 1 for a
// cluster without a sync error, 0 for a cluster with one, and there is one
// sample per cluster supplied.
func TestMetricClusterConnectivity(t *testing.T) {
	type scenario struct {
		testCombination
		skip         bool
		description  string
		metricLabels []string
		clustersInfo []gitopsCache.ClusterInfo
	}

	// cluster builds the info for a 1.21 cluster with the given sync error.
	cluster := func(server string, syncErr error) gitopsCache.ClusterInfo {
		return gitopsCache.ClusterInfo{
			Server:     server,
			K8SVersion: "1.21",
			SyncError:  syncErr,
		}
	}

	scenarios := []scenario{
		{
			description:  "metric will have value 1 if connected with the cluster",
			skip:         false,
			metricLabels: []string{"non-existing"},
			clustersInfo: []gitopsCache.ClusterInfo{
				cluster("server1", nil),
			},
			testCombination: testCombination{
				applications: []string{fakeApp},
				responseContains: `
# TYPE argocd_cluster_connection_status gauge
argocd_cluster_connection_status{k8s_version="1.21",server="server1"} 1
`,
			},
		},
		{
			description:  "metric will have value 0 if not connected with the cluster",
			skip:         false,
			metricLabels: []string{"non-existing"},
			clustersInfo: []gitopsCache.ClusterInfo{
				cluster("server1", errors.New("error connecting with cluster")),
			},
			testCombination: testCombination{
				applications: []string{fakeApp},
				responseContains: `
# TYPE argocd_cluster_connection_status gauge
argocd_cluster_connection_status{k8s_version="1.21",server="server1"} 0
`,
			},
		},
		{
			description:  "will have one metric per cluster",
			skip:         false,
			metricLabels: []string{"non-existing"},
			clustersInfo: []gitopsCache.ClusterInfo{
				cluster("server1", nil),
				cluster("server2", nil),
				cluster("server3", nil),
			},
			testCombination: testCombination{
				applications: []string{fakeApp},
				responseContains: `
# TYPE argocd_cluster_connection_status gauge
argocd_cluster_connection_status{k8s_version="1.21",server="server1"} 1
argocd_cluster_connection_status{k8s_version="1.21",server="server2"} 1
argocd_cluster_connection_status{k8s_version="1.21",server="server3"} 1
`,
			},
		},
	}

	for _, sc := range scenarios {
		sc := sc // per-iteration copy captured by the subtest closure
		t.Run(sc.description, func(t *testing.T) {
			// A skipped scenario still runs as an (empty) subtest.
			if sc.skip {
				return
			}
			runTest(t, TestMetricServerConfig{
				FakeAppYAMLs:     sc.applications,
				ExpectedResponse: sc.responseContains,
				AppLabels:        sc.metricLabels,
				ClustersInfo:     sc.clustersInfo,
			})
		})
	}
}
45 changes: 41 additions & 4 deletions controller/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"testing"
"time"

gitopsCache "github.com/argoproj/gitops-engine/pkg/cache"
"github.com/argoproj/gitops-engine/pkg/sync/common"
"github.com/ghodss/yaml"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -161,20 +162,55 @@ func testApp(t *testing.T, fakeAppYAMLs []string, expectedResponse string) {
testMetricServer(t, fakeAppYAMLs, expectedResponse, []string{})
}

// fakeClusterInfo is a test stub holding a fixed list of cluster infos.
type fakeClusterInfo struct {
	clustersInfo []gitopsCache.ClusterInfo
}

// GetClustersInfo returns the cluster infos stored in the stub.
func (fake *fakeClusterInfo) GetClustersInfo() []gitopsCache.ClusterInfo {
	return fake.clustersInfo
}

// TestMetricServerConfig bundles the inputs consumed by runTest.
type TestMetricServerConfig struct {
// FakeAppYAMLs is passed to newFakeLister to build the application lister.
FakeAppYAMLs []string
// ExpectedResponse holds the lines that must appear in the /metrics body.
ExpectedResponse string
// AppLabels is forwarded to NewMetricsServer.
AppLabels []string
// ClustersInfo, when non-empty, makes runTest register a clusterCollector
// backed by these entries.
ClustersInfo []gitopsCache.ClusterInfo
}

// testMetricServer runs a metrics-endpoint test with no cluster info: it packs
// its arguments into a TestMetricServerConfig with an empty ClustersInfo and
// delegates to runTest.
func testMetricServer(t *testing.T, fakeAppYAMLs []string, expectedResponse string, appLabels []string) {
t.Helper()
// NOTE(review): the next line looks like the pre-change statement kept by the
// diff view (this file's hunk lists deletions); cancel and appLister are not
// used anywhere else in this function as shown. Confirm against the real file.
cancel, appLister := newFakeLister(fakeAppYAMLs...)
cfg := TestMetricServerConfig{
FakeAppYAMLs: fakeAppYAMLs,
ExpectedResponse: expectedResponse,
AppLabels: appLabels,
ClustersInfo: []gitopsCache.ClusterInfo{},
}
runTest(t, cfg)
}

// runTest builds a metrics server from cfg, serves a GET /metrics request
// through its handler, and asserts that every expected line appears in the
// response body.
func runTest(t *testing.T, cfg TestMetricServerConfig) {
t.Helper()
cancel, appLister := newFakeLister(cfg.FakeAppYAMLs...)
defer cancel()
// NOTE(review): the next line references appLabels, which is not a parameter
// of runTest; it looks like the pre-change statement kept by the diff view and
// superseded by the line after it. Confirm against the real file.
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, appLabels)
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, cfg.AppLabels)
assert.NoError(t, err)

// Register a cluster collector only when the test supplies cluster info.
if len(cfg.ClustersInfo) > 0 {
ci := &fakeClusterInfo{clustersInfo: cfg.ClustersInfo}
collector := &clusterCollector{
infoSource: ci,
info: ci.GetClustersInfo(),
}
metricsServ.registry.MustRegister(collector)
}

// Serve the request in-process through a response recorder.
req, err := http.NewRequest("GET", "/metrics", nil)
assert.NoError(t, err)
rr := httptest.NewRecorder()
metricsServ.Handler.ServeHTTP(rr, req)
// NOTE(review): testify's assert.Equal expects (expected, actual); the
// arguments here are in the opposite order, which affects only the failure
// message.
assert.Equal(t, rr.Code, http.StatusOK)
body := rr.Body.String()
log.Println(body)
// NOTE(review): the next line references expectedResponse, which is not in
// scope in runTest; it looks like the pre-change statement kept by the diff
// view and superseded by the line after it. Confirm against the real file.
assertMetricsPrinted(t, expectedResponse, body)
assertMetricsPrinted(t, cfg.ExpectedResponse, body)
}

type testCombination struct {
Expand Down Expand Up @@ -310,6 +346,7 @@ argocd_app_sync_total{dest_server="https://localhost:6443",name="my-app",namespa

// assertMetricsPrinted asserts every line in the expected lines appears in the body
func assertMetricsPrinted(t *testing.T, expectedLines, body string) {
t.Helper()
for _, line := range strings.Split(expectedLines, "\n") {
if line == "" {
continue
Expand Down
27 changes: 17 additions & 10 deletions docs/operator-manual/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,23 @@ Argo CD exposes two sets of Prometheus metrics
## Application Metrics
Metrics about applications. Scraped at the `argocd-metrics:8082/metrics` endpoint.

* `argocd_app_info`: Information about Applications. It contains labels such as `sync_status` and `health_status` that reflect the application state in ArgoCD.
* `argocd_app_sync_total`: Counter for application sync history
* `argocd_app_k8s_request_total`: Number of kubernetes requests executed during application reconciliation
* `argocd_kubectl_exec_total`: Number of kubectl executions
* `argocd_kubectl_exec_pending`: Number of pending kubectl executions
* `argocd_app_reconcile`: Application reconciliation performance.
* `argocd_cluster_events_total`: Number of processes k8s resource events.
* `argocd_redis_request_total`: Number of redis requests executed during application reconciliation
* `argocd_redis_request_duration`: Redis requests duration.
* `argocd_app_labels`: Argo Application labels converted to Prometheus labels. Disabled by default. See section bellow about how to enable it.
| Metric | Type | Description |
|--------|:----:|-------------|
| `argocd_app_info` | gauge | Information about Applications. It contains labels such as `sync_status` and `health_status` that reflect the application state in ArgoCD. |
| `argocd_app_sync_total` | counter | Counter for application sync history |
| `argocd_app_k8s_request_total` | counter | Number of kubernetes requests executed during application reconciliation |
| `argocd_kubectl_exec_total` | counter | Number of kubectl executions |
| `argocd_kubectl_exec_pending` | gauge | Number of pending kubectl executions |
| `argocd_app_reconcile` | histogram | Application reconciliation performance. |
| `argocd_cluster_events_total` | counter | Number of processed k8s resource events. |
| `argocd_redis_request_total` | counter | Number of redis requests executed during application reconciliation |
| `argocd_redis_request_duration` | histogram | Redis requests duration. |
| `argocd_app_labels` | gauge | Argo Application labels converted to Prometheus labels. Disabled by default. See section below about how to enable it. |
| `argocd_cluster_info` | gauge | Information about cluster. |
| `argocd_cluster_api_resource_objects` | gauge | Number of k8s resource objects in the cache. |
| `argocd_cluster_api_resources` | gauge | Number of monitored kubernetes API resources. |
| `argocd_cluster_cache_age_seconds` | gauge | Cluster cache age in seconds. |
| `argocd_cluster_connection_status` | gauge | The k8s cluster current connection status. |

If you use Argo CD with frequent creation and deletion of applications and projects,
the metrics page will keep your applications' and projects' history in cache.
Expand Down

0 comments on commit b5d1433

Please sign in to comment.