kube-updater: Implement UnhealthyWorkloadTrigger (gravitational#22737)

This trigger allows a maintenance to start if the teleport-kube-agent is unhealthy. A workload is unhealthy if at least one of its managed pods is unhealthy. A pod is unhealthy if it has not been ready for 10 minutes or more.
xiv · Mar 16, 2023 · b2d5ea5 · b2d5ea5
1 parent 3f0c74b
commit b2d5ea5
Show file tree

Hide file tree

Showing 7 changed files with 509 additions and 7 deletions.
diff --git a/integrations/kube-agent-updater/go.mod b/integrations/kube-agent-updater/go.mod
@@ -20,6 +20,7 @@ require (
 	github.com/cespare/xxhash/v2 v2.1.2 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/emicklei/go-restful/v3 v3.9.0 // indirect
+	github.com/evanphx/json-patch v4.12.0+incompatible // indirect
 	github.com/evanphx/json-patch/v5 v5.6.0 // indirect
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
 	github.com/go-logr/logr v1.2.3 // indirect

diff --git a/integrations/kube-agent-updater/go.sum b/integrations/kube-agent-updater/go.sum
@@ -76,6 +76,7 @@ github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
 github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84=
+github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
 github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww=
 github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4=
 github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=

diff --git a/integrations/kube-agent-updater/pkg/controller/updater.go b/integrations/kube-agent-updater/pkg/controller/updater.go
@@ -22,7 +22,7 @@ import (
 
 	"github.com/docker/distribution/reference"
 	"github.com/gravitational/trace"
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
 
 	"github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/img"
@@ -42,7 +42,7 @@ type VersionUpdater struct {
 // validating the new image signature.
 // If all steps are successfully executed and there's a new version, it returns
 // a digested reference to the new image that should be deployed.
-func (r *VersionUpdater) GetVersion(ctx context.Context, obj v1.Object, currentVersion string) (img.NamedTaggedDigested, error) {
+func (r *VersionUpdater) GetVersion(ctx context.Context, obj client.Object, currentVersion string) (img.NamedTaggedDigested, error) {
 	// Those are debug logs only
 	log := ctrllog.FromContext(ctx).V(1)
 

diff --git a/integrations/kube-agent-updater/pkg/maintenance/mock.go b/integrations/kube-agent-updater/pkg/maintenance/mock.go
@@ -19,7 +19,7 @@ package maintenance
 import (
 	"context"
 
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 // TriggerMock is a fake Trigger that return a static answer. This is used
@@ -35,7 +35,7 @@ func (m TriggerMock) Name() string {
 }
 
 // CanStart returns the statically defined maintenance approval result.
-func (m TriggerMock) CanStart(_ context.Context, _ v1.Object) (bool, error) {
+func (m TriggerMock) CanStart(_ context.Context, _ client.Object) (bool, error) {
 	return m.canStart, nil
 }
 

diff --git a/integrations/kube-agent-updater/pkg/maintenance/trigger.go b/integrations/kube-agent-updater/pkg/maintenance/trigger.go
@@ -19,7 +19,7 @@ package maintenance
 import (
 	"context"
 
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
 )
 
@@ -33,15 +33,17 @@ import (
 // of error.
 type Trigger interface {
 	Name() string
-	CanStart(ctx context.Context, object v1.Object) (bool, error)
+	CanStart(ctx context.Context, object client.Object) (bool, error)
 	Default() bool
 }
 
 // Triggers is a list of Trigger. Triggers are OR-ed: any trigger firing in the
 // list will cause the maintenance to be triggered.
 type Triggers []Trigger
 
-func (t Triggers) CanStart(ctx context.Context, object v1.Object) bool {
+// CanStart checks if the maintenance can be started. It will return true if at
+// least one Trigger approves the maintenance.
+func (t Triggers) CanStart(ctx context.Context, object client.Object) bool {
 	log := ctrllog.FromContext(ctx).V(1)
 	for _, trigger := range t {
 		start, err := trigger.CanStart(ctx, object)

diff --git a/integrations/kube-agent-updater/pkg/maintenance/unhealthy.go b/integrations/kube-agent-updater/pkg/maintenance/unhealthy.go
@@ -0,0 +1,158 @@
+/*
+Copyright 2023 Gravitational, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package maintenance
+
+import (
+	"context"
+	"time"
+
+	"github.com/gravitational/trace"
+	appsv1 "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	kclient "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+const (
+	podReadinessGracePeriod = 10 * time.Minute // how long a pod may stay not Ready before isPodUnhealthy reports it
+	deploymentKind          = "Deployment"     // presumably the Kubernetes Kind of a Deployment; not referenced in this file
+	statefulSetKind         = "StatefulSet"    // presumably the Kubernetes Kind of a StatefulSet; not referenced in this file
+)
+
+// unhealthyWorkloadTrigger is a Trigger that lets a maintenance start when the
+// watched workload is unhealthy, so that an agent broken by a new version is
+// not left broken until the next maintenance window. The embedded Client is
+// used to list the pods selected by the workload.
+type unhealthyWorkloadTrigger struct {
+	name string // returned by Name
+	kclient.Client
+}
+
+// Name returns the name the trigger was created with.
+func (u unhealthyWorkloadTrigger) Name() string {
+	return u.name
+}
+
+// CanStart implements maintenance.Trigger. It reports whether the pods
+// selected by the given workload are unhealthy, in which case a maintenance is
+// allowed to start. Only *appsv1.Deployment and *appsv1.StatefulSet are
+// supported; any other object, including nil, yields a BadParameter error.
+func (u unhealthyWorkloadTrigger) CanStart(ctx context.Context, object kclient.Object) (bool, error) {
+	var labelSelector *metav1.LabelSelector
+	switch workload := object.(type) {
+	case *appsv1.Deployment:
+		labelSelector = workload.Spec.Selector
+	case *appsv1.StatefulSet:
+		labelSelector = workload.Spec.Selector
+	default:
+		// Report the Go type rather than the object's GroupVersionKind: the
+		// latter can be empty on typed objects, and calling a method on a nil
+		// object would panic, whereas %T simply prints "<nil>".
+		return false, trace.BadParameter("workload type '%T' not supported", object)
+	}
+
+	selector, err := metav1.LabelSelectorAsSelector(labelSelector)
+	if err != nil {
+		return false, trace.Wrap(err)
+	}
+	return u.isWorkloadUnhealthy(ctx, object.GetNamespace(), selector)
+}
+
+// Default returns the decision to use if the trigger can't be evaluated; it is always false here.
+func (u unhealthyWorkloadTrigger) Default() bool {
+	return false
+}
+
+// isWorkloadUnhealthy checks the pods selected by a workload and returns true
+// if at least one pod is unhealthy.
+func (u unhealthyWorkloadTrigger) isWorkloadUnhealthy(ctx context.Context, namespace string, selector labels.Selector) (bool, error) {
+	managedPods := &v1.PodList{}
+	matchingSelector := kclient.MatchingLabelsSelector{Selector: selector}
+	inNamespace := kclient.InNamespace(namespace)
+	err := u.List(ctx, managedPods, inNamespace, matchingSelector)
+	if err != nil {
+		return false, trace.Wrap(err)
+	}
+
+	// If the deployment manages no pods, it is considered unhealthy
+	// and can be updated at any time
+	if len(managedPods.Items) == 0 {
+		return true, nil
+	}
+
+	// If at least a pod is unhealthy, we consider the whole workload unhealthy
+	return len(UnhealthyPods(managedPods)) > 0, nil
+}
+
+// NewUnhealthyWorkloadTrigger returns a Trigger, named name, that starts a
+// maintenance if the watched workload is unhealthy; client is used to list pods.
+func NewUnhealthyWorkloadTrigger(name string, client kclient.Client) Trigger {
+	return unhealthyWorkloadTrigger{
+		name:   name,
+		Client: client,
+	}
+}
+
+// UnhealthyPods takes a v1.PodList and returns pointers to every pod of the
+// list that isPodUnhealthy reports as unhealthy. The returned pointers refer
+// to the elements of list.Items, not to copies. A nil list yields nil.
+func UnhealthyPods(list *v1.PodList) []*v1.Pod {
+	if list == nil {
+		return nil
+	}
+
+	var unhealthyPods []*v1.Pod
+	// Iterate by index: taking the address of a range variable would, before
+	// Go 1.22, make every returned pointer alias the same variable, i.e. the
+	// last pod visited.
+	for i := range list.Items {
+		pod := &list.Items[i]
+		if isPodUnhealthy(pod) {
+			unhealthyPods = append(unhealthyPods, pod)
+		}
+	}
+	return unhealthyPods
+}
+
+// isPodUnhealthy reports whether a pod is unhealthy, i.e. whether it has not
+// been Ready for longer than podReadinessGracePeriod.
+// This heuristic also catches infrastructure issues such as a lack of room to
+// schedule the pod. False positives are less problematic than false negatives
+// in our case, so this is acceptable. Should false positives become frequent,
+// a more specific heuristic could look at the container statuses.
+func isPodUnhealthy(pod *v1.Pod) bool {
+	// A terminating pod should be gone soon: ignore it by treating it as
+	// healthy.
+	if pod.DeletionTimestamp != nil {
+		return false
+	}
+
+	ready := getPodReadyCondition(&pod.Status)
+	switch {
+	case ready == nil:
+		// No ready condition at all: something is wrong with the pod.
+		return true
+	case ready.Status == v1.ConditionTrue:
+		// The pod is marked ready, it is healthy.
+		return false
+	}
+
+	// The pod is not ready: it only becomes unhealthy once the grace period
+	// following its last transition has elapsed.
+	deadline := ready.LastTransitionTime.Add(podReadinessGracePeriod)
+	return time.Now().After(deadline)
+}
+
+// getPodReadyCondition returns a pointer to a copy of the first condition of
+// type v1.PodReady found in the pod status, or nil if there is none.
+func getPodReadyCondition(status *v1.PodStatus) *v1.PodCondition {
+	for i := range status.Conditions {
+		if status.Conditions[i].Type != v1.PodReady {
+			continue
+		}
+		found := status.Conditions[i]
+		return &found
+	}
+	return nil
+}