Skip to content

Commit

Permalink
Merge pull request karmada-io#1854 from likakuli/feature_notreadytaint
Browse files Browse the repository at this point in the history
feat: use taint instead of condition to filter cluster
  • Loading branch information
karmada-bot authored Jul 22, 2022
2 parents cb58d5f + a42c819 commit 02836f9
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pkg/apis/cluster/v1alpha1/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ const (
EventReasonCreateExecutionSpaceFailed = "CreateExecutionSpaceFailed"
// EventReasonRemoveExecutionSpaceFailed indicates that remove execution space failed.
EventReasonRemoveExecutionSpaceFailed = "RemoveExecutionSpaceFailed"
// EventReasonTaintClusterByConditionFailed indicates that tainting the cluster by condition failed.
EventReasonTaintClusterByConditionFailed = "TaintClusterByCondition"
)
55 changes: 51 additions & 4 deletions pkg/controllers/cluster/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,32 @@ const (

var (
	// UnreachableTaintTemplate is the taint for when a cluster becomes unreachable.
	// Used for taint based eviction; it carries the NoExecute effect.
	UnreachableTaintTemplate = &corev1.Taint{
		Key: clusterv1alpha1.TaintClusterUnreachable,
		Effect: corev1.TaintEffectNoExecute,
	}

	// UnreachableTaintTemplateForSched is the taint for when a cluster becomes unreachable.
	// Used for taint based schedule; it shares its key with UnreachableTaintTemplate
	// but carries the NoSchedule effect.
	UnreachableTaintTemplateForSched = &corev1.Taint{
		Key: clusterv1alpha1.TaintClusterUnreachable,
		Effect: corev1.TaintEffectNoSchedule,
	}

	// NotReadyTaintTemplate is the taint for when a cluster is not ready for executing resources.
	// Used for taint based eviction; it carries the NoExecute effect.
	NotReadyTaintTemplate = &corev1.Taint{
		Key: clusterv1alpha1.TaintClusterNotReady,
		Effect: corev1.TaintEffectNoExecute,
	}

	// NotReadyTaintTemplateForSched is the taint for when a cluster is not ready for executing resources.
	// Used for taint based schedule; it shares its key with NotReadyTaintTemplate
	// but carries the NoSchedule effect.
	NotReadyTaintTemplateForSched = &corev1.Taint{
		Key: clusterv1alpha1.TaintClusterNotReady,
		Effect: corev1.TaintEffectNoSchedule,
	}
)

// Controller is to sync Cluster.
Expand Down Expand Up @@ -145,7 +160,7 @@ func (c *Controller) Reconcile(ctx context.Context, req controllerruntime.Reques
return c.removeCluster(cluster)
}

return c.syncCluster(cluster)
return c.syncCluster(ctx, cluster)
}

// Start starts an asynchronous loop that monitors the status of cluster.
Expand Down Expand Up @@ -173,14 +188,21 @@ func (c *Controller) SetupWithManager(mgr controllerruntime.Manager) error {
})
}

func (c *Controller) syncCluster(cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
// syncCluster reconciles a cluster that is not being deleted. It runs three
// steps in order and stops at the first failure:
//
//  1. create the execution space for the cluster,
//  2. taint the cluster according to its condition,
//  3. ensure the finalizer is present.
//
// When step 1 or 2 fails, a Warning event is recorded on the cluster and the
// error is returned together with a result that asks for a requeue. Otherwise
// the outcome of ensureFinalizer is returned unchanged.
func (c *Controller) syncCluster(ctx context.Context, cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
	// Step 1: the execution space must exist before anything else is done.
	if createErr := c.createExecutionSpace(cluster); createErr != nil {
		reason := fmt.Sprintf("Failed %s", clusterv1alpha1.EventReasonCreateExecutionSpaceFailed)
		c.EventRecorder.Event(cluster, corev1.EventTypeWarning, reason, createErr.Error())
		return controllerruntime.Result{Requeue: true}, createErr
	}

	// Step 2: reflect the cluster's condition in its taints.
	if taintErr := c.taintClusterByCondition(ctx, cluster); taintErr != nil {
		reason := fmt.Sprintf("Failed %s", clusterv1alpha1.EventReasonTaintClusterByConditionFailed)
		c.EventRecorder.Event(cluster, corev1.EventTypeWarning, reason, taintErr.Error())
		return controllerruntime.Result{Requeue: true}, taintErr
	}

	// Step 3: make sure the finalizer is in place.
	return c.ensureFinalizer(cluster)
}
Expand Down Expand Up @@ -516,3 +538,28 @@ func (c *Controller) processTaintBaseEviction(ctx context.Context, cluster *clus
}
return nil
}

// taintClusterByCondition keeps the cluster's schedule taints in line with
// its Ready condition:
//
//   - Ready=False:   add NotReadyTaintTemplateForSched, remove UnreachableTaintTemplateForSched.
//   - Ready=Unknown: add UnreachableTaintTemplateForSched, remove NotReadyTaintTemplateForSched.
//   - Ready=True:    remove both schedule taints.
//
// A cluster that has no Ready condition yet, or whose Ready condition carries
// any other status, is left untouched and nil is returned.
//
// A failure to update the taints is returned (wrapped with the cluster name
// and the observed status) instead of being swallowed, so that the caller can
// record its warning event and requeue the cluster.
func (c *Controller) taintClusterByCondition(ctx context.Context, cluster *clusterv1alpha1.Cluster) error {
	readyCondition := meta.FindStatusCondition(cluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady)
	if readyCondition == nil {
		return nil
	}

	// Work out which schedule taints to add and remove for the observed status.
	var taintsToAdd, taintsToRemove []*corev1.Taint
	switch readyCondition.Status {
	case metav1.ConditionFalse:
		taintsToAdd = []*corev1.Taint{NotReadyTaintTemplateForSched}
		taintsToRemove = []*corev1.Taint{UnreachableTaintTemplateForSched}
	case metav1.ConditionUnknown:
		taintsToAdd = []*corev1.Taint{UnreachableTaintTemplateForSched}
		taintsToRemove = []*corev1.Taint{NotReadyTaintTemplateForSched}
	case metav1.ConditionTrue:
		taintsToRemove = []*corev1.Taint{NotReadyTaintTemplateForSched, UnreachableTaintTemplateForSched}
	default:
		return nil
	}

	if err := utilhelper.UpdateClusterControllerTaint(ctx, c.Client, taintsToAdd, taintsToRemove, cluster); err != nil {
		return fmt.Errorf("updating schedule taints of cluster %q (Ready=%s): %w", cluster.Name, readyCondition.Status, err)
	}
	return nil
}
3 changes: 2 additions & 1 deletion pkg/scheduler/core/generic_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ func (g *genericScheduler) findClustersThatFit(
defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())

var out []*clusterv1alpha1.Cluster
clusters := clusterInfo.GetReadyClusters()
// DO NOT filter out unhealthy clusters here; let users decide via the ClusterTolerations of their Placement.
clusters := clusterInfo.GetClusters()
for _, c := range clusters {
if result := fwk.RunFilterPlugins(ctx, placement, resource, c.Cluster()); !result.IsSuccess() {
klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())
Expand Down

0 comments on commit 02836f9

Please sign in to comment.