Skip to content

Commit

Permalink
fix!: improve basic canary approximation accuracy and honor maxSurge (a…
Browse files Browse the repository at this point in the history
…rgoproj#1759)

Signed-off-by: Jesse Suen <[email protected]>
  • Loading branch information
jessesuen authored Jan 14, 2022
1 parent 4ee0365 commit 0f93f9b
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 75 deletions.
8 changes: 4 additions & 4 deletions rollout/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ func TestDoNothingWithAnalysisRunsWhileBackgroundAnalysisRunRunning(t *testing.T
SetWeight: pointer.Int32Ptr(10),
}}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
RolloutAnalysis: v1alpha1.RolloutAnalysis{
Expand Down Expand Up @@ -792,7 +792,7 @@ func TestDoNothingWhileStepBasedAnalysisRunRunning(t *testing.T) {
},
}}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
ar := analysisRun(at, v1alpha1.RolloutTypeStepLabel, r2)
ar.Status.Phase = v1alpha1.AnalysisPhaseRunning
Expand Down Expand Up @@ -1067,7 +1067,7 @@ func TestPausedOnInconclusiveBackgroundAnalysisRun(t *testing.T) {
{SetWeight: pointer.Int32Ptr(30)},
}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
ar := analysisRun(at, v1alpha1.RolloutTypeBackgroundRunLabel, r2)
r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
Expand Down Expand Up @@ -1447,7 +1447,7 @@ func TestDoNotCreateBackgroundAnalysisRunAfterInconclusiveRun(t *testing.T) {
{SetWeight: pointer.Int32Ptr(10)},
}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
RolloutAnalysis: v1alpha1.RolloutAnalysis{
Expand Down
4 changes: 2 additions & 2 deletions rollout/experiment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ func TestPauseRolloutAfterInconclusiveExperiment(t *testing.T) {
Experiment: &v1alpha1.RolloutExperimentStep{},
}}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)

rs1 := newReplicaSetWithStatus(r1, 1, 1)
Expand Down Expand Up @@ -373,7 +373,7 @@ func TestRolloutExperimentScaleDownExperimentFromPreviousStep(t *testing.T) {
{SetWeight: pointer.Int32Ptr(1)},
}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(1), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(1), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)

rs1 := newReplicaSetWithStatus(r1, 1, 1)
Expand Down
147 changes: 80 additions & 67 deletions utils/replicaset/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,56 +43,6 @@ func AtDesiredReplicaCountsForCanary(ro *v1alpha1.Rollout, newRS, stableRS *apps
return true
}

/*
// AtDesiredReplicaCountsForCanary indicates if the rollout is at the desired state for the current step
func AtDesiredReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS, stableRS *appsv1.ReplicaSet, olderRSs []*appsv1.ReplicaSet) bool {
desiredNewRSReplicaCount, desiredStableRSReplicaCount := DesiredReplicaCountsForCanary(rollout, newRS, stableRS)
if newRS == nil || desiredNewRSReplicaCount != *newRS.Spec.Replicas || desiredNewRSReplicaCount != newRS.Status.AvailableReplicas {
return false
}
if stableRS == nil || desiredStableRSReplicaCount != *stableRS.Spec.Replicas || desiredStableRSReplicaCount != stableRS.Status.AvailableReplicas {
return false
}
if GetAvailableReplicaCountForReplicaSets(olderRSs) != int32(0) {
return false
}
return true
}
*/

/*
//DesiredReplicaCountsForCanary calculates the desired endstate replica count for the new and stable replicasets
func DesiredReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS, stableRS *appsv1.ReplicaSet) (int32, int32) {
rolloutSpecReplica := defaults.GetReplicasOrDefault(rollout.Spec.Replicas)
replicas, weight := GetCanaryReplicasOrWeight(rollout)
desiredNewRSReplicaCount := int32(0)
desiredStableRSReplicaCount := int32(0)
if replicas != nil {
desiredNewRSReplicaCount = *replicas
desiredStableRSReplicaCount = rolloutSpecReplica
} else {
desiredNewRSReplicaCount = int32(math.Ceil(float64(rolloutSpecReplica) * (float64(weight) / 100)))
desiredStableRSReplicaCount = int32(math.Ceil(float64(rolloutSpecReplica) * (1 - (float64(weight) / 100))))
}
if !CheckStableRSExists(newRS, stableRS) {
// If there is no stableRS or it is the same as the newRS, then the rollout does not follow the canary steps.
// Instead the controller tries to get the newRS to 100% traffic.
desiredNewRSReplicaCount = rolloutSpecReplica
desiredStableRSReplicaCount = 0
}
// Unlike the ReplicaSet based weighted canary, a service mesh/ingress
// based canary leaves the stable as 100% scaled until the rollout completes.
if rollout.Spec.Strategy.Canary.TrafficRouting != nil {
desiredStableRSReplicaCount = rolloutSpecReplica
}
return desiredNewRSReplicaCount, desiredStableRSReplicaCount
}
*/

// CalculateReplicaCountsForBasicCanary calculates the number of replicas for the newRS and the stableRS
// when using the basic canary strategy. The function calculates the desired number of replicas for
// the new and stable RS using the following equations:
Expand Down Expand Up @@ -131,9 +81,9 @@ func DesiredReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS, stableRS *a
func CalculateReplicaCountsForBasicCanary(rollout *v1alpha1.Rollout, newRS *appsv1.ReplicaSet, stableRS *appsv1.ReplicaSet, oldRSs []*appsv1.ReplicaSet) (int32, int32) {
rolloutSpecReplica := defaults.GetReplicasOrDefault(rollout.Spec.Replicas)
_, desiredWeight := GetCanaryReplicasOrWeight(rollout)
maxSurge := MaxSurge(rollout)

desiredStableRSReplicaCount := int32(math.Ceil(float64(rolloutSpecReplica) * (1 - (float64(desiredWeight) / 100))))
desiredNewRSReplicaCount := int32(math.Ceil(float64(rolloutSpecReplica) * (float64(desiredWeight) / 100)))
desiredNewRSReplicaCount, desiredStableRSReplicaCount := approximateWeightedCanaryStableReplicaCounts(rolloutSpecReplica, desiredWeight, maxSurge)

stableRSReplicaCount := int32(0)
newRSReplicaCount := int32(0)
Expand All @@ -151,13 +101,6 @@ func CalculateReplicaCountsForBasicCanary(rollout *v1alpha1.Rollout, newRS *apps
desiredStableRSReplicaCount = 0
}

maxSurge := MaxSurge(rollout)

if extraReplicaAdded(rolloutSpecReplica, desiredWeight) {
// In the case where the weight of the stable and canary replica counts cannot be divided evenly,
// the controller needs to surges by one to account for both replica counts being rounded up.
maxSurge = maxSurge + 1
}
maxReplicaCountAllowed := rolloutSpecReplica + maxSurge

allRSs := append(oldRSs, newRS)
Expand Down Expand Up @@ -223,6 +166,84 @@ func CalculateReplicaCountsForBasicCanary(rollout *v1alpha1.Rollout, newRS *apps
return newRSReplicaCount, stableRSReplicaCount
}

// approximateWeightedCanaryStableReplicaCounts approximates the desired canary weight and returns
// the closest replica count values for the canary and stable to reach the desired weight. The
// canary/stable replica counts might sum to either spec.replicas or spec.replicas + 1 but will not
// exceed spec.replicas if maxSurge is 0. If the canary weight is between 1-99, and spec.replicas is > 1,
// we will always return a minimum of 1 for stable and canary as to not return 0.
func approximateWeightedCanaryStableReplicaCounts(specReplicas, desiredWeight, maxSurge int32) (int32, int32) {
if specReplicas == 0 {
return 0, 0
}
// canaryOption is one potential return value of this function. We will evaluate multiple options
// for the canary count in order to best approximate the desired weight.
type canaryOption struct {
canary int32
total int32
}
var options []canaryOption

ceilWeightedCanaryCount := int32(math.Ceil(float64(specReplicas*desiredWeight) / 100.0))
floorWeightedCanaryCount := int32(math.Floor(float64(specReplicas*desiredWeight) / 100.0))

tied := floorCeilingTied(desiredWeight, specReplicas)

// zeroAllowed indicates if are allowed to return the floored value if it is zero. We don't allow
// the value to be zero if when user has a weight from 1-99, and they run 2+ replicas (surge included)
zeroAllowed := desiredWeight == 100 || desiredWeight == 0 || (specReplicas == 1 && maxSurge == 0)

if ceilWeightedCanaryCount < specReplicas || zeroAllowed {
options = append(options, canaryOption{ceilWeightedCanaryCount, specReplicas})
}

if !tied && (floorWeightedCanaryCount != 0 || zeroAllowed) {
options = append(options, canaryOption{floorWeightedCanaryCount, specReplicas})
}

// check if we are allowed to surge. if we are, we can also consider rounding up to spec.replicas + 1
// in order to achieve a closer canary weight
if maxSurge > 0 {
options = append(options, canaryOption{ceilWeightedCanaryCount, specReplicas + 1})
surgeIsTied := floorCeilingTied(desiredWeight, specReplicas+1)
if !surgeIsTied && (floorWeightedCanaryCount != 0 || zeroAllowed) {
options = append(options, canaryOption{floorWeightedCanaryCount, specReplicas + 1})
}
}

if len(options) == 0 {
// should not get here
return 0, specReplicas
}

bestOption := options[0]
bestDelta := weightDelta(desiredWeight, bestOption.canary, bestOption.total)
for i := 1; i < len(options); i++ {
currOption := options[i]
currDelta := weightDelta(desiredWeight, currOption.canary, currOption.total)
if currDelta < bestDelta {
bestOption = currOption
bestDelta = currDelta
}
}
return bestOption.canary, bestOption.total - bestOption.canary
}

// floorCeilingTied indicates if the ceiling and floor values are equidistant from the desired weight
// For example: replicas: 3, desiredWeight: 50%
// A canary count of 1 (33.33%) or 2 (66.66%) are both equidistant from desired weight of 50%.
// When this happens, we will pick the larger canary count
func floorCeilingTied(desiredWeight, totalReplicas int32) bool {
_, frac := math.Modf(float64(totalReplicas) * (float64(desiredWeight) / 100))
return frac == 0.5
}

// weightDelta calculates the difference that the canary replicas will be from the desired weight
// This is used to pick the closest approximation of canary counts.
func weightDelta(desiredWeight, canaryReplicas, totalReplicas int32) float64 {
actualWeight := float64(canaryReplicas*100) / float64(totalReplicas)
return math.Abs(actualWeight - float64(desiredWeight))
}

// calculateScaleDownReplicaCount calculates drainRSReplicaCount
// drainRSReplicaCount can be either stableRS count or canaryRS count
// drainRSReplicaCount corresponds to RS whose availability is not considered in calculating replicasToScaleDown
Expand Down Expand Up @@ -397,14 +418,6 @@ func GetReplicasForScaleDown(rs *appsv1.ReplicaSet, ignoreAvailability bool) int
return rs.Status.AvailableReplicas
}

// extraReplicaAdded checks if an extra replica is added because the stable and canary replicas count are both
// rounded up. The controller rounds both of the replica counts when the setWeight does not distribute evenly
// in order to prevent either from having a 0 replica count.
func extraReplicaAdded(replicas int32, setWeight int32) bool {
_, frac := math.Modf(float64(replicas) * (float64(setWeight) / 100))
return frac != 0.0
}

// GetCurrentCanaryStep returns the current canary step. If there are no steps or the rollout
// has already executed the last step, the func returns nil
func GetCurrentCanaryStep(rollout *v1alpha1.Rollout) (*v1alpha1.CanaryStep, *int32) {
Expand Down
Loading

0 comments on commit 0f93f9b

Please sign in to comment.