fix(FindImageFromCluster): infer regions from deploy stages (spinnaker#2640)

FindImageFromClusterTask now looks ahead to find all regions from subsequent deploy (regular + canary) stages. 

This moves a pipeline's failure point earlier when an image isn't found.

E.g. FindImage is configured to look for images in `us-east`, but the deploy stage right after it was modified to
deploy to `us-east` AND `us-west`. FindImage will locate the `us-east` image,
the deploy for that image will start, but the deploy to `us-west` will fail.

This failure can leave regions with different versions deployed.
To prevent this, the FindImage task now fails before any deploy starts.

Additionally, the BakeStage has been updated to take advantage of this functionality.
marchello2000 authored and ajordens committed Feb 7, 2019
1 parent 51f1e7b commit 6ee8fe9
Showing 8 changed files with 343 additions and 37 deletions.
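For orientation before the diffs: the new RegionCollector referenced below is one of the eight changed files, but it is not expanded on this page. The following is only a rough sketch of what its getRegionsFromChildStages method likely does, reconstructed from the inline region-gathering logic removed from BakeStage (further down) and the new Stage#allDownstreamStages() helper; the exact field names, annotations, and the cloud-provider filter are assumptions, not the committed source.

```groovy
import com.netflix.spinnaker.orca.pipeline.model.Stage
import org.springframework.stereotype.Component

// Hypothetical sketch only -- the committed RegionCollector is not shown on this page.
// It mirrors the inline logic removed from BakeStage: walk every stage downstream of
// the current one and collect the regions that deploy and canary stages will target,
// so FindImage/Bake can cover them before any deploy starts.
@Component
class RegionCollector {
  Set<String> getRegionsFromChildStages(Stage stage) {
    Set<String> regions = [] as Set
    def childStages = stage.allDownstreamStages()

    // Deploy stages: take each cluster's availabilityZones keys, filtered to the
    // calling stage's cloud provider (carried over from the old BakeStage code).
    childStages.findAll { it.type == "deploy" }.each { deployStage ->
      deployStage.context?.clusters?.each { Map cluster ->
        if (cluster.cloudProvider == stage.context.cloudProviderType) {
          regions.addAll(cluster.availabilityZones?.keySet() ?: [])
        }
      }
      if (deployStage.context?.cluster?.cloudProvider == stage.context.cloudProviderType) {
        regions.addAll(deployStage.context.cluster?.availabilityZones?.keySet() ?: [])
      }
    }

    // Canary stages: take regions from both sides of every baseline/canary cluster pair.
    childStages.findAll { it.type == "canary" }.each { canaryStage ->
      canaryStage.context?.clusterPairs?.each { Map clusterPair ->
        regions.addAll(clusterPair.baseline?.availabilityZones?.keySet() ?: [])
        regions.addAll(clusterPair.canary?.availabilityZones?.keySet() ?: [])
      }
    }

    return regions
  }
}
```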
@@ -26,9 +26,11 @@ import com.netflix.spinnaker.orca.pipeline.StageDefinitionBuilder
import com.netflix.spinnaker.orca.pipeline.TaskNode
import com.netflix.spinnaker.orca.pipeline.model.Stage
import com.netflix.spinnaker.orca.pipeline.tasks.artifacts.BindProducedArtifactsTask
import com.netflix.spinnaker.orca.pipeline.util.RegionCollector
import groovy.transform.CompileDynamic
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.stereotype.Component

import javax.annotation.Nonnull
@@ -42,6 +44,9 @@ class BakeStage implements StageDefinitionBuilder {

public static final String PIPELINE_CONFIG_TYPE = "bake"

@Autowired
RegionCollector regionCollector

@Override
void taskGraph(Stage stage, TaskNode.Builder builder) {
if (isTopLevelStage(stage)) {
@@ -80,31 +85,8 @@ class BakeStage implements StageDefinitionBuilder {
deployRegions.addAll(stage.context.regions as Set<String> ?: [])

if (!deployRegions.contains("global")) {
deployRegions.addAll(stage.execution.stages.findAll {
it.type == "deploy"
}.collect {
Set<String> regions = it.context?.clusters?.inject([] as Set<String>) { Set<String> accum, Map cluster ->
if (cluster.cloudProvider == stage.context.cloudProviderType) {
accum.addAll(cluster.availabilityZones?.keySet() ?: [])
}
return accum
} ?: []
if (it.context?.cluster?.cloudProvider == stage.context.cloudProviderType) {
regions.addAll(it.context?.cluster?.availabilityZones?.keySet() ?: [])
}
return regions
}.flatten())
deployRegions.addAll(regionCollector.getRegionsFromChildStages(stage))
// TODO(duftler): Also filter added canary regions once canary supports multiple platforms.
deployRegions.addAll(stage.execution.stages.findAll {
it.type == "canary"
}.collect {
Set<String> regions = it.context?.clusterPairs?.inject([] as Set<String>) { Set<String> accum, Map clusterPair ->
accum.addAll(clusterPair.baseline?.availabilityZones?.keySet() ?: [])
accum.addAll(clusterPair.canary?.availabilityZones?.keySet() ?: [])
return accum
} ?: []
return regions
}.flatten())
}

log.info("Preparing package `${stage.context.package}` for bake in ${deployRegions.join(", ")}")
@@ -18,9 +18,11 @@ package com.netflix.spinnaker.orca.bakery.pipeline

import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.pipeline.model.Stage
import com.netflix.spinnaker.orca.pipeline.util.RegionCollector
import groovy.time.TimeCategory
import spock.lang.Specification
import spock.lang.Unroll

import static com.netflix.spinnaker.orca.test.model.ExecutionBuilder.pipeline
import static com.netflix.spinnaker.orca.test.model.ExecutionBuilder.stage

@@ -34,11 +36,13 @@ class BakeStageSpec extends Specification {
type = "deploy"
name = "Deploy!"
context = zones
refId = "2"
requisiteStageRefIds = ["1"]
}
}
}

def bakeStage = new Stage(pipeline, "bake", "Bake!", bakeStageContext)
def bakeStage = new Stage(pipeline, "bake", "Bake!", bakeStageContext + [refId: "1"])
def builder = Spy(BakeStage, {
(0..1) * now() >> {
use([TimeCategory]) {
@@ -47,6 +51,8 @@
}
})

builder.regionCollector = new RegionCollector()

when:
def parallelContexts = builder.parallelContexts(bakeStage)

@@ -93,7 +99,8 @@
}

def bakeStage = pipeline.stageById("1")
def parallelStages = new BakeStage().parallelStages(bakeStage)
def parallelStages = new BakeStage(regionCollector: new RegionCollector()).parallelStages(bakeStage)

parallelStages.eachWithIndex { it, idx -> it.context.ami = idx + 1 }
pipeline.stages.addAll(parallelStages)

@@ -127,7 +134,7 @@ class BakeStageSpec extends Specification {
private
static List<Map> expectedContexts(String cloudProvider, String amiSuffix, String... regions) {
return regions.collect {
[cloudProviderType: cloudProvider, amiSuffix: amiSuffix, type: BakeStage.PIPELINE_CONFIG_TYPE, "region": it, name: "Bake in ${it}"]
[cloudProviderType: cloudProvider, amiSuffix: amiSuffix, type: BakeStage.PIPELINE_CONFIG_TYPE, "region": it, name: "Bake in ${it}", refId: "1"]
}
}
}
@@ -28,6 +28,7 @@ import com.netflix.spinnaker.orca.clouddriver.OortService
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.support.Location
import com.netflix.spinnaker.orca.clouddriver.tasks.AbstractCloudProviderAwareTask
import com.netflix.spinnaker.orca.pipeline.model.Stage
import com.netflix.spinnaker.orca.pipeline.util.RegionCollector
import groovy.transform.Canonical
import groovy.util.logging.Slf4j
import org.springframework.beans.factory.annotation.Autowired
@@ -80,6 +81,9 @@ class FindImageFromClusterTask extends AbstractCloudProviderAwareTask implements
@Autowired
ObjectMapper objectMapper

@Autowired
RegionCollector regionCollector

@Canonical
static class FindImageConfiguration {
String cluster
@@ -119,6 +123,16 @@ class FindImageFromClusterTask extends AbstractCloudProviderAwareTask implements
Set<String> imageNames = []
Map<Location, String> imageIds = [:]

// Supplement config with regions from subsequent deploy/canary stages:
def deployRegions = regionCollector.getRegionsFromChildStages(stage)

deployRegions.forEach {
if (!config.regions.contains(it)) {
config.regions.add(it)
log.info("Inferred and added region ($it) from deploy stage to FindImageFromClusterTask (executionId: ${stage.execution.id})")
}
}

Map<Location, List<Map<String, Object>>> imageSummaries = config.requiredLocations.collectEntries { location ->
try {
def lookupResults = oortService.getServerGroupSummary(
@@ -164,11 +178,13 @@ class FindImageFromClusterTask extends AbstractCloudProviderAwareTask implements

if (!locationsWithMissingImageIds.isEmpty()) {
// signifies that at least one summary was missing image details, let's retry until we see image details
log.warn("One or more locations are missing image details (locations: ${locationsWithMissingImageIds*.value}, cluster: ${config.cluster}, account: ${account})")
log.warn("One or more locations are missing image details (locations: ${locationsWithMissingImageIds*.value}, cluster: ${config.cluster}, account: ${account}, executionId: ${stage.execution.id})")
return new TaskResult(ExecutionStatus.RUNNING)
}

if (missingLocations) {
log.info("Resolving images in missing locations: ${missingLocations.collect({it -> it.value}).join(",")}, executionId ${stage.execution.id}")

Set<String> searchNames = extractBaseImageNames(imageNames)
if (searchNames.size() != 1) {
throw new IllegalStateException("Request to resolve images for missing ${config.requiredLocations.first().pluralType()} requires exactly one image. (Found ${searchNames}, missing locations: ${missingLocations*.value.join(',')})")
@@ -180,15 +196,15 @@ class FindImageFromClusterTask extends AbstractCloudProviderAwareTask implements
}

List<Map> images = oortService.findImage(cloudProvider, searchNames[0] + '*', account, null, null)
resolveFromBaseImageName(images, missingLocations, imageSummaries, deploymentDetailTemplate, config)
resolveFromBaseImageName(images, missingLocations, imageSummaries, deploymentDetailTemplate, config, stage.execution.id)

def unresolved = imageSummaries.findResults { it.value == null ? it.key : null }
if (unresolved) {
if (cloudProvider == 'aws') {
// fallback to look in the default bake account; the deploy operation will execute the allowLaunchOperation to share
// the image into the target account
List<Map> defaultImages = oortService.findImage(cloudProvider, searchNames[0] + '*', defaultBakeAccount, null, null)
resolveFromBaseImageName(defaultImages, missingLocations, imageSummaries, deploymentDetailTemplate, config)
resolveFromBaseImageName(defaultImages, missingLocations, imageSummaries, deploymentDetailTemplate, config, stage.execution.id)
def stillUnresolved = imageSummaries.findResults { it.value == null ? it.key : null }
if (stillUnresolved) {
throw new IllegalStateException("Missing images in $stillUnresolved.value")
@@ -275,10 +291,19 @@ class FindImageFromClusterTask extends AbstractCloudProviderAwareTask implements
])
}

private void resolveFromBaseImageName(List<Map> images, ArrayList<Location> missingLocations, Map<Location, List<Map<String, Object>>> imageSummaries, Map<String, Object> deploymentDetailTemplate, FindImageConfiguration config) {
private void resolveFromBaseImageName(
List<Map> images,
ArrayList<Location> missingLocations,
Map<Location, List<Map<String, Object>>> imageSummaries,
Map<String, Object> deploymentDetailTemplate,
FindImageConfiguration config,
String executionId
) {
for (Map image : images) {
for (Location location : missingLocations) {
if (imageSummaries[location] == null && image.amis && image.amis[location.value]) {
log.info("Resolved missing image in '$location.value' with '$image.imageName' (executionId: $executionId)")

imageSummaries[location] = [
mkDeploymentDetail((String) image.imageName, (String) image.amis[location.value][0], deploymentDetailTemplate, config)
]
@@ -20,8 +20,8 @@ import com.fasterxml.jackson.databind.ObjectMapper
import com.netflix.spinnaker.orca.ExecutionStatus
import com.netflix.spinnaker.orca.clouddriver.OortService
import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.support.Location
import com.netflix.spinnaker.orca.pipeline.model.Execution
import com.netflix.spinnaker.orca.pipeline.model.Stage
import com.netflix.spinnaker.orca.pipeline.util.RegionCollector
import retrofit.RetrofitError
import retrofit.client.Response
import retrofit.mime.TypedString
@@ -33,12 +36,16 @@ import static com.netflix.spinnaker.orca.test.model.ExecutionBuilder.pipeline
class FindImageFromClusterTaskSpec extends Specification {

@Subject
task = new FindImageFromClusterTask()
task = new FindImageFromClusterTask()
OortService oortService = Mock(OortService)
RegionCollector regionCollector = Mock(RegionCollector)

def setup() {
regionCollector.getRegionsFromChildStages(_ as Stage) >> { stage -> new HashSet<String>() }

task.oortService = oortService
task.objectMapper = new ObjectMapper()
task.regionCollector = regionCollector
}

@Unroll
@@ -98,7 +102,11 @@

def "should be RUNNING if summary does not include imageId"() {
given:
def stage = new Stage(Execution.newPipeline("orca"), "findImage", [
def pipe = pipeline {
application = "orca" // Should be ignored.
}

def stage = new Stage(pipe, "findImage", [
cloudProvider : "cloudProvider",
cluster : "foo-test",
account : "test",
@@ -186,7 +194,10 @@
account : "test",
selectionStrategy : "LARGEST",
onlyEnabled : "false",
regions : [location1.value, location2.value]
regions : [
location1.value
//Note: location2.value will come from regionCollectorResponse below
]
])

when:
Expand All @@ -200,6 +211,8 @@ class FindImageFromClusterTaskSpec extends Specification {
throw RetrofitError.httpError("http://clouddriver", new Response("http://clouddriver", 404, 'Not Found', [], new TypedString("{}")), null, Map)
}
1 * oortService.findImage("cloudProvider", "ami-012-name-ebs*", "test", null, null) >> imageSearchResult
1 * regionCollector.getRegionsFromChildStages(stage) >> regionCollectorResponse

assertNorth(result.outputs?.deploymentDetails?.find {
it.region == "north"
} as Map, [imageName: "ami-012-name-ebs"])
@@ -221,6 +234,8 @@
]]
]

regionCollectorResponse = [location2.value]

imageSearchResult = [
[
imageName: "ami-012-name-ebs",
@@ -366,7 +381,11 @@

def "should parse fail strategy error message"() {
given:
def stage = new Stage(Execution.newPipeline("orca"), "whatever", [
def pipe = pipeline {
application = "orca" // Should be ignored.
}

def stage = new Stage(pipe, "whatever", [
cloudProvider : "cloudProvider",
cluster : "foo-test",
account : "test",
@@ -402,7 +421,10 @@
@Unroll
'cluster with name "#cluster" and moniker "#moniker" should have application name "#expected"'() {
given:
def stage = new Stage(Execution.newPipeline("orca"), 'findImageFromCluster', [
def pipe = pipeline {
application = "orca" // Should be ignored.
}
def stage = new Stage(pipe, 'findImageFromCluster', [
cluster: cluster,
moniker: moniker,
])
@@ -422,6 +422,41 @@ private List<Stage> ancestorsOnly(Set<String> visited) {
}
}

/**
* Get all stages downstream of the current one, directly or transitively (breadth-first); the current stage itself is excluded
*/
public List<Stage> allDownstreamStages() {
if (execution != null) {
HashSet<String> visited = new HashSet<>();
LinkedList<Stage> queue = new LinkedList<>();
List<Stage> children = new ArrayList<>();

queue.push(this);
boolean first = true;

while (!queue.isEmpty()) {
Stage stage = queue.pop();
if (!first) {
children.add(stage);
}

first = false;
visited.add(stage.refId);

List<Stage> childStages = stage.downstreamStages();

childStages
.stream()
.filter(s -> !visited.contains(s.refId))
.forEach(s -> queue.add(s));
}

return children;
}

return emptyList();
}

/**
* Maps the stage's context to a typed object
*/
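For context only (not part of the commit): a hedged usage sketch of the new allDownstreamStages() helper, written against the same ExecutionBuilder test DSL the specs above use. The pipeline shape, refIds, and the assertion are illustrative assumptions.

```groovy
import static com.netflix.spinnaker.orca.test.model.ExecutionBuilder.pipeline
import static com.netflix.spinnaker.orca.test.model.ExecutionBuilder.stage

// Illustrative pipeline: a findImage stage followed by two chained deploy stages.
def pipe = pipeline {
  stage { refId = "1"; type = "findImage" }
  stage { refId = "2"; type = "deploy"; requisiteStageRefIds = ["1"] }
  stage { refId = "3"; type = "deploy"; requisiteStageRefIds = ["2"] }
}

def findImageStage = pipe.stages.find { it.refId == "1" }

// allDownstreamStages() walks downstreamStages() breadth-first, so it should return
// stages "2" and "3" but never the starting stage itself.
assert (findImageStage.allDownstreamStages()*.refId as Set) == (["2", "3"] as Set)
```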