Skip to content

Commit

Permalink
[DEVCON-2524] output deploy queue to checkrun summary (#772)
Browse files Browse the repository at this point in the history
### Issue:
Previously, when a revision was queued, we marked the GitHub status as
queued but gave no other indication in the check run itself that this
check run was waiting on something.
Users had to figure out which check run was holding up the queue, which
is difficult because deploys for a root can happen off any branch. We
can figure this out using the Temporal UI; however, it would be a better
CX if we could just surface the queue itself in the check run.

### Part 1
Add queue info to checkrun summary on creating new check run or unlock
signal. Queue info is shown as [commit SHA](link to specific check run)

<img width="1224" alt="Screenshot 2024-10-31 at 1 23 43 PM"
src="https://github.com/user-attachments/assets/7e6e176c-4c7f-4858-bd6f-41d29b743e6e">


### Part 2
Check run summaries do not get updated in cases where a Terraform
workflow is pending Confirm/Reject checkrun action between Plan and
Apply steps, since the deploy queue lock only tracks the "Unlock"
checkrun action.
* Surfaces deploy queue from queue package to deploy/terraform package.
Some complication using Queue structs here due to cyclic dependencies
between the two packages.
* To avoid restructuring the two packages, instead we will pass the
GithubCheckRunCache (github package) over from queue worker to
deploy/terraform workflow.
* StateReceiver will use GithubCheckRunCache to update checkrun
summaries for each revision on the queue when a Confirm/Reject action is
pending.

<img width="1044" alt="Screenshot 2024-11-04 at 12 46 37 PM"
src="https://github.com/user-attachments/assets/508e9376-8362-46cd-80c9-653a218ddcf4">

### Test Plan

1. Recreating Unlock action: open 2 PRs, run `atlantis apply -f` on PR
1, and try to merge PR 2. PR 2 will be locked by confirm reject action
on PR 1. Any subsequent queued deploys will show up as queued commits
(with corresponding links) in checkrun summary.
2. Recreating diverged confirm/reject action: run `atlantis apply -f` on
unmerged PR 1 and confirm. Create PR 3 and run `atlantis apply -f` on
unmerged PR 3.
3. Merged PR 1 deploy will update to show pending action on PR 3.
Links will go directly to the specific run instead of just the commit in
case there were multiple forced applies on PR 1.

### Improve Test Cases
Fix queue_test and updater_test where assertions in CreateOrUpdate were
not getting caught since the function is called within a temporal
worker.
  • Loading branch information
tlin4194 authored Dec 3, 2024
1 parent b5f74f6 commit 58ff899
Show file tree
Hide file tree
Showing 17 changed files with 359 additions and 117 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ issues:
- dogsled
linters-settings:
interfacebloat:
max: 6
max: 7
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,8 @@ func BuildRevisionURLMarkdown(repoFullName string, revision string) string {
// uses Markdown formatting to generate the link on GH
return fmt.Sprintf("[%s](https://github.com/%s/commit/%s)", revision, repoFullName, revision)
}

// BuildRunURLMarkdown returns a Markdown link whose text is revision and whose
// target is the GitHub run page for runID inside repoFullName, i.e.
// "[<revision>](https://github.com/<repoFullName>/runs/<runID>)".
//
// Linking to the run rather than to the commit lets the reader land on one
// specific check run even when a commit has several of them.
func BuildRunURLMarkdown(repoFullName string, revision string, runID int64) string {
	// uses Markdown formatting to generate the link on GH
	return fmt.Sprintf("[%s](https://github.com/%s/runs/%d)", revision, repoFullName, runID)
}
15 changes: 15 additions & 0 deletions server/neptune/workflows/internal/deploy/lock/lock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package lock

// LockStatus enumerates the lock states a deploy queue can be in.
type LockStatus int

const (
	// UnlockedStatus is the zero value of LockStatus, so a LockState that was
	// never assigned reads as unlocked.
	UnlockedStatus LockStatus = iota
	// LockedStatus marks the queue as locked.
	LockedStatus
)

// QueueDepthStat is the metric name under which the queue size is reported.
const QueueDepthStat = "queue.depth"

// LockState couples a LockStatus with the revision it refers to.
type LockState struct {
	// Revision is the revision tied to the current lock; callers use it to
	// tell the user which revision is holding the queue.
	Revision string
	// Status says whether the queue is currently locked or unlocked.
	Status LockStatus
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/pkg/errors"
"github.com/runatlantis/atlantis/server/neptune/workflows/activities"
"github.com/runatlantis/atlantis/server/neptune/workflows/activities/github"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/revision/queue"
"github.com/slack-go/slack"
"go.temporal.io/sdk/workflow"
Expand All @@ -24,7 +25,7 @@ type Slack struct {
func (s *Slack) Notify(ctx workflow.Context) error {
state := s.DeployQueue.GetLockState()

if state.Status == queue.UnlockedStatus {
if state.Status == lock.UnlockedStatus {
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/runatlantis/atlantis/server/neptune/workflows/activities"
"github.com/runatlantis/atlantis/server/neptune/workflows/activities/github"
terraformActivities "github.com/runatlantis/atlantis/server/neptune/workflows/activities/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/revision/notifier"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/revision/queue"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/terraform"
Expand All @@ -20,7 +21,7 @@ import (
)

type request struct {
LockState queue.LockState
LockState lock.LockState
InitialItems []terraform.DeploymentInfo
}

Expand Down Expand Up @@ -51,7 +52,7 @@ func testWorkflow(ctx workflow.Context, request request) error {

func TestNotifier(t *testing.T) {
t.Run("empty queue", func(t *testing.T) {
state := queue.LockState{Status: queue.UnlockedStatus}
state := lock.LockState{Status: lock.UnlockedStatus}
ts := testsuite.WorkflowTestSuite{}
env := ts.NewTestWorkflowEnvironment()

Expand All @@ -68,7 +69,7 @@ func TestNotifier(t *testing.T) {
})

t.Run("locked state", func(t *testing.T) {
state := queue.LockState{Status: queue.LockedStatus}
state := lock.LockState{Status: lock.LockedStatus}
ts := testsuite.WorkflowTestSuite{}
env := ts.NewTestWorkflowEnvironment()

Expand All @@ -85,7 +86,7 @@ func TestNotifier(t *testing.T) {
})

t.Run("no slack config", func(t *testing.T) {
state := queue.LockState{Status: queue.LockedStatus}
state := lock.LockState{Status: lock.LockedStatus}
ts := testsuite.WorkflowTestSuite{}
env := ts.NewTestWorkflowEnvironment()

Expand Down Expand Up @@ -121,7 +122,7 @@ func TestNotifier(t *testing.T) {
})

t.Run("activity called", func(t *testing.T) {
state := queue.LockState{Status: queue.LockedStatus}
state := lock.LockState{Status: lock.LockedStatus}
ts := testsuite.WorkflowTestSuite{}
env := ts.NewTestWorkflowEnvironment()

Expand Down
43 changes: 22 additions & 21 deletions server/neptune/workflows/internal/deploy/revision/queue/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,23 @@ package queue
import (
"container/list"
"fmt"
"strings"

"github.com/runatlantis/atlantis/server/neptune/workflows/activities/github"
activity "github.com/runatlantis/atlantis/server/neptune/workflows/activities/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/metrics"
"go.temporal.io/sdk/workflow"
)

type LockStatus int

type LockState struct {
Revision string
Status LockStatus
}

const (
UnlockedStatus LockStatus = iota
LockedStatus

QueueDepthStat = "queue.depth"
)

type Deploy struct {
queue *priority
lockStatusCallback func(workflow.Context, *Deploy)
scope metrics.Scope

// mutable: default is unlocked
lock LockState
lock lock.LockState
}

func NewQueue(callback func(workflow.Context, *Deploy), scope metrics.Scope) *Deploy {
Expand All @@ -41,12 +30,12 @@ func NewQueue(callback func(workflow.Context, *Deploy), scope metrics.Scope) *De
}
}

func (q *Deploy) GetLockState() LockState {
func (q *Deploy) GetLockState() lock.LockState {
return q.lock
}

func (q *Deploy) SetLockForMergedItems(ctx workflow.Context, state LockState) {
if state.Status == LockedStatus {
func (q *Deploy) SetLockForMergedItems(ctx workflow.Context, state lock.LockState) {
if state.Status == lock.LockedStatus {
q.scope.Counter("locked").Inc(1)
} else {
q.scope.Counter("unlocked").Inc(1)
Expand All @@ -56,11 +45,11 @@ func (q *Deploy) SetLockForMergedItems(ctx workflow.Context, state LockState) {
}

// CanPop reports whether the queue currently has an item that may be popped.
// Items of High priority can always be popped, regardless of the lock.
// Anything else can be popped only while the lock status is unlocked and the
// queue is not empty.
func (q *Deploy) CanPop() bool {
// NOTE(review): the two return lines below are the removed and added sides of
// the diff; they differ only in where UnlockedStatus is resolved from
// (package-local vs. the lock package).
return q.queue.HasItemsOfPriority(High) || (q.lock.Status == UnlockedStatus && !q.queue.IsEmpty())
return q.queue.HasItemsOfPriority(High) || (q.lock.Status == lock.UnlockedStatus && !q.queue.IsEmpty())
}

// Pop removes and returns the next deployment from the underlying priority
// queue, passing through whatever error that queue reports.
//
// The queue-depth gauge is updated from a deferred closure so that
// q.queue.Size() is read after the pop has happened. Deferring the Update call
// directly would evaluate its argument when the defer statement executes and
// therefore publish the depth from before the item was removed.
func (q *Deploy) Pop() (terraform.DeploymentInfo, error) {
	defer func() {
		q.scope.Gauge(lock.QueueDepthStat).Update(float64(q.queue.Size()))
	}()
	return q.queue.Pop()
}

Expand All @@ -77,14 +66,26 @@ func (q *Deploy) IsEmpty() bool {
}

// Push enqueues msg. Deployments whose root was triggered manually go in at
// High priority; every other deployment goes in at Low priority.
//
// The queue-depth gauge is updated from a deferred closure so that
// q.queue.Size() is read after the push has happened. Deferring the Update
// call directly would evaluate its argument when the defer statement executes
// and therefore publish the depth from before the item was added.
func (q *Deploy) Push(msg terraform.DeploymentInfo) {
	defer func() {
		q.scope.Gauge(lock.QueueDepthStat).Update(float64(q.queue.Size()))
	}()
	if msg.Root.TriggerInfo.Type == activity.ManualTrigger {
		q.queue.Push(msg, High)
		return
	}
	q.queue.Push(msg, Low)
}

// GetQueuedRevisionsSummary renders the queue contents as a one-line summary.
//
// When the queue is empty it returns "No other runs ahead in queue.".
// Otherwise it returns "Runs in queue: " followed by one Markdown run link per
// item yielded by Scan, in Scan order, separated by ", ". Each link shows the
// item's commit revision and points at its check run.
func (q *Deploy) GetQueuedRevisionsSummary() string {
	if q.IsEmpty() {
		return "No other runs ahead in queue."
	}

	var links []string
	for _, item := range q.Scan() {
		links = append(
			links,
			github.BuildRunURLMarkdown(item.Repo.GetFullName(), item.Commit.Revision, item.CheckRunID),
		)
	}

	joined := strings.Join(links, ", ")
	return fmt.Sprintf("Runs in queue: %s", joined)
}

// priority is a simple 2 priority queue implementation
// priority is determined before an item enters a queue and does not change
type priority struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (

"github.com/runatlantis/atlantis/server/neptune/workflows/activities/github"
activity "github.com/runatlantis/atlantis/server/neptune/workflows/activities/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/revision/queue"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/metrics"
Expand Down Expand Up @@ -38,8 +39,8 @@ func TestQueue(t *testing.T) {
q := queue.NewQueue(func(ctx workflow.Context, d *queue.Deploy) {
called = true
}, metrics.NewNullableScope())
q.SetLockForMergedItems(test.Background(), queue.LockState{
Status: queue.LockedStatus,
q.SetLockForMergedItems(test.Background(), lock.LockState{
Status: lock.LockedStatus,
})

assert.True(t, called)
Expand All @@ -52,17 +53,17 @@ func TestQueue(t *testing.T) {

t.Run("can pop empty queue locked", func(t *testing.T) {
q := queue.NewQueue(noopCallback, metrics.NewNullableScope())
q.SetLockForMergedItems(test.Background(), queue.LockState{
Status: queue.LockedStatus,
q.SetLockForMergedItems(test.Background(), lock.LockState{
Status: lock.LockedStatus,
})
assert.Equal(t, false, q.CanPop())
})
t.Run("can pop manual trigger locked", func(t *testing.T) {
q := queue.NewQueue(noopCallback, metrics.NewNullableScope())
msg1 := wrap("1", activity.ManualTrigger)
q.Push(msg1)
q.SetLockForMergedItems(test.Background(), queue.LockState{
Status: queue.LockedStatus,
q.SetLockForMergedItems(test.Background(), lock.LockState{
Status: lock.LockedStatus,
})
assert.Equal(t, true, q.CanPop())
})
Expand All @@ -76,8 +77,8 @@ func TestQueue(t *testing.T) {
q := queue.NewQueue(noopCallback, metrics.NewNullableScope())
msg1 := wrap("1", activity.MergeTrigger)
q.Push(msg1)
q.SetLockForMergedItems(test.Background(), queue.LockState{
Status: queue.LockedStatus,
q.SetLockForMergedItems(test.Background(), lock.LockState{
Status: lock.LockedStatus,
})
assert.Equal(t, false, q.CanPop())
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package queue

import (
"fmt"

key "github.com/runatlantis/atlantis/server/neptune/context"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/notifier"

"github.com/runatlantis/atlantis/server/neptune/workflows/activities/github"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"go.temporal.io/sdk/workflow"
)

Expand All @@ -19,17 +21,18 @@ type LockStateUpdater struct {
}

func (u *LockStateUpdater) UpdateQueuedRevisions(ctx workflow.Context, queue *Deploy, repoFullName string) {
lock := queue.GetLockState()
queueLock := queue.GetLockState()
infos := queue.GetOrderedMergedItems()

var actions []github.CheckRunAction
var summary string
var revisionsSummary string = queue.GetQueuedRevisionsSummary()
state := github.CheckRunQueued
if lock.Status == LockedStatus {
if queueLock.Status == lock.LockedStatus {
actions = append(actions, github.CreateUnlockAction())
state = github.CheckRunActionRequired
revisionLink := github.BuildRevisionURLMarkdown(repoFullName, lock.Revision)
summary = fmt.Sprintf("This deploy is locked from a manual deployment for revision %s. Unlock to proceed.", revisionLink)
revisionLink := github.BuildRevisionURLMarkdown(repoFullName, queueLock.Revision)
summary = fmt.Sprintf("This deploy is locked from a manual deployment for revision %s. Unlock to proceed.\n%s", revisionLink, revisionsSummary)
}

for _, i := range infos {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package queue_test

import (
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/notifier"
"testing"
"time"

"github.com/runatlantis/atlantis/server/neptune/workflows/internal/notifier"

"github.com/google/uuid"
"github.com/runatlantis/atlantis/server/neptune/workflows/activities"
"github.com/runatlantis/atlantis/server/neptune/workflows/activities/github"
tfActivity "github.com/runatlantis/atlantis/server/neptune/workflows/activities/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/revision/queue"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/metrics"
Expand All @@ -25,9 +27,13 @@ type testCheckRunClient struct {
}

// CreateOrUpdate is the test double for the check-run client. It compares the
// deployment ID and request it receives against the expected values stored on
// the client and always returns check run ID 1 with a nil error.
//
// Both comparisons are evaluated unconditionally and the test is aborted when
// either one fails. A bare switch over the two assert.Equal calls would stop
// at the first matching case, so a matching request would leave the deployment
// ID unchecked, and FailNow would only run when both comparisons failed.
func (t *testCheckRunClient) CreateOrUpdate(ctx workflow.Context, deploymentID string, request notifier.GithubCheckRunRequest) (int64, error) {
	requestMatches := assert.Equal(t.expectedT, t.expectedRequest, request)
	deploymentIDMatches := assert.Equal(t.expectedT, t.expectedDeploymentID, deploymentID)
	if !requestMatches || !deploymentIDMatches {
		// assert.Equal has already recorded the mismatch; stop here so the
		// failure is not lost inside the workflow worker.
		t.expectedT.FailNow()
	}
	return 1, nil
}

Expand Down Expand Up @@ -121,8 +127,8 @@ func TestLockStateUpdater_locked_new_version(t *testing.T) {

env.ExecuteWorkflow(testUpdaterWorkflow, updaterReq{
Queue: []terraform.DeploymentInfo{info},
Lock: queue.LockState{
Status: queue.LockedStatus,
Lock: lock.LockState{
Status: lock.LockedStatus,
Revision: "1234",
},
ExpectedRequest: notifier.GithubCheckRunRequest{
Expand All @@ -147,7 +153,7 @@ func TestLockStateUpdater_locked_new_version(t *testing.T) {

type updaterReq struct {
Queue []terraform.DeploymentInfo
Lock queue.LockState
Lock lock.LockState
ExpectedRequest notifier.GithubCheckRunRequest
ExpectedDeploymentID string
ExpectedT *testing.T
Expand Down
16 changes: 10 additions & 6 deletions server/neptune/workflows/internal/deploy/revision/queue/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
internalContext "github.com/runatlantis/atlantis/server/neptune/context"
"github.com/runatlantis/atlantis/server/neptune/workflows/activities/deployment"
tfModel "github.com/runatlantis/atlantis/server/neptune/workflows/activities/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/lock"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/deploy/terraform"
"github.com/runatlantis/atlantis/server/neptune/workflows/internal/metrics"
"github.com/runatlantis/atlantis/server/neptune/workflows/plugins"
Expand All @@ -24,7 +25,10 @@ type queue interface {
IsEmpty() bool
CanPop() bool
Pop() (terraform.DeploymentInfo, error)
SetLockForMergedItems(ctx workflow.Context, state LockState)
SetLockForMergedItems(ctx workflow.Context, state lock.LockState)
GetOrderedMergedItems() []terraform.DeploymentInfo
GetQueuedRevisionsSummary() string
GetLockState() lock.LockState
}

type deployer interface {
Expand Down Expand Up @@ -96,7 +100,7 @@ func NewWorker(
},
}

tfWorkflowRunner := terraform.NewWorkflowRunner(tfWorkflow, notifiers, additionalNotifiers...)
tfWorkflowRunner := terraform.NewWorkflowRunner(q, tfWorkflow, githubCheckRunCache, notifiers, additionalNotifiers...)
deployer := &Deployer{
Activities: a,
TerraformWorkflowRunner: tfWorkflowRunner,
Expand All @@ -112,8 +116,8 @@ func NewWorker(
// we don't persist lock state anywhere so in the case of workflow completion we need to rebuild
// the lock state
if latestDeployment != nil && latestDeployment.Root.Trigger == string(tfModel.ManualTrigger) {
q.SetLockForMergedItems(ctx, LockState{
Status: LockedStatus,
q.SetLockForMergedItems(ctx, lock.LockState{
Status: lock.LockedStatus,
Revision: latestDeployment.Revision,
})
}
Expand Down Expand Up @@ -179,8 +183,8 @@ func (w *Worker) Work(ctx workflow.Context) {
workflow.GetMetricsHandler(ctx).WithTags(map[string]string{metricNames.SignalNameTag: UnlockSignalName}).
Counter(metricNames.SignalReceive).
Inc(1)
w.Queue.SetLockForMergedItems(ctx, LockState{
Status: UnlockedStatus,
w.Queue.SetLockForMergedItems(ctx, lock.LockState{
Status: lock.UnlockedStatus,
})
continue
default:
Expand Down
Loading

0 comments on commit 58ff899

Please sign in to comment.