Skip to content

Commit

Permalink
Handle Global Cluster Nuke Failure (gruntwork-io#691)
Browse files Browse the repository at this point in the history
* Handle global DB cluster nuking failure

* Handle Global Cluster
  • Loading branch information
james03160927 authored Apr 30, 2024
1 parent a81d818 commit bdf0fdb
Show file tree
Hide file tree
Showing 10 changed files with 556 additions and 2 deletions.
2 changes: 2 additions & 0 deletions aws/resource_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func GetAndInitRegisteredResources(session *session.Session, region string) []*A
// GetRegisteredGlobalResources - returns a list of registered global resources.
func getRegisteredGlobalResources() []AwsResource {
return []AwsResource{
&resources.DBGlobalClusters{},
&resources.IAMUsers{},
&resources.IAMGroups{},
&resources.IAMPolicies{},
Expand Down Expand Up @@ -99,6 +100,7 @@ func getRegisteredRegionalResources() []AwsResource {
&resources.MSKCluster{},
&resources.NatGateways{},
&resources.OpenSearchDomains{},
&resources.DBGlobalClusterMemberships{},
&resources.DBInstances{},
&resources.DBSubnetGroups{},
&resources.DBClusters{},
Expand Down
4 changes: 2 additions & 2 deletions aws/resources/rds_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ package resources

import (
"context"
"github.com/gruntwork-io/cloud-nuke/util"
"time"

"github.com/gruntwork-io/cloud-nuke/util"

awsgo "github.com/aws/aws-sdk-go/aws"

"github.com/aws/aws-sdk-go/aws"
Expand Down Expand Up @@ -90,7 +91,6 @@ func (instance *DBClusters) nukeAll(names []*string) error {

if len(deletedNames) > 0 {
for _, name := range deletedNames {

err := instance.waitUntilRdsClusterDeleted(&rds.DescribeDBClustersInput{
DBClusterIdentifier: name,
})
Expand Down
106 changes: 106 additions & 0 deletions aws/resources/rds_global_cluster.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package resources

import (
"context"
"time"

awsgo "github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/rds"
"github.com/gruntwork-io/cloud-nuke/config"
"github.com/gruntwork-io/cloud-nuke/logging"
"github.com/gruntwork-io/cloud-nuke/report"
"github.com/gruntwork-io/go-commons/errors"
)

// Polling budget used while waiting for a global cluster to be deleted:
// 90 attempts spaced 10 seconds apart, i.e. wait up to 15 minutes.
const (
// dbGlobalClusterDeletionRetryDelay is the pause between two deletion polls.
dbGlobalClusterDeletionRetryDelay = 10 * time.Second
// dbGlobalClusterDeletionRetryCount is the maximum number of polls before giving up.
dbGlobalClusterDeletionRetryCount = 90
)

// getAll returns the identifiers of every RDS global cluster that passes the
// DBGlobalClusters include/exclude rules of configObj.
//
// DescribeGlobalClusters returns its results in pages, so the request is
// repeated with the Marker handed back by the service until no further page is
// reported; a single call would silently miss clusters beyond the first page.
// Any API error aborts the listing and is returned with a stack trace.
func (instance *DBGlobalClusters) getAll(c context.Context, configObj config.Config) ([]*string, error) {
	var names []*string

	input := &rds.DescribeGlobalClustersInput{}
	for {
		result, err := instance.Client.DescribeGlobalClustersWithContext(c, input)
		if err != nil {
			return nil, errors.WithStackTrace(err)
		}

		for _, cluster := range result.GlobalClusters {
			if !configObj.DBGlobalClusters.ShouldInclude(config.ResourceValue{
				Name: cluster.GlobalClusterIdentifier,
			}) {
				continue
			}

			names = append(names, cluster.GlobalClusterIdentifier)
		}

		// A missing or empty marker means this was the last page.
		if awsgo.StringValue(result.Marker) == "" {
			break
		}
		input.Marker = result.Marker
	}

	return names, nil
}

// nukeAll requests deletion of every RDS global cluster in names and then
// blocks until each accepted deletion has actually completed.
//
// One report entry is recorded per cluster, carrying the delete error if there
// was one. A failed delete request is logged and skipped; an error is returned
// only when waiting for an accepted deletion fails or times out.
//
// The report resource type and the log messages name the global cluster
// itself (not a membership), so reports distinguish this resource from
// DBGlobalClusterMemberships.
func (instance *DBGlobalClusters) nukeAll(names []*string) error {
	if len(names) == 0 {
		logging.Debugf("No RDS DB Global Cluster to nuke")
		return nil
	}

	logging.Debugf("Deleting RDS Global Clusters")
	deletedNames := []*string{}

	for _, name := range names {
		_, err := instance.Client.DeleteGlobalCluster(&rds.DeleteGlobalClusterInput{
			GlobalClusterIdentifier: name,
		})

		// Record status of this resource
		report.Record(report.Entry{
			Identifier:   aws.StringValue(name),
			ResourceType: "RDS Global Cluster",
			Error:        err,
		})

		if err != nil {
			// StringValue keeps a nil identifier from panicking the whole run.
			logging.Debugf("[Failed] %s: %s", awsgo.StringValue(name), err)
			continue
		}

		deletedNames = append(deletedNames, name)
		logging.Debugf("Deleted RDS DB Global Cluster: %s", awsgo.StringValue(name))
	}

	// Deletion is asynchronous: wait for every accepted request to finish.
	for _, name := range deletedNames {
		if err := instance.waitUntilRDSGlobalClusterDeleted(awsgo.StringValue(name)); err != nil {
			logging.Errorf("[Failed] %s", err)
			return errors.WithStackTrace(err)
		}
	}

	logging.Debugf("[OK] %d RDS Global DB Cluster(s) nuked in %s", len(deletedNames), instance.Region)
	return nil
}

// waitUntilRDSGlobalClusterDeleted polls DescribeGlobalClusters for the global
// cluster called name until the call fails with GlobalClusterNotFoundFault,
// which is treated as "deleted" and yields nil.
//
// Any other describe error is returned with a stack trace. While the describe
// call keeps succeeding the function sleeps dbGlobalClusterDeletionRetryDelay
// between polls, and after dbGlobalClusterDeletionRetryCount polls it gives up
// with an RdsDeleteError for name.
func (instance *DBGlobalClusters) waitUntilRDSGlobalClusterDeleted(name string) error {
	for attempt := 0; attempt < dbGlobalClusterDeletionRetryCount; attempt++ {
		_, describeErr := instance.Client.DescribeGlobalClusters(&rds.DescribeGlobalClustersInput{
			GlobalClusterIdentifier: &name,
		})

		// The cluster is still describable: pause and poll again.
		if describeErr == nil {
			time.Sleep(dbGlobalClusterDeletionRetryDelay)
			logging.Debug("Waiting for RDS Global Cluster to be deleted")
			continue
		}

		awsErr, isAwsErr := describeErr.(awserr.Error)
		if isAwsErr && awsErr.Code() == rds.ErrCodeGlobalClusterNotFoundFault {
			return nil
		}

		return errors.WithStackTrace(describeErr)
	}

	return RdsDeleteError{name: name}
}
151 changes: 151 additions & 0 deletions aws/resources/rds_global_cluster_membership.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package resources

import (
"context"
"fmt"
"strings"
"time"

awsgo "github.com/aws/aws-sdk-go/aws"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/rds"
"github.com/gruntwork-io/cloud-nuke/config"
"github.com/gruntwork-io/cloud-nuke/logging"
"github.com/gruntwork-io/cloud-nuke/report"
"github.com/gruntwork-io/go-commons/errors"
)

// Polling budget used while waiting for a cluster to leave its global cluster:
// 90 attempts spaced 10 seconds apart, i.e. wait up to 15 minutes.
const (
// dbGlobalClusterMembershipsRemovalRetryDelay is the pause between two membership polls.
dbGlobalClusterMembershipsRemovalRetryDelay = 10 * time.Second
// dbGlobalClusterMembershipsRemovalRetryCount is the maximum number of polls before giving up.
dbGlobalClusterMembershipsRemovalRetryCount = 90
)

// getAll returns the identifiers of every RDS global cluster that passes the
// DBGlobalClusterMemberships include/exclude rules of configObj. The returned
// identifiers are global cluster identifiers, not member cluster ARNs.
//
// DescribeGlobalClusters returns its results in pages, so the request is
// repeated with the Marker handed back by the service until no further page is
// reported; a single call would silently miss clusters beyond the first page.
// Any API error aborts the listing and is returned with a stack trace.
func (instance *DBGlobalClusterMemberships) getAll(c context.Context, configObj config.Config) ([]*string, error) {
	var names []*string

	input := &rds.DescribeGlobalClustersInput{}
	for {
		result, err := instance.Client.DescribeGlobalClustersWithContext(c, input)
		if err != nil {
			return nil, errors.WithStackTrace(err)
		}

		for _, cluster := range result.GlobalClusters {
			if !configObj.DBGlobalClusterMemberships.ShouldInclude(config.ResourceValue{
				Name: cluster.GlobalClusterIdentifier,
			}) {
				continue
			}

			names = append(names, cluster.GlobalClusterIdentifier)
		}

		// A missing or empty marker means this was the last page.
		if awsgo.StringValue(result.Marker) == "" {
			break
		}
		input.Marker = result.Marker
	}

	return names, nil
}

// nukeAll removes the member clusters of every global cluster listed in names
// by delegating to removeGlobalClusterMembership.
//
// One report entry is recorded per global cluster, carrying the removal error
// if there was one. Failures and global clusters with nothing to remove are
// only logged; the function itself always returns nil.
func (instance *DBGlobalClusterMemberships) nukeAll(names []*string) error {
	if len(names) == 0 {
		logging.Debugf("No RDS DB Global Cluster Membership to nuke")
		return nil
	}

	logging.Debugf("Deleting Global Cluster (members)")
	deletedNames := []*string{}

	for _, clusterName := range names {
		removed, removeErr := instance.removeGlobalClusterMembership(*clusterName)

		// Record status of this resource
		report.Record(report.Entry{
			Identifier:   aws.StringValue(clusterName),
			ResourceType: "RDS Global Cluster Membership",
			Error:        removeErr,
		})

		if removeErr != nil {
			logging.Debugf("[Failed] %s: %s", *clusterName, removeErr)
			continue
		}
		if !removed {
			logging.Debugf("No RDS Global Cluster Membership was deleted on %s", *clusterName)
			continue
		}

		deletedNames = append(deletedNames, clusterName)
		logging.Debugf("Deleted RDS DB Global Cluster Membership: %s", awsgo.StringValue(clusterName))
	}

	logging.Debugf("[OK] %d RDS Global DB Cluster(s) Membership nuked in %s", len(deletedNames), instance.Region)
	return nil
}

// removeGlobalClusterMembership detaches member clusters from the global
// cluster identified by name and waits until each detachment is visible in the
// global cluster description.
//
// When instance.Region is non-empty, only members whose ARN carries that
// region are removed; members in other regions are skipped. deleted is true
// when at least one member was removed. On any error deleted is false, even if
// some members had already been detached.
//
// Member ARNs are read defensively: a nil or malformed ARN produces an error
// instead of a nil-pointer or index-out-of-range panic, and error messages
// carry the member ARN rather than a dump of the whole member struct.
func (instance *DBGlobalClusterMemberships) removeGlobalClusterMembership(name string) (deleted bool, err error) {
	gdbcs, err := instance.Client.DescribeGlobalClusters(&rds.DescribeGlobalClustersInput{
		GlobalClusterIdentifier: &name,
	})
	if err != nil {
		return false, fmt.Errorf("fail to describe global cluster: %w", err)
	}
	if len(gdbcs.GlobalClusters) != 1 {
		return false, fmt.Errorf("unexpected describe result global cluster")
	}
	gdbc := gdbcs.GlobalClusters[0]
	if gdbc == nil || gdbc.GlobalClusterIdentifier == nil || *gdbc.GlobalClusterIdentifier != name {
		return false, fmt.Errorf("unexpected describe result global cluster")
	}

	var removedArns []string
	for _, member := range gdbc.GlobalClusterMembers {
		if member == nil {
			continue
		}
		memberArn := aws.StringValue(member.DBClusterArn)

		// ARN layout: arn:partition:service:region:account-id:resource,
		// so the region is the fourth colon-separated field.
		arnParts := strings.Split(memberArn, ":")
		if len(arnParts) < 4 {
			return false, fmt.Errorf("unexpected cluster ARN '%s' in global cluster '%s'", memberArn, name)
		}
		region := arnParts[3]

		if instance.Region != "" && instance.Region != region {
			logging.Debugf("Skip removing cluster '%s' from global cluster since it is in different region", memberArn)
			continue
		}

		logging.Debugf("Removing cluster '%s' from global cluster", memberArn)
		_, removeErr := instance.Client.RemoveFromGlobalCluster(&rds.RemoveFromGlobalClusterInput{
			GlobalClusterIdentifier: gdbc.GlobalClusterIdentifier,
			DbClusterIdentifier:     member.DBClusterArn,
		})
		if removeErr != nil {
			return false, fmt.Errorf("fail to remove cluster '%s' from global cluster :%w", memberArn, removeErr)
		}
		removedArns = append(removedArns, memberArn)
	}

	// Removal is asynchronous: wait until every detached member has disappeared
	// from the global cluster's member list.
	for _, memberArn := range removedArns {
		if waitErr := instance.waitUntilRdsClusterRemovedFromGlobalCluster(name, memberArn); waitErr != nil {
			return false, fmt.Errorf("fail to remove cluster '%s' from global cluster :%w", memberArn, waitErr)
		}
	}

	return len(removedArns) > 0, nil
}

// waitUntilRdsClusterRemovedFromGlobalCluster polls DescribeGlobalClusters for
// arnGlobalCluster until no returned global cluster lists a member whose
// DBClusterArn equals arnCluster, then returns nil.
//
// A describe error is returned immediately with a stack trace. Between polls
// the function sleeps dbGlobalClusterMembershipsRemovalRetryDelay, and after
// dbGlobalClusterMembershipsRemovalRetryCount polls it gives up with an
// RdsDeleteError naming arnCluster.
func (instance *DBGlobalClusterMemberships) waitUntilRdsClusterRemovedFromGlobalCluster(arnGlobalCluster string, arnCluster string) error {
	for attempt := 0; attempt < dbGlobalClusterMembershipsRemovalRetryCount; attempt++ {
		described, err := instance.Client.DescribeGlobalClusters(&rds.DescribeGlobalClustersInput{
			GlobalClusterIdentifier: &arnGlobalCluster,
		})
		if err != nil {
			return errors.WithStackTrace(err)
		}

		stillMember := false
		for _, globalCluster := range described.GlobalClusters {
			for _, member := range globalCluster.GlobalClusterMembers {
				if *member.DBClusterArn == arnCluster {
					stillMember = true
					// Only the member scan of this global cluster stops here.
					break
				}
			}
		}

		if stillMember {
			time.Sleep(dbGlobalClusterMembershipsRemovalRetryDelay)
			logging.Debug("Waiting for RDS Cluster to be removed from RDS Global Cluster")
			continue
		}

		return nil
	}

	return RdsDeleteError{name: arnCluster}
}
92 changes: 92 additions & 0 deletions aws/resources/rds_global_cluster_membership_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package resources

import (
"context"
"regexp"
"strings"
"testing"

awsgo "github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/rds"
"github.com/aws/aws-sdk-go/service/rds/rdsiface"
"github.com/gruntwork-io/cloud-nuke/config"
"github.com/stretchr/testify/assert"
)

// mockedDBGlobalClusterMemberships is a test double for the RDS client used by
// DBGlobalClusterMemberships. It returns the canned values stored in its
// fields from the methods it overrides.
type mockedDBGlobalClusterMemberships struct {
// Embedded so the mock has the full RDS API method set; only the methods
// defined on this type are backed by canned data.
rdsiface.RDSAPI
// DescribeGlobalClustersOutput is returned by both DescribeGlobalClusters variants.
DescribeGlobalClustersOutput rds.DescribeGlobalClustersOutput
// DescribeGlobalClustersError is returned alongside DescribeGlobalClustersOutput.
DescribeGlobalClustersError error
// RemoveFromGlobalClusterOutput is returned by RemoveFromGlobalCluster.
RemoveFromGlobalClusterOutput rds.RemoveFromGlobalClusterOutput
}

// RemoveFromGlobalCluster ignores its input and always succeeds, returning the
// canned RemoveFromGlobalClusterOutput.
func (m mockedDBGlobalClusterMemberships) RemoveFromGlobalCluster(input *rds.RemoveFromGlobalClusterInput) (*rds.RemoveFromGlobalClusterOutput, error) {
	canned := &m.RemoveFromGlobalClusterOutput
	return canned, nil
}

// DescribeGlobalClusters ignores its input and returns the canned output
// together with the canned error.
func (m mockedDBGlobalClusterMemberships) DescribeGlobalClusters(input *rds.DescribeGlobalClustersInput) (*rds.DescribeGlobalClustersOutput, error) {
	canned := &m.DescribeGlobalClustersOutput
	return canned, m.DescribeGlobalClustersError
}

// DescribeGlobalClustersWithContext ignores the context, input and request
// options and returns the canned output together with the canned error.
func (m mockedDBGlobalClusterMemberships) DescribeGlobalClustersWithContext(ctx context.Context, input *rds.DescribeGlobalClustersInput, _ ...request.Option) (*rds.DescribeGlobalClustersOutput, error) {
	canned := &m.DescribeGlobalClustersOutput
	return canned, m.DescribeGlobalClustersError
}

func TestRDSGlobalClusterMembershipGetAll(t *testing.T) {
t.Parallel()

testName := "test-db-global-cluster"
dbCluster := DBGlobalClusterMemberships{
Client: mockedDBGlobalClusterMemberships{
DescribeGlobalClustersOutput: rds.DescribeGlobalClustersOutput{
GlobalClusters: []*rds.GlobalCluster{
{
GlobalClusterIdentifier: &testName,
},
},
},
},
}

// Testing empty config
clusters, err := dbCluster.getAll(context.Background(), config.Config{DBGlobalClusterMemberships: config.ResourceType{}})
assert.NoError(t, err)
assert.Contains(t, awsgo.StringValueSlice(clusters), strings.ToLower(testName))

// Testing db cluster exclusion
clusters, err = dbCluster.getAll(context.Background(), config.Config{
DBGlobalClusterMemberships: config.ResourceType{
ExcludeRule: config.FilterRule{
NamesRegExp: []config.Expression{{
RE: *regexp.MustCompile(testName),
}},
},
},
})
assert.NoError(t, err)
assert.NotContains(t, awsgo.StringValueSlice(clusters), strings.ToLower(testName))
}

func TestRDSGlobalClusterMembershipNukeAll(t *testing.T) {

t.Parallel()

testName := "test-db-global-cluster"
dbCluster := DBGlobalClusterMemberships{
Client: mockedDBGlobalClusterMemberships{
DescribeGlobalClustersOutput: rds.DescribeGlobalClustersOutput{
GlobalClusters: []*rds.GlobalCluster{
{
GlobalClusterIdentifier: &testName,
GlobalClusterMembers: []*rds.GlobalClusterMember{},
},
},
},
RemoveFromGlobalClusterOutput: rds.RemoveFromGlobalClusterOutput{},
},
}

err := dbCluster.nukeAll([]*string{&testName})
assert.NoError(t, err)
}
Loading

0 comments on commit bdf0fdb

Please sign in to comment.