Skip to content

Commit

Permalink
Add concurrency options and handle retries (#125)
Browse files Browse the repository at this point in the history
This adds new functionality for controlling the overall concurrency and
bumps the default concurrency to a more sane value (1 -> 20). This also
fixes an error where some manifests would fail to delete because they
reference another image later. This fixes the error by introducing a
retry mechanism for manifests that fail to delete with a dangling parent
error (3 attempts).

Fixes #118
Closes #124
  • Loading branch information
sethvargo authored Jan 3, 2023
1 parent 4c71122 commit 59b043b
Show file tree
Hide file tree
Showing 8 changed files with 453 additions and 136 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ include these debug logs as they are very helpful in finding and fixing any
bugs.


## Concurrency

By default, GCR Cleaner will attempt to perform operations in parallel. You can
customize the concurrency with `-concurrency` on the CLI or by setting the
environment variable `GCRCLEANER_CONCURRENCY` on the server. It defaults to 20.


[artifact-registry]: https://cloud.google.com/artifact-registry
Expand Down
21 changes: 10 additions & 11 deletions cmd/gcr-cleaner-cli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"fmt"
"os"
"os/signal"
"runtime"
"sort"
"strings"
"syscall"
Expand All @@ -46,14 +45,15 @@ var (
var (
reposMap = make(map[string]struct{}, 4)

tokenPtr = flag.String("token", os.Getenv("GCRCLEANER_TOKEN"), "Authentication token")
recursivePtr = flag.Bool("recursive", false, "Clean all sub-repositories under the -repo root")
gracePtr = flag.Duration("grace", 0, "Grace period")
tagFilterAny = flag.String("tag-filter-any", "", "Delete images where any tag matches this regular expression")
tagFilterAll = flag.String("tag-filter-all", "", "Delete images where all tags match this regular expression")
keepPtr = flag.Int("keep", 0, "Minimum to keep")
dryRunPtr = flag.Bool("dry-run", false, "Do a noop on delete api call")
versionPtr = flag.Bool("version", false, "Print version information and exit")
tokenPtr = flag.String("token", os.Getenv("GCRCLEANER_TOKEN"), "Authentication token")
recursivePtr = flag.Bool("recursive", false, "Clean all sub-repositories under the -repo root")
gracePtr = flag.Duration("grace", 0, "Grace period")
tagFilterAny = flag.String("tag-filter-any", "", "Delete images where any tag matches this regular expression")
tagFilterAll = flag.String("tag-filter-all", "", "Delete images where all tags match this regular expression")
keepPtr = flag.Int64("keep", 0, "Minimum to keep")
dryRunPtr = flag.Bool("dry-run", false, "Do a noop on delete api call")
concurrencyPtr = flag.Int64("concurrency", 20, "Concurrent requests (defaults to number of CPUs)")
versionPtr = flag.Bool("version", false, "Print version information and exit")
)

func main() {
Expand Down Expand Up @@ -132,8 +132,7 @@ func realMain(ctx context.Context, logger *gcrcleaner.Logger) error {
gcrgoogle.Keychain,
)

concurrency := runtime.NumCPU()
cleaner, err := gcrcleaner.NewCleaner(keychain, logger, concurrency)
cleaner, err := gcrcleaner.NewCleaner(keychain, logger, *concurrencyPtr)
if err != nil {
return fmt.Errorf("failed to create cleaner: %w", err)
}
Expand Down
17 changes: 14 additions & 3 deletions cmd/gcr-cleaner-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"net/http"
"os"
"os/signal"
"runtime"
"strconv"
"syscall"
"time"

Expand All @@ -38,7 +38,19 @@ var (
)

var (
logLevel = os.Getenv("GCRCLEANER_LOG")
logLevel = os.Getenv("GCRCLEANER_LOG")
concurrency = func() int64 {
v := os.Getenv("GCRCLEANER_CONCURRENCY")
if v == "" {
return 20
}

i, err := strconv.ParseInt(v, 10, 64)
if err != nil {
panic(fmt.Errorf("failed to parse concurrency: %w", err))
}
return i
}()
)

func main() {
Expand Down Expand Up @@ -71,7 +83,6 @@ func realMain(ctx context.Context, logger *gcrcleaner.Logger) error {
gcrgoogle.Keychain,
)

concurrency := runtime.NumCPU()
cleaner, err := gcrcleaner.NewCleaner(keychain, logger, concurrency)
if err != nil {
return fmt.Errorf("failed to create cleaner: %w", err)
Expand Down
15 changes: 8 additions & 7 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,26 @@ module github.com/GoogleCloudPlatform/gcr-cleaner
go 1.19

require (
github.com/google/go-containerregistry v0.12.0
github.com/google/go-containerregistry v0.12.1
golang.org/x/sync v0.1.0
)

require (
cloud.google.com/go/compute/metadata v0.2.0 // indirect
github.com/docker/cli v20.10.20+incompatible // indirect
cloud.google.com/go/compute v1.14.0 // indirect
cloud.google.com/go/compute/metadata v0.2.3 // indirect
github.com/docker/cli v20.10.22+incompatible // indirect
github.com/docker/distribution v2.8.1+incompatible // indirect
github.com/docker/docker v20.10.20+incompatible // indirect
github.com/docker/docker v20.10.22+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0-rc2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/sirupsen/logrus v1.9.0 // indirect
golang.org/x/net v0.1.0 // indirect
golang.org/x/oauth2 v0.1.0 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/net v0.4.0 // indirect
golang.org/x/oauth2 v0.3.0 // indirect
golang.org/x/sys v0.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect
)
30 changes: 16 additions & 14 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
cloud.google.com/go/compute/metadata v0.2.0 h1:nBbNSZyDpkNlo3DepaaLKVuO7ClyifSAmNloSCZrHnQ=
cloud.google.com/go/compute/metadata v0.2.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k=
cloud.google.com/go/compute v1.14.0 h1:hfm2+FfxVmnRlh6LpB7cg1ZNU+5edAHmW679JePztk0=
cloud.google.com/go/compute v1.14.0/go.mod h1:YfLtxrj9sU4Yxv+sXzZkyPjEyPBZfXHUvjxega5vAdo=
cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY=
cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA=
github.com/containerd/stargz-snapshotter/estargz v0.12.1 h1:+7nYmHJb0tEkcRaAW+MHqoKaJYZmkikupxCqVtmPuY0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/cli v20.10.20+incompatible h1:lWQbHSHUFs7KraSN2jOJK7zbMS2jNCHI4mt4xUFUVQ4=
github.com/docker/cli v20.10.20+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/cli v20.10.22+incompatible h1:0E7UqWPcn4SlvLImMHyh6xwyNRUGdPxhstpHeh0bFL0=
github.com/docker/cli v20.10.22+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68=
github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v20.10.20+incompatible h1:kH9tx6XO+359d+iAkumyKDc5Q1kOwPuAUaeri48nD6E=
github.com/docker/docker v20.10.20+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker v20.10.22+incompatible h1:6jX4yB+NtcbldT90k7vBSaWJDB3i+zkVJT9BEK8kQkk=
github.com/docker/docker v20.10.22+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
Expand All @@ -18,8 +20,8 @@ github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-containerregistry v0.12.0 h1:nidOEtFYlgPCRqxCKj/4c/js940HVWplCWc5ftdfdUA=
github.com/google/go-containerregistry v0.12.0/go.mod h1:sdIK+oHQO7B93xI8UweYdl887YhuIwg9vz8BSLH3+8k=
github.com/google/go-containerregistry v0.12.1 h1:W1mzdNUTx4Zla4JaixCRLhORcR7G6KxE5hHl5fkPsp8=
github.com/google/go-containerregistry v0.12.1/go.mod h1:sdIK+oHQO7B93xI8UweYdl887YhuIwg9vz8BSLH3+8k=
github.com/klauspost/compress v1.15.11 h1:Lcadnb3RKGin4FYM/orgq0qde+nc15E5Cbqg4B9Sx9c=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
Expand All @@ -39,16 +41,16 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/vbatts/tar-split v0.11.2 h1:Via6XqJr0hceW4wff3QRzD5gAk/tatMw/4ZA7cTlIME=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0=
golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/oauth2 v0.1.0 h1:isLCZuhj4v+tYv7eskaN4v/TM+A1begWWgyVJDdl1+Y=
golang.org/x/oauth2 v0.1.0/go.mod h1:G9FE4dLTsbXUu90h/Pf85g4w1D+SSAgR+q46nJZ8M4A=
golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU=
golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/oauth2 v0.3.0 h1:6l90koy8/LaBLmLu8jpHeHexzMwEita0zFfYlggy2F8=
golang.org/x/oauth2 v0.3.0/go.mod h1:rQrIauxkUhJ6CuwEXwymO2/eh4xz2ZWF1nBkcxS+tGk=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
Expand Down
167 changes: 167 additions & 0 deletions internal/worker/worker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Package worker defines abstractions for parallelizing tasks.
package worker

import (
"context"
"fmt"
"runtime"
"sync"
"sync/atomic"

"golang.org/x/sync/semaphore"
)

// ErrStopped is the sentinel error returned by Do, Wait, and Done once the
// worker has been stopped by a call to Done.
var ErrStopped = fmt.Errorf("worker is stopped")

// Void is a convenience type for workers whose work functions do not produce a
// meaningful value (only an error), for example as the type argument in
// Worker[Void].
type Void struct{}

// WorkFunc is a unit of work executed by a Worker. The value and error it
// returns are recorded in the Result for that job.
type WorkFunc[T any] func() (T, error)

// Worker represents an instance of a worker. It is safe for concurrent use, but
// see function documentation for more specific semantics.
type Worker[T any] struct {
	// i is the index assigned to the most recently enqueued job; it starts at -1
	// and is advanced with the 64-bit atomic functions.
	//
	// It must remain the first field of the struct: on 32-bit platforms the
	// 64-bit atomic functions require 64-bit alignment, which is only guaranteed
	// for the first word of an allocated struct.
	i int64

	// size is the maximum concurrency, and therefore the total weight of sem.
	size int64
	// sem bounds the number of jobs running at once; each running job holds a
	// weight of 1.
	sem *semaphore.Weighted

	// results collects finished jobs in completion order; resultsLock guards it.
	results     []*result[T]
	resultsLock sync.Mutex

	// stopped is 0 while the worker accepts work and 1 once Done was called. It
	// is only accessed atomically.
	stopped uint32
}

// result is the internal result representation. It pairs a finished job's
// Result with the index the job was enqueued at, so that Done can return the
// results in enqueue order even though jobs complete in arbitrary order.
type result[T any] struct {
// idx is the zero-based position at which the job was enqueued.
idx int64
// result is the outcome of the job.
result *Result[T]
}

// Result is the final result of a single job, as returned to the caller by
// Done.
type Result[T any] struct {
// Value is the value returned by the job's WorkFunc.
Value T
// Error is the error returned by the job's WorkFunc, if any.
Error error
}

// New returns a worker that runs enqueued work in parallel, never exceeding
// the given concurrency. Jobs are started in the order they were enqueued, but
// they may finish in any order (i.e. this is not a pipeline).
//
// A concurrency below 1 falls back to the number of CPU cores, and to 1 if
// that is also below 1.
func New[T any](concurrency int64) *Worker[T] {
	limit := concurrency
	if limit < 1 {
		limit = int64(runtime.NumCPU())
		if limit < 1 {
			limit = 1
		}
	}

	w := new(Worker[T])
	w.size = limit
	// Start at -1 so the first enqueued job is assigned index 0.
	w.i = -1
	w.sem = semaphore.NewWeighted(limit)
	w.results = make([]*result[T], 0, limit)
	return w
}

// Do enqueues fn for execution. When every worker slot is busy it blocks until
// a slot frees up or the provided context is cancelled. It returns as soon as
// the work has been scheduled; it does not wait for fn to finish.
//
// To wait for all work to complete and collect the results, call
// [Worker.Done]. Do returns an error in exactly two situations:
//
//   - The worker was stopped via [Worker.Done]. No more work should be
//     enqueued. The error is [ErrStopped].
//   - The incoming context was cancelled. Enqueuing more work is probably not
//     useful, but that is an application-specific decision. The error is
//     [context.DeadlineExceeded] or [context.Canceled].
//
// Never call Do from within a Do function because it will deadlock.
func (w *Worker[T]) Do(ctx context.Context, fn WorkFunc[T]) error {
	// Refuse new work up front once the worker has been stopped.
	if w.isStopped() {
		return ErrStopped
	}

	// Block until a slot is free or the context ends.
	if acquireErr := w.sem.Acquire(ctx, 1); acquireErr != nil {
		return acquireErr
	}

	// The worker may have been stopped while this call was blocked on the
	// semaphore; if so, hand the slot back and bail out.
	if w.isStopped() {
		w.sem.Release(1)
		return ErrStopped
	}

	// Reserve this job's position so Done can restore the enqueue order.
	idx := atomic.AddInt64(&w.i, 1)

	go func() {
		// Registered first so it runs last: the slot is only freed after the
		// result has been recorded.
		defer w.sem.Release(1)

		value, workErr := fn()
		entry := &result[T]{
			idx:    idx,
			result: &Result[T]{Value: value, Error: workErr},
		}

		w.resultsLock.Lock()
		defer w.resultsLock.Unlock()
		w.results = append(w.results, entry)
	}()

	return nil
}

// Wait blocks until all queued jobs are finished or the provided context is
// cancelled. It does not stop the worker.
//
// It returns [ErrStopped] if the worker has already been stopped, or the
// context's error if the context ends before all jobs have finished.
func (w *Worker[T]) Wait(ctx context.Context) error {
	// A stopped worker cannot be waited on.
	if w.isStopped() {
		return ErrStopped
	}

	// Acquiring the full weight only succeeds once every running job has
	// released its slot.
	if err := w.sem.Acquire(ctx, w.size); err != nil {
		// Nothing was acquired, so nothing may be released here: doing so would
		// release weight still held by running jobs and panic or corrupt the
		// semaphore's accounting.
		return err
	}

	// Give the slots back immediately so new work can be scheduled.
	w.sem.Release(w.size)
	return nil
}

// Done immediately stops the worker so that no new work can be enqueued, then
// waits for all existing work to finish and returns the results.
//
// The results are returned in the order in which jobs were enqueued into the
// worker. Each result carries the job's value or its corresponding error. The
// function itself returns an error only if the context is cancelled or the
// worker was already stopped.
//
// If the worker is already done, it returns [ErrStopped].
func (w *Worker[T]) Done(ctx context.Context) ([]*Result[T], error) {
	// Only the first caller flips the flag; every later call is rejected.
	if swapped := atomic.CompareAndSwapUint32(&w.stopped, 0, 1); !swapped {
		return nil, ErrStopped
	}

	// Holding the semaphore's full weight means no job is still running.
	err := w.sem.Acquire(ctx, w.size)
	if err != nil {
		return nil, err
	}
	defer w.sem.Release(w.size)

	w.resultsLock.Lock()
	defer w.resultsLock.Unlock()

	// Results were appended in completion order; place each one at the index
	// it was enqueued with.
	ordered := make([]*Result[T], len(w.results))
	for _, entry := range w.results {
		ordered[entry.idx] = entry.result
	}
	return ordered, nil
}

// isStopped reports whether the worker has been stopped. The flag is read
// atomically, so it is safe for concurrent use.
func (w *Worker[T]) isStopped() bool {
	state := atomic.LoadUint32(&w.stopped)
	return state == 1
}
Loading

0 comments on commit 59b043b

Please sign in to comment.