Skip to content

Commit

Permalink
update oci collectors to use datasources (guacsec#421)
Browse files Browse the repository at this point in the history
* create inmemDataSource and change oci convention

Signed-off-by: Brandon Lum <[email protected]>

fix oci prefix for tests

Signed-off-by: Brandon Lum <[email protected]>

* ocicollector uses datasources

Signed-off-by: Brandon Lum <[email protected]>

* update oci collector to use datasource

Signed-off-by: Brandon Lum <[email protected]>

* edit oci collectors with new interface

Signed-off-by: Brandon Lum <[email protected]>

* clean up swp files

Signed-off-by: Brandon Lum <[email protected]>

---------

Signed-off-by: Brandon Lum <[email protected]>
  • Loading branch information
lumjjb authored Feb 10, 2023
1 parent 421309e commit d309318
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 49 deletions.
6 changes: 4 additions & 2 deletions cmd/collector/cmd/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"time"

"github.com/guacsec/guac/pkg/collectsub/datasource"
"github.com/guacsec/guac/pkg/emitter"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/file"
Expand All @@ -33,8 +34,9 @@ import (
type options struct {
// path to folder with documents to collect
path string
// map of image repo and tags
repoTags map[string][]string
// datasource for the collector
dataSource datasource.CollectSource
// address for NATS connection
natsAddr string
}

Expand Down
26 changes: 18 additions & 8 deletions cmd/collector/cmd/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ import (
"context"
"fmt"
"os"
"strings"
"time"

"github.com/guacsec/guac/pkg/collectsub/datasource"
"github.com/guacsec/guac/pkg/collectsub/datasource/inmemsource"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/oci"
"github.com/guacsec/guac/pkg/logging"
"github.com/regclient/regclient/types/ref"
"github.com/spf13/cobra"
"github.com/spf13/viper"
)
Expand All @@ -47,7 +49,7 @@ var ociCmd = &cobra.Command{
}

// Register collector
ociCollector := oci.NewOCICollector(ctx, opts.repoTags, false, 10*time.Minute)
ociCollector := oci.NewOCICollector(ctx, opts.dataSource, false, 10*time.Minute)
err = collector.RegisterDocumentCollector(ociCollector, oci.OCICollector)
if err != nil {
logger.Errorf("unable to register oci collector: %v", err)
Expand All @@ -59,20 +61,28 @@ var ociCmd = &cobra.Command{

func validateOCIFlags(natsAddr string, args []string) (options, error) {
var opts options

opts.natsAddr = natsAddr
opts.repoTags = map[string][]string{}

if len(args) < 1 {
return opts, fmt.Errorf("expected positional argument for image_path")
}

sources := []datasource.Source{}
for _, arg := range args {
stringSplit := strings.Split(arg, ":")
if len(stringSplit) == 2 {
opts.repoTags[stringSplit[0]] = append(opts.repoTags[stringSplit[0]], stringSplit[1])
} else {
if _, err := ref.New(arg); err != nil {
return opts, fmt.Errorf("image_path parsing error. require format repo:tag")
}
sources = append(sources, datasource.Source{
Value: arg,
})
}

var err error
opts.dataSource, err = inmemsource.NewInmemDataSources(&datasource.DataSources{
OciDataSources: sources,
})
if err != nil {
return opts, err
}

return opts, nil
Expand Down
5 changes: 3 additions & 2 deletions cmd/guacone/cmd/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/guacsec/guac/pkg/assembler"
"github.com/guacsec/guac/pkg/assembler/graphdb"
"github.com/guacsec/guac/pkg/collectsub/datasource"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/file"
"github.com/guacsec/guac/pkg/handler/processor"
Expand All @@ -50,8 +51,8 @@ type options struct {
keyID string
// path to folder with documents to collect
path string
// map of image repo and tags
repoTags map[string][]string
// datasource for collectors
dataSource datasource.CollectSource
}

var exampleCmd = &cobra.Command{
Expand Down
24 changes: 17 additions & 7 deletions cmd/guacone/cmd/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ import (
"context"
"fmt"
"os"
"strings"
"time"

"github.com/guacsec/guac/pkg/collectsub/datasource"
"github.com/guacsec/guac/pkg/collectsub/datasource/inmemsource"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/oci"
"github.com/guacsec/guac/pkg/handler/processor"
"github.com/guacsec/guac/pkg/logging"
"github.com/regclient/regclient/types/ref"
"github.com/spf13/cobra"
"github.com/spf13/viper"
)
Expand All @@ -51,7 +53,7 @@ var ociCmd = &cobra.Command{
}

// Register collector
ociCollector := oci.NewOCICollector(ctx, opts.repoTags, false, 10*time.Minute)
ociCollector := oci.NewOCICollector(ctx, opts.dataSource, false, 10*time.Minute)
err = collector.RegisterDocumentCollector(ociCollector, oci.OCICollector)
if err != nil {
logger.Errorf("unable to register oci collector: %v", err)
Expand Down Expand Up @@ -131,18 +133,26 @@ func validateOCIFlags(user string, pass string, dbAddr string, realm string, arg
opts.pass = pass
opts.dbAddr = dbAddr
opts.realm = realm
opts.repoTags = map[string][]string{}

if len(args) < 1 {
return opts, fmt.Errorf("expected positional argument for image_path")
}
sources := []datasource.Source{}
for _, arg := range args {
stringSplit := strings.Split(arg, ":")
if len(stringSplit) == 2 {
opts.repoTags[stringSplit[0]] = append(opts.repoTags[stringSplit[0]], stringSplit[1])
} else {
if _, err := ref.New(arg); err != nil {
return opts, fmt.Errorf("image_path parsing error. require format repo:tag")
}
sources = append(sources, datasource.Source{
Value: arg,
})
}

var err error
opts.dataSource, err = inmemsource.NewInmemDataSources(&datasource.DataSources{
OciDataSources: sources,
})
if err != nil {
return opts, err
}

return opts, nil
Expand Down
12 changes: 6 additions & 6 deletions pkg/collectsub/datasource/inmemsource/inmemsource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ func Test_InmemSourceGetDataSources(t *testing.T) {

expected := &datasource.DataSources{
OciDataSources: []datasource.Source{
{Value: "oci://abc"},
{Value: "oci://def"},
{Value: "abc"},
{Value: "def"},
},
GitDataSources: []datasource.Source{
{Value: "git+https://github.com/guacsec/guac"},
Expand All @@ -59,8 +59,8 @@ func Test_InmemSourceDataSourcesUpdate(t *testing.T) {

expected := &datasource.DataSources{
OciDataSources: []datasource.Source{
{Value: "oci://abc"},
{Value: "oci://def"},
{Value: "abc"},
{Value: "def"},
},
GitDataSources: []datasource.Source{
{Value: "git+https://github.com/guacsec/guac"},
Expand Down Expand Up @@ -94,8 +94,8 @@ func Test_InmemSourceDataSourcesUpdate(t *testing.T) {

expectedNew := &datasource.DataSources{
OciDataSources: []datasource.Source{
{Value: "oci://abc"},
{Value: "oci://def"},
{Value: "abc"},
{Value: "def"},
},
GitDataSources: []datasource.Source{
{Value: "git+https://github.com/guacsec/guac"},
Expand Down
75 changes: 63 additions & 12 deletions pkg/handler/collector/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"strings"
"time"

"github.com/guacsec/guac/pkg/collectsub/datasource"
"github.com/guacsec/guac/pkg/handler/processor"
"github.com/guacsec/guac/pkg/logging"
"github.com/pkg/errors"
Expand All @@ -34,35 +35,72 @@ const (
)

type ociCollector struct {
repoTags map[string][]string
checkedDigest map[string][]string
poll bool
interval time.Duration
collectDataSource datasource.CollectSource
checkedDigest map[string][]string
poll bool
interval time.Duration
}

// NewOCICollector initializes the oci collector by passing in the repo and tag being collected.
// Note: OCI collector can be called upon by a upstream registry collector in the future to collect from all
// repos in a given registry. For further details see issue #298
func NewOCICollector(ctx context.Context, repoTags map[string][]string, poll bool, interval time.Duration) *ociCollector {
func NewOCICollector(ctx context.Context, collectDataSource datasource.CollectSource, poll bool, interval time.Duration) *ociCollector {
return &ociCollector{
repoTags: repoTags,
checkedDigest: map[string][]string{},
poll: poll,
interval: interval,
collectDataSource: collectDataSource,
checkedDigest: map[string][]string{},
poll: poll,
interval: interval,
}
}

// RetrieveArtifacts get the artifacts from the collector source based on polling or one time
func (o *ociCollector) RetrieveArtifacts(ctx context.Context, docChannel chan<- *processor.Document) error {
repoTags := map[string][]string{}
logger := logging.FromContext(ctx)

populateRepoTags := func() error {
ds, err := o.collectDataSource.GetDataSources(ctx)
if err != nil {
return fmt.Errorf("unable to retrieve datasource: %w", err)
}

for _, d := range ds.OciDataSources {
imageRef, err := ref.New(d.Value)
if err != nil {
logger.Errorf("unable to parse OCI path: %v", d.Value)
continue
}
imagePath := fmt.Sprintf("%s/%s", imageRef.Registry, imageRef.Repository)

// If a image reference has no tag, then it is considered as getting all tags
if hasNoTag(imageRef) {
repoTags[imagePath] = []string{}
} else {
// if the list is equal to the empty list, it is already looking for
// all tags
if repoTags[imagePath] == nil || len(repoTags[imagePath]) > 0 {
repoTags[imagePath] = append(repoTags[imagePath], imageRef.Tag)
}

}
}
return nil
}

if o.poll {
for {
for repo, tags := range o.repoTags {
err := populateRepoTags()
if err != nil {
return fmt.Errorf("unable to populate repotags: %w", err)
}

for repo, tags := range repoTags {
// when polling if tags are specified, it will never get any new tags
// that might be added after the fact. Defeating the point of the polling
if len(tags) > 0 {
return errors.New("image tag should not specified when using polling")
}
err := o.getTagsAndFetch(ctx, repo, tags, docChannel)
err = o.getTagsAndFetch(ctx, repo, tags, docChannel)
if err != nil {
return err
}
Expand All @@ -71,7 +109,12 @@ func (o *ociCollector) RetrieveArtifacts(ctx context.Context, docChannel chan<-
}
}
} else {
for repo, tags := range o.repoTags {
err := populateRepoTags()
if err != nil {
return fmt.Errorf("unable to populate repotags: %w", err)
}

for repo, tags := range repoTags {
err := o.getTagsAndFetch(ctx, repo, tags, docChannel)
if err != nil {
return err
Expand Down Expand Up @@ -238,3 +281,11 @@ func contains(elems []string, v string) bool {
func (o *ociCollector) Type() string {
return OCICollector
}

// hasNoTag determines if an OCI string passed in had no tag
func hasNoTag(r ref.Ref) bool {
// the reference parsing automatically sets the tag to latest if there is no tag
// specified, thus we need to check the reference to see if the latest tag was actually
// included.
return r.Tag == "latest" && r.Digest == "" && !strings.HasSuffix(r.Reference, "latest")
}
Loading

0 comments on commit d309318

Please sign in to comment.