From 51bb7108617afa0348a7b1cc59b40d1410efe4ac Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 7 Apr 2023 15:12:25 -0400 Subject: [PATCH] add squashed with all layers resolver Signed-off-by: Alex Goodman --- syft/pkg/package.go | 2 + syft/source/image_all_layers_resolver.go | 6 +- syft/source/image_all_layers_resolver_test.go | 16 +-- syft/source/image_squash_resolver.go | 2 +- .../image_squash_with_all_layers_resolver.go | 98 +++++++++++++++++++ syft/source/scope.go | 7 +- syft/source/source.go | 4 +- 7 files changed, 121 insertions(+), 14 deletions(-) create mode 100644 syft/source/image_squash_with_all_layers_resolver.go diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 88351f2174d..4fc362a728d 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -59,6 +59,8 @@ func (p *Package) merge(other Package) error { return fmt.Errorf("cannot merge packages with different IDs: %q vs %q", p.id, other.id) } + log.WithFields("id", p.id, "purl", p.PURL).Trace("merging similar packages") + if p.PURL != other.PURL { log.Warnf("merging packages have with different pURLs: %q=%q vs %q=%q", p.id, p.PURL, other.id, other.PURL) } diff --git a/syft/source/image_all_layers_resolver.go b/syft/source/image_all_layers_resolver.go index ca40b12718c..3250b64dddf 100644 --- a/syft/source/image_all_layers_resolver.go +++ b/syft/source/image_all_layers_resolver.go @@ -18,8 +18,8 @@ type imageAllLayersResolver struct { layers []int } -// newAllLayersResolver returns a new resolver from the perspective of all image layers for the given image. -func newAllLayersResolver(img *image.Image) (*imageAllLayersResolver, error) { +// newImageAllLayersResolver returns a new resolver from the perspective of all image layers for the given image. +func newImageAllLayersResolver(img *image.Image) (*imageAllLayersResolver, error) { if len(img.Layers) == 0 { return nil, fmt.Errorf("the image does not contain any layers") } @@ -202,7 +202,7 @@ func (r *imageAllLayersResolver) FileContentsByLocation(location Location) (io.R return nil, fmt.Errorf("cannot read contents of non-file %q", location.ref.RealPath) } - return r.img.FileContentsByRef(location.ref) + return r.img.OpenReference(location.ref) } func (r *imageAllLayersResolver) FilesByMIMEType(types ...string) ([]Location, error) { diff --git a/syft/source/image_all_layers_resolver_test.go b/syft/source/image_all_layers_resolver_test.go index 1ba11304ddb..63265bf5036 100644 --- a/syft/source/image_all_layers_resolver_test.go +++ b/syft/source/image_all_layers_resolver_test.go @@ -91,7 +91,7 @@ func TestAllLayersResolver_FilesByPath(t *testing.T) { t.Run(c.name, func(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) if err != nil { t.Fatalf("could not create resolver: %+v", err) } @@ -205,7 +205,7 @@ func TestAllLayersResolver_FilesByGlob(t *testing.T) { t.Run(c.name, func(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) if err != nil { t.Fatalf("could not create resolver: %+v", err) } @@ -257,7 +257,7 @@ func Test_imageAllLayersResolver_FilesByMIMEType(t *testing.T) { t.Run(test.fixtureName, func(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureName) - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) assert.NoError(t, err) locations, err := resolver.FilesByMIMEType(test.mimeType) @@ -274,7 +274,7 @@ func Test_imageAllLayersResolver_FilesByMIMEType(t *testing.T) { func Test_imageAllLayersResolver_hasFilesystemIDInLocation(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-duplicate-path") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) assert.NoError(t, err) locations, err := resolver.FilesByMIMEType("text/plain") @@ -334,7 +334,7 @@ func TestAllLayersImageResolver_FilesContents(t *testing.T) { t.Run(test.name, func(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) assert.NoError(t, err) refs, err := resolver.FilesByPath(test.fixture) @@ -361,7 +361,7 @@ func TestAllLayersImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) assert.NoError(t, err) var dirLoc *Location @@ -675,7 +675,7 @@ func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) assert.NoError(t, err) actual := test.runner(resolver) @@ -689,7 +689,7 @@ func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { func TestAllLayersResolver_AllLocations(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") - resolver, err := newAllLayersResolver(img) + resolver, err := newImageAllLayersResolver(img) assert.NoError(t, err) paths := strset.New() diff --git a/syft/source/image_squash_resolver.go b/syft/source/image_squash_resolver.go index d62927b309c..367dabb9947 100644 --- a/syft/source/image_squash_resolver.go +++ b/syft/source/image_squash_resolver.go @@ -168,7 +168,7 @@ func (r *imageSquashResolver) FileContentsByLocation(location Location) (io.Read return nil, fmt.Errorf("unable to get file contents for directory: %+v", location) } - return r.img.FileContentsByRef(location.ref) + return r.img.OpenReference(location.ref) } func (r *imageSquashResolver) AllLocations() <-chan Location { diff --git a/syft/source/image_squash_with_all_layers_resolver.go b/syft/source/image_squash_with_all_layers_resolver.go new file mode 100644 index 00000000000..56cc651618d --- /dev/null +++ b/syft/source/image_squash_with_all_layers_resolver.go @@ -0,0 +1,98 @@ +package source + +import ( + "github.com/anchore/stereoscope/pkg/image" + "io" +) + +var _ FileResolver = (*imageSquashWithAllLayersResolver)(nil) + +// imageSquashWithAllLayersResolver acts like a squash resolver, but additionally returns all paths in earlier layers +// that have been added/modified (like the all-layers resolver). +type imageSquashWithAllLayersResolver struct { + squashed *imageSquashResolver + allLayers *imageAllLayersResolver +} + +// newImageSquashWithAllLayersResolver returns a new resolver from the perspective of the squashed representation for +// the given image, but additionally returns all instances of a path that have been added/modified. +func newImageSquashWithAllLayersResolver(img *image.Image) (*imageSquashWithAllLayersResolver, error) { + squashed, err := newImageSquashResolver(img) + if err != nil { + return nil, err + } + + allLayers, err := newImageAllLayersResolver(img) + if err != nil { + return nil, err + } + + return &imageSquashWithAllLayersResolver{ + squashed: squashed, + allLayers: allLayers, + }, nil +} + +func (i imageSquashWithAllLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { + return i.squashed.FileContentsByLocation(location) +} + +func (i imageSquashWithAllLayersResolver) HasPath(s string) bool { + return i.squashed.HasPath(s) +} + +func (i imageSquashWithAllLayersResolver) filterLocations(locations []Location, err error) ([]Location, error) { + if err != nil { + return locations, err + } + var ret []Location + for _, l := range locations { + if i.squashed.HasPath(l.RealPath) { + // not only should the real path to the file exist, but the way we took to get there should also exist + // (e.g. if we are looking for /etc/passwd, but the real path is /etc/passwd -> /etc/passwd-1, then we should + // make certain that /etc/passwd-1 exists) + if l.VirtualPath != "" && !i.squashed.HasPath(l.VirtualPath) { + continue + } + ret = append(ret, l) + } + } + return ret, nil +} + +func (i imageSquashWithAllLayersResolver) FilesByPath(paths ...string) ([]Location, error) { + return i.filterLocations(i.allLayers.FilesByPath(paths...)) +} + +func (i imageSquashWithAllLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) { + return i.filterLocations(i.allLayers.FilesByGlob(patterns...)) +} + +func (i imageSquashWithAllLayersResolver) FilesByMIMEType(types ...string) ([]Location, error) { + return i.filterLocations(i.allLayers.FilesByMIMEType(types...)) +} + +func (i imageSquashWithAllLayersResolver) RelativeFileByPath(l Location, path string) *Location { + if !i.squashed.HasPath(path) { + return nil + } + return i.allLayers.RelativeFileByPath(l, path) +} + +func (i imageSquashWithAllLayersResolver) AllLocations() <-chan Location { + var ret = make(chan Location) + go func() { + defer close(ret) + for l := range i.allLayers.AllLocations() { + if i.squashed.HasPath(l.RealPath) { + ret <- l + } + } + }() + + return ret +} + +func (i imageSquashWithAllLayersResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { + return fileMetadataByLocation(i.squashed.img, location) +} diff --git a/syft/source/scope.go b/syft/source/scope.go index e959d1a420f..5b0cf5d4e88 100644 --- a/syft/source/scope.go +++ b/syft/source/scope.go @@ -10,14 +10,17 @@ const ( UnknownScope Scope = "UnknownScope" // SquashedScope indicates to only catalog content visible from the squashed filesystem representation (what can be seen only within the container at runtime) SquashedScope Scope = "Squashed" - // AllLayersScope indicates to catalog content on all layers, irregardless if it is visible from the container at runtime. + // AllLayersScope indicates to catalog content on all layers, regardless if it is visible from the container at runtime. AllLayersScope Scope = "AllLayers" + // SquashedWithAllLayersScope indicates to catalog content on all layers, but only include content visible from the squashed filesystem representation. + SquashedWithAllLayersScope Scope = "SquashedWithAllLayers" ) // AllScopes is a slice containing all possible scope options var AllScopes = []Scope{ SquashedScope, AllLayersScope, + SquashedWithAllLayersScope, } // ParseScope returns a scope as indicated from the given string. @@ -27,6 +30,8 @@ func ParseScope(userStr string) Scope { return SquashedScope case "all-layers", strings.ToLower(AllLayersScope.String()): return AllLayersScope + case "squashed-with-all-layers", strings.ToLower(SquashedWithAllLayersScope.String()): + return SquashedWithAllLayersScope } return UnknownScope } diff --git a/syft/source/source.go b/syft/source/source.go index 29cfa461082..ad67f616695 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -466,7 +466,9 @@ func (s *Source) FileResolver(scope Scope) (FileResolver, error) { case SquashedScope: resolver, err = newImageSquashResolver(s.Image) case AllLayersScope: - resolver, err = newAllLayersResolver(s.Image) + resolver, err = newImageAllLayersResolver(s.Image) + case SquashedWithAllLayersScope: + resolver, err = newImageSquashWithAllLayersResolver(s.Image) default: return nil, fmt.Errorf("bad image scope provided: %+v", scope) }