Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse javascript dependency trees #2142

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ require (
github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953
github.com/knqyf263/go-rpmdb v0.0.0-20230301153543-ba94b245509b
github.com/mholt/archiver/v3 v3.5.1
github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5
github.com/microsoft/go-rustaudit v0.0.0-20220808201409-204dfee52032
github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/mitchellh/mapstructure v1.5.0
Expand Down Expand Up @@ -76,6 +76,11 @@ require (
modernc.org/sqlite v1.26.0
)

require (
github.com/samber/lo v1.38.1
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2
)

require (
dario.cat/mergo v1.0.0 // indirect
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230106234847-43070de90fa1 // indirect
Expand Down Expand Up @@ -203,12 +208,12 @@ require (
go.opentelemetry.io/otel v1.14.0 // indirect
go.opentelemetry.io/otel/trace v1.14.0 // indirect
golang.org/x/crypto v0.14.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/sync v0.3.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/tools v0.13.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
google.golang.org/grpc v1.55.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,8 @@ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQ
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5 h1:tQRHcLQwnwrPq2j2Qra/NnyjyESBGwdeBeVdAE9kXYg=
github.com/microsoft/go-rustaudit v0.0.0-20220730194248-4b17361d90a5/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g=
github.com/microsoft/go-rustaudit v0.0.0-20220808201409-204dfee52032 h1:TLygBUBxikNJJfLwgm+Qwdgq1FtfV8Uh7bcxRyTzK8s=
github.com/microsoft/go-rustaudit v0.0.0-20220808201409-204dfee52032/go.mod h1:vYT9HE7WCvL64iVeZylKmCsWKfE+JZ8105iuh2Trk8g=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
Expand Down Expand Up @@ -673,6 +673,8 @@ github.com/saferwall/pe v1.4.7/go.mod h1:SNzv3cdgk8SBI0UwHfyTcdjawfdnN+nbydnEL7G
github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=
github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
github.com/sanity-io/litter v1.5.5 h1:iE+sBxPBzoK6uaEP5Lt3fHNgpKcHXc/A2HGETy0uJQo=
github.com/sanity-io/litter v1.5.5/go.mod h1:9gzJgR2i4ZpjZHsKvUXIRQVk7P+yM3e+jAF7bU2UI5U=
github.com/sassoftware/go-rpmutils v0.2.0 h1:pKW0HDYMFWQ5b4JQPiI3WI12hGsVoW0V8+GMoZiI/JE=
Expand Down Expand Up @@ -858,6 +860,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
2 changes: 2 additions & 0 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
java.NewJavaPomCataloger(),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewJavaScriptCataloger(),
nix.NewStoreCataloger(),
php.NewComposerLockCataloger(),
portage.NewPortageCataloger(),
Expand Down Expand Up @@ -125,6 +126,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
java.NewJavaPomCataloger(),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(),
javascript.NewJavaScriptCataloger(),
javascript.NewPackageCataloger(),
kernel.NewLinuxKernelCataloger(cfg.LinuxKernel),
nix.NewStoreCataloger(),
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/common/cpe/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func candidateVendors(p pkg.Package) []string {
case pkg.ApkMetadataType:
vendors.union(candidateVendorsForAPK(p))
case pkg.NpmPackageJSONMetadataType:
vendors.union(candidateVendorsForJavascript(p))
vendors.union(candidateVendorsForJavaScript(p))
}

// We should no longer be generating vendor candidates with these values ["" and "*"]
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/common/cpe/javascript.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package cpe

import "github.com/anchore/syft/syft/pkg"

func candidateVendorsForJavascript(p pkg.Package) fieldCandidateSet {
func candidateVendorsForJavaScript(p pkg.Package) fieldCandidateSet {
if p.MetadataType != pkg.NpmPackageJSONMetadataType {
return nil
}
Expand Down
167 changes: 166 additions & 1 deletion syft/pkg/cataloger/generic/cataloger.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package generic

import (
"path/filepath"

"github.com/bmatcuk/doublestar/v4"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
Expand All @@ -10,19 +14,112 @@ import (
)

// processor resolves single-file parse requests against the given resolver.
type processor func(resolver file.Resolver, env Environment) []request

// groupedProcessor resolves parse requests that span multiple colocated files.
type groupedProcessor func(resolver file.Resolver, env Environment) []groupedRequest

// request pairs a single file location with the parser that should consume it.
type request struct {
	file.Location
	Parser
}

// groupedRequest pairs a set of colocated file locations with the parser that
// should consume them together. PrimaryFileLocation records which of the
// Locations matched the primary glob for the group.
type groupedRequest struct {
	Locations           []file.Location
	PrimaryFileLocation file.Location
	GroupedParser
}

// Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
// a given path or glob pattern. This is intended to be reusable across many package cataloger types.
type Cataloger struct {
	processor         []processor // file-selection/parser dispatch steps, applied in registration order
	upstreamCataloger string      // name of the cataloger this generic instance represents
}

// GroupedCataloger is a special case of Cataloger that will process files together
// this is needed for the case of package.json and package-lock.json files for example
type GroupedCataloger struct {
	groupedProcessor  []groupedProcessor // multi-file selection/parser dispatch steps
	upstreamCataloger string             // name of the cataloger this instance represents
}

// Name returns the name of the upstream cataloger that this grouped cataloger represents.
func (c *GroupedCataloger) Name() string {
	return c.upstreamCataloger
}

// isPrimaryFileGlobPresent reports whether primaryFileGlob appears in globs.
func isPrimaryFileGlobPresent(primaryFileGlob string, globs []string) bool {
	found := false
	for i := 0; i < len(globs) && !found; i++ {
		found = globs[i] == primaryFileGlob
	}
	return found
}

// generateGroupedProcessor builds a processor that yields one grouped request per
// directory containing at least one match for every glob in globs. The file that
// matched primaryFileGlob within each qualifying directory is recorded as the
// primary location of the group.
func generateGroupedProcessor(parser GroupedParser, primaryFileGlob string, globs []string) func(resolver file.Resolver, env Environment) []groupedRequest {
	return func(resolver file.Resolver, env Environment) []groupedRequest {
		var out []groupedRequest

		// only directories in which every glob matched at least one file qualify
		for _, group := range collectColocatedFiles(resolver, globs) {
			ok, primary := isAllGlobsMatched(group, globs, primaryFileGlob)
			if !ok {
				continue
			}
			out = append(out, makeGroupedRequests(parser, group, primary))
		}

		return out
	}
}

// collectColocatedFiles groups every file matching any of the given globs by the
// directory it lives in, returning a map of directory path -> matched locations.
// Globs that fail to resolve are logged and skipped (best-effort).
func collectColocatedFiles(resolver file.Resolver, globs []string) map[string][]file.Location {
	byDir := make(map[string][]file.Location)
	for _, pattern := range globs {
		log.WithFields("glob", pattern).Trace("searching for paths matching glob")
		locations, err := resolver.FilesByGlob(pattern)
		if err != nil {
			log.Warnf("unable to process glob=%q: %+v", pattern, err)
			continue
		}
		for _, loc := range locations {
			parent := filepath.Dir(loc.RealPath)
			byDir[parent] = append(byDir[parent], loc)
		}
	}
	return byDir
}

// isAllGlobsMatched reports whether every glob in globs matched at least one of
// the given file locations. It also returns the location that matched
// primaryFileGlob (the zero value if the primary glob matched nothing).
//
// Match errors from doublestar.PathMatch (malformed patterns) are deliberately
// ignored; a malformed glob simply never matches.
func isAllGlobsMatched(files []file.Location, globs []string, primaryFileGlob string) (bool, file.Location) {
	globMatches := make(map[string]bool)
	var primaryFileLocation file.Location

	for _, g := range globs {
		// note: loc (not "file") to avoid shadowing the imported file package
		for _, loc := range files {
			if matched, _ := doublestar.PathMatch(g, loc.RealPath); matched {
				if g == primaryFileGlob {
					primaryFileLocation = loc
				}
				globMatches[g] = true
				break
			}
		}
	}

	return len(globMatches) == len(globs), primaryFileLocation
}

// WithParserByGlobColocation registers a parser that only fires when files
// matching all of the provided globs are colocated in the same directory
// (e.g. package.json alongside package-lock.json). primaryFileGlob must be one
// of globs; if it is not, the registration is skipped with a warning and the
// cataloger is returned unchanged.
func (c *GroupedCataloger) WithParserByGlobColocation(parser GroupedParser, primaryFileGlob string, globs []string) *GroupedCataloger {
	if isPrimaryFileGlobPresent(primaryFileGlob, globs) {
		proc := generateGroupedProcessor(parser, primaryFileGlob, globs)
		c.groupedProcessor = append(c.groupedProcessor, proc)
	} else {
		log.Warnf("primary file glob=%q not present in globs=%+v", primaryFileGlob, globs)
	}
	return c
}

func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger {
c.processor = append(c.processor,
func(resolver file.Resolver, env Environment) []request {
Expand All @@ -43,6 +140,69 @@ func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger
return c
}

// selectFiles runs every registered grouped processor against the resolver and
// returns the combined set of grouped parse requests for future cataloging.
func (c *GroupedCataloger) selectFiles(resolver file.Resolver) []groupedRequest {
	var selected []groupedRequest
	for _, process := range c.groupedProcessor {
		selected = append(selected, process(resolver, Environment{})...)
	}
	return selected
}

// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
func (c *GroupedCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
	var packages []pkg.Package
	var relationships []artifact.Relationship

	logger := log.Nested("cataloger", c.upstreamCataloger)

	env := Environment{
		// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
		LinuxRelease: linux.IdentifyRelease(resolver),
	}

	for _, req := range c.selectFiles(resolver) {
		parser := req.GroupedParser
		var readClosers []file.LocationReadCloser

		// open every location in the group; locations that fail to open are
		// skipped (best-effort) so the parser still sees the remaining files
		for _, location := range req.Locations {
			log.WithFields("path", location.RealPath).Trace("parsing file contents")
			contentReader, err := resolver.FileContentsByLocation(location)
			if err != nil {
				logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents")
				continue
			}
			readClosers = append(readClosers, file.NewLocationReadCloser(location, contentReader))
		}

		// If your parser is expecting multiple file contents, ensure its signature reflects this change
		discoveredPackages, discoveredRelationships, err := parser(resolver, &env, readClosers)
		// readers are closed after parsing regardless of the parser outcome
		for _, rc := range readClosers {
			internal.CloseAndLogError(rc, rc.VirtualPath)
		}
		if err != nil {
			// a failed group does not abort the catalog; remaining requests still run
			logger.WithFields("error", err).Warnf("cataloger failed")
			continue
		}

		// stamp each discovered package with the upstream cataloger name
		for _, p := range discoveredPackages {
			p.FoundBy = c.upstreamCataloger
			packages = append(packages, p)
		}

		relationships = append(relationships, discoveredRelationships...)
	}
	return packages, relationships, nil
}

// makeGroupedRequests assembles a groupedRequest from its constituent parts.
func makeGroupedRequests(parser GroupedParser, locations []file.Location, primaryFileLocation file.Location) groupedRequest {
	var req groupedRequest
	req.Locations = locations
	req.PrimaryFileLocation = primaryFileLocation
	req.GroupedParser = parser
	return req
}

func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Cataloger {
c.processor = append(c.processor,
func(resolver file.Resolver, env Environment) []request {
Expand Down Expand Up @@ -98,6 +258,12 @@ func NewCataloger(upstreamCataloger string) *Cataloger {
}
}

// NewGroupedCataloger returns a new grouped cataloger for the given upstream
// cataloger name, with no processors registered yet.
func NewGroupedCataloger(upstreamCataloger string) *GroupedCataloger {
	c := &GroupedCataloger{}
	c.upstreamCataloger = upstreamCataloger
	return c
}

// Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents.
func (c *Cataloger) Name() string {
return c.upstreamCataloger
Expand Down Expand Up @@ -125,7 +291,6 @@ func (c *Cataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.R
logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents")
continue
}

discoveredPackages, discoveredRelationships, err := parser(resolver, &env, file.NewLocationReadCloser(location, contentReader))
internal.CloseAndLogError(contentReader, location.VirtualPath)
if err != nil {
Expand Down
35 changes: 35 additions & 0 deletions syft/pkg/cataloger/generic/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,41 @@ import (
"github.com/anchore/syft/syft/pkg"
)

// Test_WithParserByGlobColocation verifies that a grouped cataloger hands the
// parser the contents of all (and only) colocated files matching the configured globs.
func Test_WithParserByGlobColocation(t *testing.T) {
	matchedFilesPaths := make(map[string]bool)
	parser := func(resolver file.Resolver, env *Environment, readers []file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
		// record every file handed to the parser; no packages are produced
		for _, reader := range readers {
			matchedFilesPaths[reader.AccessPath()] = true
		}
		return nil, nil, nil
	}

	upstream := "colocation-cataloger"
	expectedCollocatedPaths := []string{
		"test-fixtures/pkg-json/package.json",
		"test-fixtures/pkg-json/package-lock.json",
	}

	resolver := file.NewMockResolverForPaths(expectedCollocatedPaths...)

	cataloger := NewGroupedCataloger(upstream).
		WithParserByGlobColocation(parser, "**/package-lock.json", []string{"**/package.json", "**/package-lock.json"})

	_, _, err := cataloger.Catalog(resolver)
	assert.NoError(t, err)

	for path := range matchedFilesPaths {
		t.Logf("Matched file path: %s", path) // Log each matched file
	}

	// Assert that exactly the expected files (no more, no fewer) were matched;
	// drive the assertions from expectedCollocatedPaths instead of duplicating literals
	require.Len(t, matchedFilesPaths, len(expectedCollocatedPaths))
	for _, path := range expectedCollocatedPaths {
		require.True(t, matchedFilesPaths[path], "expected %q to be parsed", path)
	}
}

func Test_Cataloger(t *testing.T) {
allParsedPaths := make(map[string]bool)
parser := func(resolver file.Resolver, env *Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
Expand Down
1 change: 1 addition & 0 deletions syft/pkg/cataloger/generic/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ type Environment struct {
}

type Parser func(file.Resolver, *Environment, file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error)
type GroupedParser func(file.Resolver, *Environment, []file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error)
Empty file.
Empty file.
Loading
Loading