Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Helper CLI function for merging muliple analyzer results into one #5317

Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion helper-cli/src/main/kotlin/HelperMain.kt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import org.ossreviewtoolkit.helper.commands.ListLicensesCommand
import org.ossreviewtoolkit.helper.commands.ListPackagesCommand
import org.ossreviewtoolkit.helper.commands.ListStoredScanResultsCommand
import org.ossreviewtoolkit.helper.commands.MapCopyrightsCommand
import org.ossreviewtoolkit.helper.commands.MergeAnalyzerResultsCommand
import org.ossreviewtoolkit.helper.commands.MergeRepositoryConfigurationsCommand
import org.ossreviewtoolkit.helper.commands.SetDependencyRepresentationCommand
import org.ossreviewtoolkit.helper.commands.SetLabelsCommand
Expand Down Expand Up @@ -101,7 +102,8 @@ internal class HelperMain : CliktCommand(name = ORTH_NAME, epilog = "* denotes r
SetLabelsCommand(),
SubtractScanResultsCommand(),
TransformResultCommand(),
VerifySourceArtifactCurationsCommand()
VerifySourceArtifactCurationsCommand(),
MergeAnalyzerResultsCommand()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please put this in alphabetical order.

)
}

Expand Down
291 changes: 291 additions & 0 deletions helper-cli/src/main/kotlin/commands/MergeAnalyzerResultsCommand.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
/*
* Copyright (C) 2021 Porsche AG
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copyright year should probably be 2021-2022.

*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package org.ossreviewtoolkit.helper.commands

import com.github.ajalt.clikt.core.CliktCommand
import com.github.ajalt.clikt.parameters.options.convert
import com.github.ajalt.clikt.parameters.options.option
import com.github.ajalt.clikt.parameters.options.required
import com.github.ajalt.clikt.parameters.options.split
import com.github.ajalt.clikt.parameters.types.file

import java.io.File
import java.time.Clock
import java.time.Instant
import java.time.ZonedDateTime
import java.util.LinkedList
import java.util.SortedMap
import java.util.SortedSet
import java.util.TreeMap
import java.util.TreeSet

import org.ossreviewtoolkit.model.AnalyzerResult
import org.ossreviewtoolkit.model.AnalyzerRun
import org.ossreviewtoolkit.model.Identifier
import org.ossreviewtoolkit.model.OrtIssue
import org.ossreviewtoolkit.model.OrtResult
import org.ossreviewtoolkit.model.PackageReference
import org.ossreviewtoolkit.model.Project
import org.ossreviewtoolkit.model.Repository
import org.ossreviewtoolkit.model.Scope
import org.ossreviewtoolkit.model.VcsInfo
import org.ossreviewtoolkit.model.config.AnalyzerConfiguration
import org.ossreviewtoolkit.model.config.Curations
import org.ossreviewtoolkit.model.config.Excludes
import org.ossreviewtoolkit.model.config.LicenseChoices
import org.ossreviewtoolkit.model.config.RepositoryConfiguration
import org.ossreviewtoolkit.model.config.Resolutions
import org.ossreviewtoolkit.model.readValue
import org.ossreviewtoolkit.model.writeValue
import org.ossreviewtoolkit.utils.common.expandTilde
import org.ossreviewtoolkit.utils.core.Environment

class MergeAnalyzerResultsCommand : CliktCommand(
help = "Read multiple analyzer result files and merge them into one combined analyzer result file."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please mention the most important limitations of the command here, esp. the flattening of the dependency tree.

) {
companion object {
private val utcClock = Clock.systemUTC()
private fun now(): Instant = ZonedDateTime.now(utcClock).toInstant()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is not required, you can just use Instant.now() like in the rest of the codebase.

}

private val inputAnalyzerResultFiles by option(
"--input-analyzer-result-files", "-i",
help = "A comma separated list of analyzer result files to be merged."
).convert { File(it.expandTilde()).absoluteFile.normalize() }.split(",").required()

private val outputAnalyzerResultFile by option(
"--output-analyzer-result-file", "-o",
help = "The output analyer file."
).convert { it.expandTilde() }
.file(mustExist = false, canBeFile = true, canBeDir = false, mustBeWritable = false, mustBeReadable = false)
.convert { it.absoluteFile.normalize() }
.required()

override fun run() {
val inputOrtResults: MutableList<AnalyzerRun> = LinkedList()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a name that fits the values like inputAnalyzerRuns.

val inputRepositories: MutableList<Repository> = LinkedList()

inputAnalyzerResultFiles.stream()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to use stream() in Kotlin (same several times below).

.map { it.readValue<OrtResult>() }
.forEach {
it.analyzer.let { value -> inputOrtResults.add(value!!) }
inputRepositories.add(it.repository)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better use below code to avoid !! and add the repository only if the result has an analyzer result:

it.analyzer?.let { analyzerRun ->
  inputOrtResults.add(analyzerRun)
  inputRepositories.add(it.repository)
}

}

val analyzerRun = AnalyzerRun(
startTime = now(),
endTime = now(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned above, use Instant.now(). Bet better would be to use the actual times from the input analyzer runs.

environment = Environment(),
config = aggregateAnalyzerConfiguration(inputOrtResults),
result = aggregrateAnalyzerRun(inputOrtResults)
)

val repository = Repository(
vcs = analyzerRun.result.projects.first().vcs,
vcsProcessed = analyzerRun.result.projects.first().vcsProcessed,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ORT requires in several places that it can find the relative path of a project to the analyzer root repository. This requirement cannot be fulfilled when merging multiple analyzer result files from independent repositories, because the model was never designed to support this. Taking the VCS of the arbitrary first project here will cause issues with the handling of license findings and repository configurations in later steps, this should at least be mentioned in the documentation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This limitation is well-known and therefore the merge code should assert that all merged analyzer results orignate from the same 'root' VCS.
I just realized that I have not implemented that in the first pull request

The helper command was by the need to handle the case of a multiple product teams working on individual project directories in a shared repository with each team delivering its own analyzer result.
From a logical perspective, it would not make any sense to merge analyzer results from arbitary sources.

config = aggregateRepositoryConfigurations(inputRepositories)
)

val outputFile = OrtResult(
repository = repository,
analyzer = analyzerRun
)

outputAnalyzerResultFile.writeValue(outputFile)
}

private fun aggregateRepositoryConfigurations(repositories: List<Repository>): RepositoryConfiguration {
fun mergeExcludes(leftExlcudes: Excludes, rightExcludes: Excludes): Excludes = Excludes(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo in leftExlcudes.

paths = (leftExlcudes.paths + rightExcludes.paths).distinct(),
scopes = (leftExlcudes.scopes + rightExcludes.scopes).distinct()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merging excludes can lead to unexpected results, if excludes from on analyzer result accidentally match content from another analyzer result.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Point taken. The excludes will not be merged then

)

fun mergeResolutions(leftResolutions: Resolutions, rightResolutions: Resolutions): Resolutions =
Resolutions(
issues = (leftResolutions.issues + rightResolutions.issues).distinct(),
ruleViolations = (leftResolutions.ruleViolations + rightResolutions.ruleViolations).distinct(),
vulnerabilities = (leftResolutions.vulnerabilities + rightResolutions.vulnerabilities).distinct()
)

fun mergeCurations(leftCurations: Curations, rightCurations: Curations): Curations =
Curations(
licenseFindings = (leftCurations.licenseFindings + rightCurations.licenseFindings).distinct()
)

fun mergeLicenseChoices(leftChoices: LicenseChoices, rightChoices: LicenseChoices): LicenseChoices =
LicenseChoices(
repositoryLicenseChoices = (
leftChoices.repositoryLicenseChoices
+ rightChoices.repositoryLicenseChoices
).distinct(),
packageLicenseChoices = (
leftChoices.packageLicenseChoices
+ rightChoices.packageLicenseChoices
).distinct()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merging license choices can lead to unexpected results, if they are contradictory, or if license choices from one analyzer result apply to findings from another analyzer result.
There are more similar consistency issues in the code below where data is lost because the model was not designed for this use case.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be true but we chose to leave that problem to the analyst to sort it out.

Anyway, it is bad sign if project team choose contradicting license statements

)

return repositories.stream()
.map { it.config }
.reduce { leftConfig, rightConfig ->
RepositoryConfiguration(
excludes = mergeExcludes(leftConfig.excludes, rightConfig.excludes),
resolutions = mergeResolutions(leftConfig.resolutions, rightConfig.resolutions),
curations = mergeCurations(leftConfig.curations, rightConfig.curations),
licenseChoices = mergeLicenseChoices(leftConfig.licenseChoices, rightConfig.licenseChoices)
)
}
.orElse(RepositoryConfiguration())
}

private fun aggregateAnalyzerConfiguration(inputOrtResults: List<AnalyzerRun>): AnalyzerConfiguration {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consistently use either plural or singular for the aggregate function names, but don't mix them.

return inputOrtResults.stream()
.map { it.config }
.reduce { a, b ->
AnalyzerConfiguration(
sw360Configuration = if (a.sw360Configuration != null)
a.sw360Configuration
else
b.sw360Configuration,
allowDynamicVersions = a.allowDynamicVersions or b.allowDynamicVersions
)
}.orElse(AnalyzerConfiguration(allowDynamicVersions = false))
}

private fun aggregrateAnalyzerRun(inputOrtResults: List<AnalyzerRun>): AnalyzerResult {
return inputOrtResults.stream()
.map { it.result }
.reduce { leftResult, rightResult ->
AnalyzerResult(
projects = wrapValueInSortedSet(
mergeProjects(
mergeSortedSets(
leftResult.projects,
rightResult.projects
)
)
),
packages = mergeSortedSets(leftResult.packages, rightResult.packages),
issues = mergeIssues(leftResult.issues, rightResult.issues)
)
}.orElse(AnalyzerResult.EMPTY)
}

private fun <T> mergeSortedSets(left: SortedSet<T>, right: SortedSet<T>): SortedSet<T> {
val set = TreeSet<T>()

set.addAll(left)
set.addAll(right)

return set
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be shorter:

sortedSetOf().apply {
  addAll(left)
  addAll(right)
}


private fun mergeIssues(
left: SortedMap<Identifier, List<OrtIssue>>,
right: SortedMap<Identifier, List<OrtIssue>>
): SortedMap<Identifier, List<OrtIssue>> {
val result = TreeMap<Identifier, List<OrtIssue>>()

result.putAll(left)

right.forEach { identifier, issuesList ->
val mergedList = LinkedList<OrtIssue>()

result.get(identifier)?.let { mergedList.addAll(it) }
mergedList.addAll(issuesList)

result.put(identifier, mergedList)
}

return result
}

private fun mergeProjects(projects: SortedSet<Project>): Project = projects
.stream()
.reduce { leftProject, rightProject ->
Project(
id = if (leftProject.id != Identifier.EMPTY) leftProject.id else rightProject.id,
definitionFilePath = firstNotEmptyString(
leftProject.definitionFilePath,
rightProject.definitionFilePath
),
authors = mergeSortedSets(leftProject.authors, rightProject.authors),
declaredLicenses = mergeSortedSets(leftProject.declaredLicenses, rightProject.declaredLicenses),
vcs = if (leftProject.vcs != VcsInfo.EMPTY) leftProject.vcs else rightProject.vcs,
homepageUrl = firstNotEmptyString(leftProject.homepageUrl, rightProject.homepageUrl),
scopeDependencies = mergeScopes(leftProject.scopes, rightProject.scopes)
)
}.orElse(Project.EMPTY)

private fun mergeScopes(leftScopes: SortedSet<Scope>, rightScopes: SortedSet<Scope>): SortedSet<Scope> {
val scopeMap = HashMap<String, Scope>()

leftScopes.forEach { scope -> scopeMap.put(scope.name, scope) }

rightScopes.forEach { scope ->
scopeMap.merge(
scope.name, scope,
{ left, right ->
Scope(
name = left.name,
dependencies = mergeSortedSets(
flattenDepdendencies(left.dependencies),
flattenDepdendencies(right.dependencies)
)
)
}
)
}

val resultScopes = TreeSet<Scope>()

resultScopes.addAll(scopeMap.values)

return resultScopes
}

private fun <T : Comparable<T>> wrapValueInSortedSet(value: T): SortedSet<T> {
val set = TreeSet<T>()

set.add(value)

return set
}

private fun firstNotEmptyString(left: String, right: String) = left.ifEmpty { right }

private fun flattenDepdendencies(dependencyTrees: SortedSet<PackageReference>): SortedSet<PackageReference> {
fun flattenDependenciesInternal(treeNode: PackageReference): SortedSet<PackageReference> {
val resultSet = TreeSet<PackageReference>()

resultSet.add(treeNode)

treeNode.dependencies.forEach { dependency -> resultSet.addAll(flattenDependenciesInternal(dependency)) }
treeNode.dependencies.clear()

return resultSet
}

val resultSet = TreeSet<PackageReference>()

dependencyTrees.forEach { dependency -> resultSet.addAll(flattenDependenciesInternal(dependency)) }

return resultSet
}
}