Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add hyperscan support #2675

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions cve_bin_tool/checkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,11 @@ def __new__(cls, name, bases, props):
else:
cls.IGNORE_PATTERNS = list(map(re.compile, cls.IGNORE_PATTERNS))
# Compile regex
cls.CONTAINS_PATTERNS = list(map(re.compile, cls.CONTAINS_PATTERNS))
cls.VERSION_PATTERNS = list(map(re.compile, cls.VERSION_PATTERNS))
cls.FILENAME_PATTERNS = list(map(re.compile, cls.FILENAME_PATTERNS))
cls.CONTAINS_PATTERNS.extend(cls.VERSION_PATTERNS)
cls.REGEX_CONTAINS_PATTERNS = list(map(re.compile, cls.CONTAINS_PATTERNS))
cls.REGEX_VERSION_PATTERNS = list(map(re.compile, cls.VERSION_PATTERNS))
cls.REGEX_FILENAME_PATTERNS = list(map(re.compile, cls.FILENAME_PATTERNS))
cls.REGEX_CONTAINS_PATTERNS.extend(cls.REGEX_VERSION_PATTERNS)
cls.version_info = dict()
# Return the new checker class
return cls

Expand All @@ -405,26 +406,31 @@ class Checker(metaclass=CheckerMetaClass):
CONTAINS_PATTERNS: list[str] = []
VERSION_PATTERNS: list[str] = []
FILENAME_PATTERNS: list[str] = []
REGEX_CONTAINS_PATTERNS: list[str] = []
REGEX_VERSION_PATTERNS: list[str] = []
REGEX_FILENAME_PATTERNS: list[str] = []
VENDOR_PRODUCT: list[tuple[str, str]] = []
IGNORE_PATTERNS: list[str] = []

def guess_contains(self, lines):
if any(pattern.search(lines) for pattern in self.CONTAINS_PATTERNS):
if any(pattern.search(lines) for pattern in self.REGEX_CONTAINS_PATTERNS):
return True
return False

def get_version(self, lines, filename):
version_info = dict()
if any(pattern.match(filename) for pattern in self.REGEX_FILENAME_PATTERNS):
self.version_info["is_or_contains"] = "is"

if any(pattern.match(filename) for pattern in self.FILENAME_PATTERNS):
version_info["is_or_contains"] = "is"
if "is_or_contains" not in self.version_info and self.guess_contains(lines):
self.version_info["is_or_contains"] = "contains"

if "is_or_contains" not in version_info and self.guess_contains(lines):
version_info["is_or_contains"] = "contains"

if "is_or_contains" in version_info:
version_info["version"] = regex_find(
lines, self.VERSION_PATTERNS, self.IGNORE_PATTERNS
if "is_or_contains" in self.version_info:
version = regex_find(
lines, self.REGEX_VERSION_PATTERNS, self.IGNORE_PATTERNS
)

return version_info
# Don't override a "correct" version with UNKNOWN
if "version" not in self.version_info or version != "UNKNOWN":
self.version_info["version"] = version

return self.version_info
56 changes: 52 additions & 4 deletions cve_bin_tool/version_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from pathlib import Path, PurePath
from typing import Iterator

import attr
from pyperscan import Pattern, Scan, StreamDatabase

from cve_bin_tool.checkers import Checker
from cve_bin_tool.cvedb import CVEDB
from cve_bin_tool.egg_updater import IS_DEVELOP, update_egg
Expand All @@ -29,6 +32,14 @@
import importlib_resources as resources


@attr.define
class HyperscanMatchContext:
    """State handed to the hyperscan match callback for one scanned file.

    An instance is created per ``run_checkers`` call and passed to
    ``hyperscan_match`` every time a pattern matches.
    """

    # Scanner that started the scan.
    version_scanner: VersionScanner
    # Name of the file being scanned; forwarded to ``Checker.get_version``.
    filename: str
    # Text of the file being scanned; the callback slices it for get_version.
    lines: str
    # Output mapping filled by the callback: checker instance -> version info.
    task_result: dict


class InvalidFileError(Exception):
"""Filepath is invalid for scanning."""

Expand Down Expand Up @@ -73,6 +84,7 @@ def __init__(
self.validate = validate
# self.logger.info("Checkers loaded: %s" % (", ".join(self.checkers.keys())))
self.language_checkers = self.available_language_checkers()
self.hyperscan_db = None

@classmethod
def load_checkers(cls) -> dict[str, type[Checker]]:
Expand Down Expand Up @@ -239,15 +251,51 @@ def scan_file(self, filename: str) -> Iterator[ScanInfo]:

yield from self.run_checkers(filename, lines)

def build_hyperscan_database(
    self, checkers: dict[str, type[Checker]] | None = None
) -> None:
    """Compile the checkers' patterns into a single hyperscan stream database.

    The database is cached on ``self.hyperscan_db`` and built at most once,
    so calls made after a successful build return immediately and their
    ``checkers`` argument is not consulted.

    Args:
        checkers: Mapping of checker name to checker class. Falls back to
            ``self.checkers`` when omitted or ``None``.

    Each checker class is instantiated once. The instance is tagged with its
    name (``dummy_checker_name``) and attached to every pattern it
    contributes as the pattern's ``tag``, so the match callback receives the
    checker that owns the matching expression. ``self.hyperscan_db`` stays
    ``None`` when no checker contributes a pattern.
    """
    # The database must be built only once to improve performance
    if self.hyperscan_db is not None:
        return

    if checkers is None:
        checkers = self.checkers

    patterns = []
    for dummy_checker_name, checker_class in checkers.items():
        checker = checker_class()
        checker.dummy_checker_name = dummy_checker_name
        # An expression may appear in both lists; register it only once per
        # checker so a single hit does not trigger repeated callbacks.
        expressions = dict.fromkeys(
            checker.VERSION_PATTERNS + checker.CONTAINS_PATTERNS
        )
        patterns.extend(
            Pattern(expression.encode(), tag=checker) for expression in expressions
        )

    if patterns:
        self.hyperscan_db = StreamDatabase(*patterns)

@staticmethod
def hyperscan_match(
    context: HyperscanMatchContext, checker: Checker, offset: int, end: int
) -> Scan:
    """Hyperscan match callback: record the matching checker's version info.

    Args:
        context: Per-file scan state; supplies the text and filename and
            collects results in ``context.task_result``.
        checker: Checker instance attached as the matching pattern's tag.
        offset: Start of the match as reported by hyperscan.
        end: End of the match as reported by hyperscan.

    Returns:
        ``Scan.Continue`` so that scanning proceeds after this match.
    """
    # Hyperscan doesn't support group capture, so the match is confirmed
    # (and the version extracted) by get_version. SOM_LEFTMOST is not
    # enabled, so offset is always 0.
    matched_text = context.lines[offset:end]
    context.task_result[checker] = checker.get_version(
        matched_text, context.filename
    )
    return Scan.Continue

def run_checkers(self, filename: str, lines: str) -> Iterator[ScanInfo]:
"""process a Set of checker objects, run them on file lines,
and yield information about detected products and versions.
It uses logging to provide debug and error information along the way."""
self.build_hyperscan_database(self.checkers)

task_result = dict()
hyperscan_context = HyperscanMatchContext(
version_scanner=self,
filename=filename,
lines=lines,
task_result=task_result,
)

if self.hyperscan_db is not None:
scanner = self.hyperscan_db.build(hyperscan_context, self.hyperscan_match)
scanner.scan(lines.encode())

# tko
for dummy_checker_name, checker in self.checkers.items():
checker = checker()
result = checker.get_version(lines, filename)
for checker in task_result:
result = task_result[checker]
dummy_checker_name = checker.dummy_checker_name
# do some magic so we can iterate over all results, even the ones that just return 1 hit
if "is_or_contains" in result:
results = [dict()]
Expand Down
1 change: 1 addition & 0 deletions requirements.csv
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ python_not_in_db,importlib_resources
vsajip_not_in_db,python-gnupg
anthonyharrison_not_in_db,lib4sbom
the_purl_authors_not_in_db,packageurl-python
vlaci_not_in_db,pyperscan
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ python-gnupg
packageurl-python
packaging
plotly
pyperscan
pyyaml>=5.4
requests
rich
Expand Down
6 changes: 3 additions & 3 deletions test/test_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ class MyChecker(Checker):
VENDOR_PRODUCT = [("myvendor", "myproduct")]
IGNORE_PATTERNS = [r"ignore"]

assert type(MyChecker.CONTAINS_PATTERNS[0]) is Pattern
assert type(MyChecker.VERSION_PATTERNS[0]) is Pattern
assert type(MyChecker.FILENAME_PATTERNS[0]) is Pattern
assert type(MyChecker.REGEX_CONTAINS_PATTERNS[0]) is Pattern
assert type(MyChecker.REGEX_VERSION_PATTERNS[0]) is Pattern
assert type(MyChecker.REGEX_FILENAME_PATTERNS[0]) is Pattern
assert type(MyChecker.VENDOR_PRODUCT[0]) is VendorProductPair
assert type(MyChecker.IGNORE_PATTERNS[0]) is Pattern

Expand Down
Loading