Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add multiline string finder in helper script #1690

Merged
merged 16 commits into from
Jun 16, 2022
Merged
69 changes: 60 additions & 9 deletions cve_bin_tool/helper_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def __init__(
self.version_pattern: list[str] = []
self.vendor_product: list[tuple[str, str]] | None = []

self.multiline_pattern: bool = True

# for scanning files versions
self.version_scanner = VersionScanner()

Expand All @@ -75,7 +77,9 @@ def extract_and_parse_file(self, filename: str) -> list[str] | None:
LOGGER.debug(f"{clean_path} <--- this is an ELF binary")
file_content = self.version_scanner.parse_strings(filepath)

matches = self.search_pattern(file_content, self.product_name)
matches = self.search_pattern(
file_content, self.product_name, self.version_number
)

# searching for version strings in the found matches
version_string = self.search_version_string(matches)
Expand All @@ -101,18 +105,65 @@ def extract_and_parse_file(self, filename: str) -> list[str] | None:

LOGGER.debug(f"{self.filename_pattern}")

if not self.multiline_pattern:
self.version_pattern = [
x for x in self.version_pattern if "\\n" not in x
]

# to resolve case when there are no strings common with product_name in them
if self.contains_patterns:
return self.contains_patterns
return binary_string_list

def search_pattern(self, file_content: str, pattern: str) -> list[str]:
def search_pattern(
self, file_content: str, pattern: str, version_pattern: str
) -> list[str]:
"""find strings for CONTAINS_PATTERNS with product_name in them"""

file_content_list = file_content.split("\n")
matches = [
i.strip() for i in file_content_list if re.search(pattern, i, re.IGNORECASE)
]
version_pattern = rf".+{version_pattern}"
matches = []
product_matches = []
version_matches = []

for i, line in enumerate(file_content_list):
string_present = re.search(pattern, line, re.IGNORECASE)
version_present = re.search(version_pattern, line, re.IGNORECASE)
if string_present and version_present:
if line.find(".debug") != -1:
continue
matches.append(line.strip())
self.multiline_pattern = False
continue
if string_present:
product_matches.append([i, line.strip()])
if version_present:
version_matches.append([i, line.strip()])

for product_line_number, product in product_matches:
matches.append(product)

for version_line_number, version in version_matches:
if not product_matches:
break

closest_product_line_number = min(
product_matches, key=lambda x: abs(x[0] - version_line_number)
)[0]
line_distance = abs(closest_product_line_number - version_line_number)
closest_products = [
x
for x in product_matches
if abs(x[0] - version_line_number) == line_distance
]
for product_line_number, product in closest_products:
line = (
"(?:(?:\\r?\\n.*?)*)".join([product, version])
if version_line_number > product_line_number
else "(?:(?:\\r?\\n.*?)*)".join([version, product])
)
matches.append(line)

LOGGER.debug(
f"found matches = {matches}"
) # TODO: regex highlight in these matched strings?
Expand All @@ -121,12 +172,11 @@ def search_pattern(self, file_content: str, pattern: str) -> list[str]:
def search_version_string(self, matched_list: list[str]) -> list[str]:
"""finds version strings from matched list"""

# TODO: add multiline string finding

pattern1 = rf"{self.product_name}(.*){self.version_number}"
pattern2 = rf"{self.version_number}(.*){self.product_name}"
# ^ this does not work for debian packages

# pattern2 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
# pattern3 = rf"{self.product_name}(.*)([0-9]+[.-][0-9]+([.-][0-9]+)?)"
# this matches patterns like:
# product1.2.3
# product 1.2.3
Expand All @@ -138,7 +188,8 @@ def search_version_string(self, matched_list: list[str]) -> list[str]:
version_strings = [
i
for i in matched_list
if re.search(pattern1, i, re.IGNORECASE)
if re.search(pattern1, i, re.IGNORECASE | re.DOTALL)
or re.search(pattern2, i, re.IGNORECASE | re.DOTALL)
if not i.endswith(
".debug"
) # removes .debug, so this does not get printed
Expand Down
Binary file not shown.
47 changes: 40 additions & 7 deletions test/test_helper_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def test_parse_filename(self, filename, product_name, version_name):
def test_scan_files_no_product(self, caplog):
args = {
"filenames": [
"condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"condensed-downloads/dovecot-core_2.3.13+dfsg1-1ubuntu1_amd64.deb",
"test/condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"test/condensed-downloads/dovecot-core_2.3.13+dfsg1-1ubuntu1_amd64.deb",
],
"product_name": None,
"version_number": None,
Expand All @@ -71,8 +71,8 @@ def test_scan_files_no_product(self, caplog):
def test_scan_files_version(self, caplog):
args = {
"filenames": [
"condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"condensed-downloads/dovecot-core_2.3.13+dfsg1-1ubuntu1_amd64.deb",
"test/condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"test/condensed-downloads/dovecot-core_2.3.13+dfsg1-1ubuntu1_amd64.deb",
],
"product_name": "dovecot",
"version_number": "2.3.14",
Expand All @@ -89,8 +89,8 @@ def test_scan_files_version(self, caplog):
def test_scan_files_common(self, capfd):
args = {
"filenames": [
"condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"condensed-downloads/dovecot-core_2.3.13+dfsg1-1ubuntu1_amd64.deb",
"test/condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"test/condensed-downloads/dovecot-core_2.3.13+dfsg1-1ubuntu1_amd64.deb",
],
"product_name": "dovecot",
"version_number": "2.3.14",
Expand All @@ -107,7 +107,7 @@ def test_scan_files_common(self, capfd):
def test_scan_files_single(self, capfd):
args = {
"filenames": [
"condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
"test/condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
],
"product_name": "dovecot",
"version_number": "2.3.14",
Expand All @@ -121,6 +121,39 @@ def test_scan_files_single(self, capfd):
assert "VERSION_PATTERNS" in out
assert "VENDOR_PRODUCT" in out

def test_scan_files_multiline(self, capfd):
    """Check when the multiline joiner shows up after VERSION_PATTERNS.

    Two packages are scanned in turn: for the dovecot rpm the joiner must be
    absent from the printed output, for the gnome-shell rpm it must be present.
    """
    multiline_joiner = "(?:(?:\\r?\\n.*?)*)"

    # (package path, product name, version number, joiner expected in output?)
    scenarios = [
        (
            "test/condensed-downloads/dovecot-2.3.14-1.fc34.i686.rpm",
            "dovecot",
            "2.3.14",
            False,
        ),
        (
            "test/condensed-downloads/gnome-shell-41.2-1.fc35.x86_64.rpm",
            "gnome-shell",
            "41.2",
            True,
        ),
    ]

    for package_path, product, version, expect_joiner in scenarios:
        scan_files(
            {
                "filenames": [package_path],
                "product_name": product,
                "version_number": version,
                "string_length": 30,
            }
        )
        captured, _ = capfd.readouterr()
        # Only look at the text that follows the first VERSION_PATTERNS marker.
        version_section = captured.split("VERSION_PATTERNS")[1]
        if expect_joiner:
            assert multiline_joiner in version_section
        else:
            assert multiline_joiner not in version_section

# @pytest.mark.parametrize("filename", [
# "bash-4.2.46-34.el7.x86_64.abc" # unsupported file type
# ])

# @pytest.mark.parametrize("filename", [
# "bash-4.2.46-34.el7.x86_64.abc" # unsupported file type
# ])
Expand Down