Adding a new parser to cve-bin-tool¶
Overview¶
Parsers enhance cve-bin-tool
by helping it discover vulnerabilities for
different file types and manifest formats.
Parsers¶
The following parsers have been added to the project:
DartParser
GoParser
JavaParser
JavascriptParser
PerlParser
PhpParser
PythonParser
PythonRequirementsParser
RParser
RubyParser
RustParser
SwiftParser
BanditParser
Usage¶
To utilize these parsers, ensure that your project includes the following imports:
from cve_bin_tool.parsers.dart import DartParser
from cve_bin_tool.parsers.go import GoParser
from cve_bin_tool.parsers.java import JavaParser
from cve_bin_tool.parsers.javascript import JavascriptParser
from cve_bin_tool.parsers.perl import PerlParser
from cve_bin_tool.parsers.php import PhpParser
from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser
from cve_bin_tool.parsers.r import RParser
from cve_bin_tool.parsers.ruby import RubyParser
from cve_bin_tool.parsers.rust import RustParser
from cve_bin_tool.parsers.swift import SwiftParser
from cve_bin_tool.parsers.bandit import BanditParser
Setting Up a New Package and Entry Point¶
To implement a new parser plugin, follow these steps:
1. Create the Parser Class¶
First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in cve_bin_tool_parser_env/env.py
.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations
import dataclasses
import pathlib
import re
from packageurl import PackageURL
from cve_bin_tool.parsers import Parser
from cve_bin_tool.util import ProductInfo, ScanInfo
@dataclasses.dataclass
class EnvNamespaceConfig:
    """
    Configuration details for one environment namespace in the CVE Bin tool

    Attributes:
        ad_hoc_cve_id: CVE ID associated with this namespace
        vendor: vendor name
        product: product name
        version: version of the product
        location: file path where the product is located
    """

    ad_hoc_cve_id: str
    vendor: str
    product: str
    version: str
    location: str = "/usr/local/bin/product"
@dataclasses.dataclass
class EnvConfig:
    """
    Configuration for multiple environment namespaces

    Attributes:
        namespaces: dictionary mapping namespace names to their configurations
    """

    namespaces: dict[str, EnvNamespaceConfig]
class EnvParser(Parser):
    """
    Parser for .env files that declare ad-hoc CVEs.

    Lines of the form CVE_BIN_TOOL_<NAMESPACE>_<KEY>=<value> are grouped by
    namespace. Each namespace is written to the CVE database and reported as
    a ScanInfo carrying an "ad-hoc" PURL (Package URL).
    """

    PARSER_MATCH_FILENAMES = [
        ".env",
    ]

    @staticmethod
    def parse_file_contents(contents):
        """
        Parse the contents of an environment configuration file

        Only lines starting with CVE_BIN_TOOL_ are considered. Lines that
        lack an "=" or have no "_" between the namespace and the key are
        skipped instead of aborting the whole parse.

        Args:
            contents(str): textual content of environment configuration file
        Returns:
            EnvConfig: EnvConfig instance containing parsed namespace configurations
        Raises:
            TypeError: if a namespace has an unknown key or lacks a required
                one (raised by the EnvNamespaceConfig constructor)
        """
        prefix = "CVE_BIN_TOOL_"
        fields_by_namespace = {}
        for line in contents.splitlines():
            if not line.startswith(prefix):
                continue
            name, equals, value = line.partition("=")
            namespace, underscore, key = name[len(prefix) :].partition("_")
            if not equals or not underscore:
                # Malformed entry, e.g. "CVE_BIN_TOOL_0" or "CVE_BIN_TOOL_FOO=1"
                continue
            # Drop one surrounding double quote from each end, if present
            if value.startswith('"'):
                value = value[1:]
            if value.endswith('"'):
                value = value[:-1]
            # Keys are lowercased to match EnvNamespaceConfig field names
            fields_by_namespace.setdefault(namespace, {})[key.lower()] = value
        return EnvConfig(
            namespaces={
                namespace: EnvNamespaceConfig(**config)
                for namespace, config in fields_by_namespace.items()
            }
        )

    def run_checker(self, filename):
        """
        Parse the .env file and yield ScanInfo objects for the listed packages.

        Every namespace is first recorded in the CVE database under the
        "environment" data source, then reported.

        Args:
            filename (str): The path to the .env file.
        Yields:
            ScanInfo: one object per namespace declared in the file.
        """
        self.filename = filename
        contents = pathlib.Path(self.filename).read_text()
        env_config = self.parse_file_contents(contents)
        cves = list(env_config.namespaces.values())
        if not cves:
            # Nothing declared in this file: no database writes, no findings
            return
        data_source = "environment"
        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                # "versionStartIncluding": cve.version,
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                # "versionEndIncluding": cve.version,
                "versionEndExcluding": "",
            }
            for cve in cves
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": "LOW",
                # TODO description
                "description": "TODO",
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                "last_modified": "",
            }
            for cve in cves
        ]
        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)
        scanned_path = pathlib.Path(filename).resolve()
        for cve in cves:
            yield ScanInfo(
                ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        # Keep only letters, digits, ".", "_" and "-" in the PURL name
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                ),
                scanned_path,
            )
2. Set Up setup.py
¶
Next, configure the setup.py
file boilerplate.
import site
import sys
import setuptools
# Enable the per-user site directory only when "--user" is among the
# command-line arguments passed to this script.
# See https://github.com/pypa/pip/issues/7953
site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
# NOTE(review): use_scm_version presumably relies on setuptools_scm, which the
# accompanying setup.cfg lists under setup_requires - confirm.
setuptools.setup(use_scm_version=True)
3. Set Up setup.cfg
¶
Next, configure the setup.cfg
file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project.
[metadata]
name = cve-bin-tool-parser-env
version = 1.0.0
description = CVE Binary Tool: Parser: .env
[options]
packages = find:
entry_points = file: entry_points.txt
setup_requires =
setuptools_scm[toml]>=3.4.3
4. Create entry_points.txt
¶
Because the setup.cfg above reads its entry points from a file, create the entry_points.txt
file that registers your parser under the cve_bin_tool.parsers group.
[cve_bin_tool.parsers]
env = cve_bin_tool_parser_env.env:EnvParser
5. Install your plugin¶
You need to activate your virtualenv before installing if you set one up.
$ touch cve_bin_tool_parser_env/__init__.py
$ git init
$ python -m pip install -e .
6. Populate the to-be-parsed file¶
In this example we implemented the EnvParser,
which reads files in the standard
/etc/environment
style format. Let’s save the following as .env
.
CVE_BIN_TOOL_0_PRODUCT="myproduct"
CVE_BIN_TOOL_0_VENDOR="myvendor"
CVE_BIN_TOOL_0_VERSION="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000"
CVE_BIN_TOOL_0_AD_HOC_CVE_ID="CVE-0001-15004435-aa84-43ff-9c26-f703a26069f8"
7. Run cve-bin-tool
and see your plugin’s findings¶
Let’s test that our defined CVE comes up by scanning a .env
file.
$ cve-bin-tool --log debug .env
Advanced Example: Ad-Hoc CVEs¶
For more information see: https://github.com/ossf/wg-vulnerability-disclosures/issues/94
1. Create the Parser Class¶
First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py
.
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations
import dataclasses
import json
import os
import pathlib
import re
import subprocess
import sys
import uuid
import yaml
from packageurl import PackageURL
from cve_bin_tool.parsers import Parser
from cve_bin_tool.util import ProductInfo, ScanInfo
@dataclasses.dataclass
class BanditNamespaceConfig:
    """
    One Bandit finding expressed as an ad-hoc CVE

    Attributes:
        ad_hoc_cve_id: ad-hoc CVE ID assigned to the finding
        vendor: vendor name
        product: product name
        version: version of the product
        location: where the finding was reported
        description: description of the finding
        severity: severity label
        score: numeric score
    """

    ad_hoc_cve_id: str
    vendor: str
    product: str
    version: str
    # BanditParser.parse_bandit_output passes Bandit's "line_number" here,
    # which is an integer in the JSON report, hence int as well as str.
    location: int | str
    description: str
    severity: str
    score: float
@dataclasses.dataclass
class BanditConfig:
    """
    Collection of Bandit findings

    Attributes:
        namespaces: dictionary mapping namespace names to their findings
    """

    namespaces: dict[str, BanditNamespaceConfig]
class BanditParser(Parser):
    """
    Parser that runs Bandit on Python files and reports findings as ad-hoc CVEs.

    Each result in Bandit's JSON report becomes a BanditNamespaceConfig, is
    written to the CVE database, and is reported through a ScanInfo carrying
    an "ad-hoc" PURL (Package URL).
    """

    PARSER_MATCH_FILENAMES = [
        ".py",
    ]

    @staticmethod
    def parse_bandit_output(filename, contents):
        """
        Convert Bandit's JSON report into a BanditConfig.

        Args:
            filename (str): path of the scanned file, used to build the product name.
            contents (str | bytes): JSON report as produced by "bandit -f json".
        Returns:
            BanditConfig: one "bandit-<index>" namespace per reported result.
        Raises:
            Exception: if the report lists any errors; the message is the
                report re-serialized as JSON.
        """
        username = os.environ.get("USER", "unknown-user")
        # Set before the optional gh lookup so that the vendor string below
        # can always be built, even when no gh CLI configuration exists.
        platform = "unknown-platform"
        config_gh_hosts_yaml_path = pathlib.Path(
            "~", ".config", "gh", "hosts.yml"
        ).expanduser()
        if config_gh_hosts_yaml_path.exists():
            # GitHub username if gh CLI installed
            config_gh_hosts_yaml = yaml.safe_load(config_gh_hosts_yaml_path.read_text())
            gh_host = (
                config_gh_hosts_yaml.get("github.com")
                if isinstance(config_gh_hosts_yaml, dict)
                else None
            )
            gh_user = gh_host.get("user") if isinstance(gh_host, dict) else None
            if gh_user:
                platform = "github.com"
                username = gh_user
        vendor = f"username:{username}:platform:{platform}"
        product = f"filepath:{filename}"
        version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}"
        report = json.loads(contents)
        if report.get("errors", []):
            raise Exception(json.dumps(report))
        namespaces = {}
        for i, result in enumerate(report.get("results", [])):
            # TODO Version is the same when code at location matches code from
            # output (compare result["issue_text"] and result["code"])
            # TODO Replace UUID with SCITT URN
            # SCITT A.4.2
            ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o"
            # TODO Sort by something, line? Int of content address?
            namespace = f"bandit-{i}"
            # TODO Take vendor product and version automatically from git repo
            # or installed pypi package meta-info.
            namespaces[namespace] = BanditNamespaceConfig(
                ad_hoc_cve_id=ad_hoc_cve_id,
                vendor=vendor,
                product=product,
                version=version,
                severity="LOW",
                score=0.0,
                location=result["line_number"],
                # The whole Bandit result is kept as JSON text in the description
                description=json.dumps(result),
            )
        return BanditConfig(namespaces=namespaces)

    def run_checker(self, filename):
        """
        Run Bandit on a Python file and yield ScanInfo objects for its findings.

        Findings are first recorded in the CVE database, then reported.
        Findings that share vendor, product and version share one ProductInfo,
        built from the first such finding.

        Args:
            filename (str): The path to the Python file to scan.
        Yields:
            ScanInfo: one object per Bandit finding.
        Raises:
            Exception: if the Bandit subprocess exits with a non-zero status;
                the message is Bandit's standard error output.
        """
        file_path = pathlib.Path(filename).resolve()
        cmd = [
            sys.executable,
            "-um",
            "bandit",
            "-f",
            "json",
            "--exit-zero",
            "--",
            # TODO Relative paths? Need top level directory being scanned
            str(file_path),
        ]
        try:
            # stderr must be captured, otherwise error.stderr below is None
            stdout = subprocess.check_output(cmd, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as error:
            raise Exception(error.stderr.decode(errors="replace")) from error
        bandit_config = self.parse_bandit_output(filename, stdout)
        cves = list(bandit_config.namespaces.values())
        if not cves:
            # No findings: no database writes, nothing to report
            return
        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
        # by making a request to the policy engine and getting its workflow
        # manifest as output and deriving from that or extend it to return that.
        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"
        affected_data = []
        severity_data = []
        for cve in cves:
            affected_data.append(
                {
                    "cve_id": cve.ad_hoc_cve_id,
                    "vendor": cve.vendor,
                    "product": cve.product,
                    # TODO Version MUST be unique to this bug!
                    "version": cve.version,
                    "versionStartIncluding": "",
                    # "versionStartIncluding": cve.version,
                    "versionStartExcluding": "",
                    "versionEndIncluding": "",
                    # "versionEndIncluding": cve.version,
                    "versionEndExcluding": "",
                }
            )
            severity_data.append(
                {
                    "ID": cve.ad_hoc_cve_id,
                    # TODO severity
                    "severity": cve.severity,
                    # TODO description
                    "description": cve.description,
                    # TODO score
                    "score": cve.score,
                    # TODO CVSS_version
                    "CVSS_version": 3,
                    # TODO CVSS_vector
                    "CVSS_vector": "",
                    # TODO Ideally this comes from bisecting and pinpointing the
                    # bug's introduction to the codebase
                    "last_modified": "",
                }
            )
        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)
        product_infos = {}
        for cve in cves:
            product_infos_key = (
                cve.vendor,
                cve.product,
                cve.version,
            )
            product_info = product_infos.get(product_infos_key)
            if product_info is None:
                product_info = ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        # Keep only letters, digits, ".", "_" and "-" in the PURL name
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                )
                product_infos[product_infos_key] = product_info
            # Report the ProductInfo that matches this finding's own key
            yield ScanInfo(product_info, file_path)
        # TODO VEX attached via linked data to ad-hoc CVE-ID
2. Set Up setup.py
¶
Next, configure the setup.py
file boilerplate.
import site
import sys
import setuptools
# Enable the per-user site directory only when "--user" is among the
# command-line arguments passed to this script.
# See https://github.com/pypa/pip/issues/7953
site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
# NOTE(review): use_scm_version presumably relies on setuptools_scm, which the
# accompanying setup.cfg lists under setup_requires - confirm.
setuptools.setup(use_scm_version=True)
3. Set Up setup.cfg
¶
Next, configure the setup.cfg
file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project.
[metadata]
name = cve-bin-tool-parser-static-analysis-bandit
version = 1.0.0
description = CVE Binary Tool: Parser: Static Analysis: Bandit
[options]
packages = find:
entry_points = file: entry_points.txt
setup_requires =
setuptools_scm[toml]>=3.4.3
4. Create entry_points.txt
¶
Because the setup.cfg above reads its entry points from a file, create the entry_points.txt
file that registers your parser under the cve_bin_tool.parsers group.
[cve_bin_tool.parsers]
static_analysis_bandit = cve_bin_tool_parser_static_analysis_bandit.static_analysis_bandit:BanditParser
5. Install your plugin¶
You need to activate your virtualenv before installing if you set one up.
$ touch cve_bin_tool_parser_static_analysis_bandit/__init__.py
$ git init
$ python -m pip install -e .
6. Run cve-bin-tool
¶
In this example we implemented the BanditParser,
which runs Bandit, a static
analysis tool for Python files, on each file it scans. We’ll test that it loads by scanning
a .py
file.
$ cve-bin-tool --format json --detail -- cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py
7. View Findings¶
Let’s view our two findings. The description field holds JSON-encoded text, so we decode it with jq’s fromjson to get an object describing the bug.
$ cat output.cve-bin-tool.*.json | jq '.[] | .description = (.description | fromjson)'
{
"vendor": "username:alice:platform:example.com",
"product": "filepath:example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
"version": "v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-2d8852cf-ebfd-4495-97e2-2ce23e4e557d",
"location": 11,
"cve_number": "CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...0...qnGmr1o",
"severity": "LOW",
"score": "unknown",
"source": "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE",
"cvss_version": "3",
"cvss_vector": "unknown",
"paths": "example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
"remarks": "NewFound",
"comments": "",
"description": {
"code": "10 import re\n11 import subprocess\n12 import sys\n",
"col_offset": 0,
"end_col_offset": 17,
"filename": "/home/alice/Documents/python/cve-bin-tool/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 78,
"link": "https://cwe.mitre.org/data/definitions/78.html"
},
"issue_severity": "LOW",
"issue_text": "Consider possible security implications associated with the subprocess module.",
"line_number": 11,
"line_range": [
11
],
"more_info": "https://bandit.readthedocs.io/en/1.7.8/blacklists/blacklist_imports.html#b404-import-subprocess",
"test_id": "B404",
"test_name": "blacklist"
}
}
{
"vendor": "username:alice:platform:example.com",
"product": "filepath:example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
"version": "v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-2d8852cf-ebfd-4495-97e2-2ce23e4e557d",
"location": 11,
"cve_number": "CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...1...qnGmr1o",
"severity": "LOW",
"score": "unknown",
"source": "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE",
"cvss_version": "3",
"cvss_vector": "unknown",
"paths": "example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
"remarks": "NewFound",
"comments": "",
"description": {
"code": "118 try:\n119 stdout = subprocess.check_output(\n120 cmd,\n121 )\n122 except subprocess.CalledProcessError as error:\n",
"col_offset": 21,
"end_col_offset": 13,
"filename": "/home/alice/Documents/python/cve-bin-tool/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 78,
"link": "https://cwe.mitre.org/data/definitions/78.html"
},
"issue_severity": "LOW",
"issue_text": "subprocess call - check for execution of untrusted input.",
"line_number": 119,
"line_range": [
119,
120,
121
],
"more_info": "https://bandit.readthedocs.io/en/1.7.8/plugins/b603_subprocess_without_shell_equals_true.html",
"test_id": "B603",
"test_name": "subprocess_without_shell_equals_true"
}
}
Test Implementation¶
A new test class TestParsers has been introduced to verify that the expected file types are correctly mapped to their respective parsers. The test ensures that the actual valid files match the expected valid files.
Test Method¶
test_parser_match_filenames_results_in_correct_valid_files: This test compares the EXPECTED_VALID_FILES dictionary with the actual_valid_files dictionary imported from cve_bin_tool.parsers.parse. If there is any discrepancy between the two, the test will fail, indicating that the loaded file types do not match the expected registered file types.