Adding a new parser to cve-bin-tool

Overview

Parsers enhance cve-bin-tool by helping it discover vulnerabilities for different file types and manifest formats.

Parsers

The following parsers have been added to the project:

  • DartParser

  • GoParser

  • JavaParser

  • JavascriptParser

  • PerlParser

  • PhpParser

  • PythonParser

  • PythonRequirementsParser

  • RParser

  • RubyParser

  • RustParser

  • SwiftParser

  • BanditParser

Usage

To utilize these parsers, ensure that your project includes the following imports:

from cve_bin_tool.parsers.dart import DartParser
from cve_bin_tool.parsers.go import GoParser
from cve_bin_tool.parsers.java import JavaParser
from cve_bin_tool.parsers.javascript import JavascriptParser
from cve_bin_tool.parsers.perl import PerlParser
from cve_bin_tool.parsers.php import PhpParser
from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser
from cve_bin_tool.parsers.r import RParser
from cve_bin_tool.parsers.ruby import RubyParser
from cve_bin_tool.parsers.rust import RustParser
from cve_bin_tool.parsers.swift import SwiftParser
from cve_bin_tool.parsers.bandit import BanditParser

Setting Up a New Package and Entry Point

To implement a new parser plugin, follow these steps:

1. Create the Parser Class

First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in cve_bin_tool_parser_env/env.py.

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations

import dataclasses
import pathlib
import re

from packageurl import PackageURL

from cve_bin_tool.parsers import Parser
from cve_bin_tool.util import ProductInfo, ScanInfo


@dataclasses.dataclass
class EnvNamespaceConfig:
    """
    Configuration details for one environment namespace in the CVE Bin tool.

    Attributes:
        ad_hoc_cve_id: CVE ID associated with this namespace.
        vendor: Vendor name.
        product: Product name.
        version: Version of the product.
        location: File path where the product is located.
    """

    ad_hoc_cve_id: str
    vendor: str
    product: str
    version: str
    location: str = "/usr/local/bin/product"


@dataclasses.dataclass
class EnvConfig:
    """
    Configuration for multiple environment namespaces.

    Attributes:
        namespaces: Dictionary mapping namespace names to their
            EnvNamespaceConfig configurations.
    """

    namespaces: dict[str, EnvNamespaceConfig]


class EnvParser(Parser):
    """
    Parser for environment-style (.env) files that declare ad-hoc CVEs.

    Lines of the form ``CVE_BIN_TOOL_<NAMESPACE>_<FIELD>="value"`` are grouped
    by namespace. Each namespace describes one product together with the
    ad-hoc CVE ID that should be reported against it.
    """

    PARSER_MATCH_FILENAMES = [
        ".env",
    ]

    @staticmethod
    def parse_file_contents(contents):
        """
        Parse the contents of an environment configuration file.

        Only lines starting with ``CVE_BIN_TOOL_`` are considered. Lines that
        have no ``=`` or no ``_<FIELD>`` part after the namespace are skipped.

        Args:
            contents (str): textual content of environment configuration file
        Returns:
            EnvConfig: EnvConfig instance containing parsed namespace configurations
        Raises:
            TypeError: if a namespace is missing a required EnvNamespaceConfig
                field or defines a field EnvNamespaceConfig does not have.
        """
        prefix = "CVE_BIN_TOOL_"
        raw_namespaces = {}
        for line in contents.splitlines():
            if not line.startswith(prefix):
                continue
            key, separator, value = line.partition("=")
            namespace, underscore, field = key[len(prefix) :].partition("_")
            # Skip malformed lines instead of aborting the whole scan
            if not separator or not underscore or not field:
                continue
            # Drop one optional surrounding double quote on each side
            if value.startswith('"'):
                value = value[1:]
            if value.endswith('"'):
                value = value[:-1]
            raw_namespaces.setdefault(namespace, {})[field.lower()] = value
        return EnvConfig(
            namespaces={
                namespace: EnvNamespaceConfig(**fields)
                for namespace, fields in raw_namespaces.items()
            }
        )

    def run_checker(self, filename):
        """
        Parse the .env file and yield ScanInfo objects for the listed packages.

        Every parsed namespace is first written to the CVE database as an
        ad-hoc CVE (metrics, severity and affected range) and then yielded as
        a product so that it is matched against that CVE.

        Args:
            filename (str): The path to the .env file.
        Yields:
            ScanInfo: one ScanInfo per namespace declared in the file.
        """
        self.filename = filename
        # Decode explicitly so results do not depend on the locale; undecodable
        # bytes are replaced rather than failing the scan of an unrelated file
        contents = pathlib.Path(self.filename).read_text(
            encoding="utf-8", errors="replace"
        )

        env_config = self.parse_file_contents(contents)
        findings = list(env_config.namespaces.values())
        if not findings:
            # Nothing declared in this file, so there is nothing to store
            return

        data_source = "environment"
        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                # "versionStartIncluding": cve.version,
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                # "versionEndIncluding": cve.version,
                "versionEndExcluding": "",
            }
            for cve in findings
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": "LOW",
                # TODO description
                "description": "TODO",
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                "last_modified": "",
            }
            for cve in findings
        ]

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        resolved_path = pathlib.Path(filename).resolve()
        for cve in findings:
            yield ScanInfo(
                ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                ),
                resolved_path,
            )

2. Set Up setup.py

Next, create setup.py with the following boilerplate.

import site
import sys

import setuptools

# Workaround for https://github.com/pypa/pip/issues/7953: enable the user
# site-packages directory only when "--user" was passed on the command line.
site.ENABLE_USER_SITE = "--user" in sys.argv[1:]

# NOTE(review): use_scm_version presumably hands version derivation to
# setuptools_scm (listed under setup_requires in setup.cfg) — confirm.
setuptools.setup(use_scm_version=True)

3. Set Up setup.cfg

Next, configure the setup.cfg file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project.

[metadata]
name = cve-bin-tool-parser-env
version = 1.0.0
description = CVE Binary Tool: Parser: .env

[options]
packages = find:
entry_points = file: entry_points.txt
setup_requires =
    setuptools_scm[toml]>=3.4.3

4. Create entry_points.txt

The setup.cfg above loads its entry points from entry_points.txt, so create that file to register your parser under the cve_bin_tool.parsers group.

[cve_bin_tool.parsers]
env = cve_bin_tool_parser_env.env:EnvParser

5. Install your plugin

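If you set up a virtualenv, activate it before installing. The git init step is needed because setup.py uses use_scm_version=True, which expects the package to live inside a source-control checkout.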

$ touch cve_bin_tool_parser_env/__init__.py
$ git init
$ python -m pip install -e .

6. Populate the to-be-parsed file

In this example we implemented EnvParser, which reads files in the standard /etc/environment style (KEY="value" lines). Save the following as .env:

CVE_BIN_TOOL_0_PRODUCT="myproduct"
CVE_BIN_TOOL_0_VENDOR="myvendor"
CVE_BIN_TOOL_0_VERSION="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000"
CVE_BIN_TOOL_0_AD_HOC_CVE_ID="CVE-0001-15004435-aa84-43ff-9c26-f703a26069f8"

7. Run cve-bin-tool and see your plugin’s findings

Let’s check that the CVE we defined shows up by scanning the .env file.

$ cve-bin-tool --log debug .env

Advanced Example: Ad-Hoc CVEs

For more information see: https://github.com/ossf/wg-vulnerability-disclosures/issues/94

1. Create the Parser Class

First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py.

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations

import dataclasses
import json
import os
import pathlib
import re
import subprocess
import sys
import uuid

import yaml
from packageurl import PackageURL

from cve_bin_tool.parsers import Parser
from cve_bin_tool.util import ProductInfo, ScanInfo


@dataclasses.dataclass
class BanditNamespaceConfig:
    """
    Details of a single Bandit finding that is reported as an ad-hoc CVE.
    """

    # Ad-hoc CVE ID assigned to the finding
    ad_hoc_cve_id: str
    vendor: str
    product: str
    version: str
    # NOTE(review): annotated as str, but BanditParser.parse_bandit_output
    # stores Bandit's "line_number" here — confirm the intended type
    location: str
    # BanditParser.parse_bandit_output stores the JSON-encoded Bandit result here
    description: str
    severity: str
    score: float


@dataclasses.dataclass
class BanditConfig:
    """
    Collection of Bandit findings keyed by namespace name.
    """

    # Maps a namespace name to the finding recorded under it
    namespaces: dict[str, BanditNamespaceConfig]


class BanditParser(Parser):
    """
    Parser that runs the Bandit static analyzer on Python source files.

    Every Bandit result is recorded as an ad-hoc CVE against a synthetic
    vendor/product/version built from the current user and the scanned file.
    """

    PARSER_MATCH_FILENAMES = [
        ".py",
    ]

    @staticmethod
    def parse_bandit_output(filename, contents):
        """
        Convert a Bandit JSON report into a BanditConfig.

        Args:
            filename: Path of the scanned file; embedded in the product name.
            contents (str | bytes): JSON report produced by ``bandit -f json``.
        Returns:
            BanditConfig: one namespace named ``bandit-<index>`` per result.
        Raises:
            RuntimeError: if the report has a non-empty ``errors`` list.
            KeyError: if a result has no ``line_number``.
        """
        username = os.environ.get("USER", "unknown-user")
        # Set a default up front: the vendor string below needs a platform even
        # when the gh CLI is not configured on this machine
        platform = "unknown-platform"
        config_gh_hosts_yaml_path = pathlib.Path(
            "~", ".config", "gh", "hosts.yml"
        ).expanduser()
        if config_gh_hosts_yaml_path.exists():
            # GitHub username if gh CLI installed
            config_gh_hosts_yaml = yaml.safe_load(
                config_gh_hosts_yaml_path.read_text(encoding="utf-8")
            )
            github_host = "github.com"
            github_user = None
            # hosts.yml may be empty or list only other hosts, so look the
            # user up defensively and keep the defaults when it is absent
            if isinstance(config_gh_hosts_yaml, dict):
                host_entry = config_gh_hosts_yaml.get(github_host)
                if isinstance(host_entry, dict):
                    github_user = host_entry.get("user")
            if github_user:
                platform = github_host
                username = github_user
        vendor = f"username:{username}:platform:{platform}"
        product = f"filepath:{filename}"
        version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}"

        report = json.loads(contents)
        if report.get("errors"):
            raise RuntimeError(json.dumps(report))

        namespaces = {}
        for i, result in enumerate(report.get("results", [])):
            # TODO Version is the same when code at location matches code from
            # output (see result["issue_text"] and result["code"])

            # TODO Replace UUID with SCITT URN
            # SCITT A.4.2
            ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o"

            # TODO Sort by something, line? Int of content address?
            namespace = f"bandit-{i}"

            # TODO Take vendor product and version automatically from git repo
            # or installed pypi package meta-info.
            namespaces[namespace] = BanditNamespaceConfig(
                ad_hoc_cve_id=ad_hoc_cve_id,
                vendor=vendor,
                product=product,
                version=version,
                severity="LOW",
                score=0.0,
                location=result["line_number"],
                description=json.dumps(result),
            )
        return BanditConfig(namespaces=namespaces)

    def run_checker(self, filename):
        """
        Run Bandit on a Python file and yield ScanInfo objects for its findings.

        Each finding is written to the CVE database as an ad-hoc CVE. The
        findings of one file share a vendor/product/version, so one ScanInfo
        is yielded per distinct product rather than one per finding.

        Args:
            filename (str): The path to the Python file to analyze.
        Yields:
            ScanInfo: one per distinct (vendor, product, version) with findings.
        Raises:
            RuntimeError: if Bandit exits with a non-zero status or its report
                lists errors.
        """
        file_path = pathlib.Path(filename).resolve()
        cmd = [
            sys.executable,
            "-um",
            "bandit",
            "-f",
            "json",
            "--exit-zero",
            "--",
            # TODO Relative paths? Need top level directory being scanned
            str(file_path),
        ]
        try:
            # Capture stderr so a failure can report what Bandit printed;
            # without it CalledProcessError.stderr is always None
            stdout = subprocess.check_output(cmd, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as error:
            raise RuntimeError(
                (error.stderr or b"").decode(errors="replace")
            ) from error

        bandit_config = self.parse_bandit_output(filename, stdout)
        findings = list(bandit_config.namespaces.values())
        if not findings:
            # Clean file: nothing to store and nothing to report
            return

        # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE
        # by making a request to the policy engine and getting its workflow
        # manifest as output and deriving from that or extend it to return that.
        data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE"

        affected_data = [
            {
                "cve_id": cve.ad_hoc_cve_id,
                "vendor": cve.vendor,
                "product": cve.product,
                # TODO Version MUST be unique to this bug!
                "version": cve.version,
                "versionStartIncluding": "",
                # "versionStartIncluding": cve.version,
                "versionStartExcluding": "",
                "versionEndIncluding": "",
                # "versionEndIncluding": cve.version,
                "versionEndExcluding": "",
            }
            for cve in findings
        ]
        severity_data = [
            {
                "ID": cve.ad_hoc_cve_id,
                # TODO severity
                "severity": cve.severity,
                # TODO description
                "description": cve.description,
                # TODO score
                "score": 0,
                # TODO CVSS_version
                "CVSS_version": 3,
                # TODO CVSS_vector
                "CVSS_vector": "",
                # TODO Ideally this comes from bisecting and pinpointing the
                # bug's introduction to the codebase
                "last_modified": "",
            }
            for cve in findings
        ]

        with self.cve_db.with_cursor() as cursor:
            self.cve_db.populate_cve_metrics(severity_data, cursor)
            self.cve_db.populate_severity(severity_data, cursor, data_source)
            self.cve_db.populate_affected(affected_data, cursor, data_source)

        # Report each distinct product once; the individual findings are
        # attached to it through the ad-hoc CVEs stored above
        seen_products = set()
        for cve in findings:
            product_key = (cve.vendor, cve.product, cve.version)
            if product_key in seen_products:
                continue
            seen_products.add(product_key)
            yield ScanInfo(
                ProductInfo(
                    cve.vendor,
                    cve.product,
                    cve.version,
                    cve.location,
                    PackageURL(
                        type="ad-hoc",
                        namespace=cve.vendor,
                        name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(),
                        version=cve.version,
                        qualifiers={},
                        subpath=None,
                    ),
                ),
                file_path,
            )

        # TODO VEX attached via linked data to ad-hoc CVE-ID

2. Set Up setup.py

Next, create setup.py with the following boilerplate.

import site
import sys

import setuptools

# See https://github.com/pypa/pip/issues/7953
# Enable the user site-packages directory only when "--user" was passed on
# the command line (workaround for the pip issue linked above).
site.ENABLE_USER_SITE = "--user" in sys.argv[1:]

# NOTE(review): use_scm_version presumably hands version derivation to
# setuptools_scm (listed under setup_requires in setup.cfg) — confirm.
setuptools.setup(use_scm_version=True)

3. Set Up setup.cfg

Next, configure the setup.cfg file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project.

[metadata]
name = cve-bin-tool-parser-static-analysis-bandit
version = 1.0.0
description = CVE Binary Tool: Parser: Static Analysis: Bandit

[options]
packages = find:
entry_points = file: entry_points.txt
setup_requires =
    setuptools_scm[toml]>=3.4.3

4. Create entry_points.txt

The setup.cfg above loads its entry points from entry_points.txt, so create that file to register your parser under the cve_bin_tool.parsers group.

[cve_bin_tool.parsers]
static_analysis_bandit = cve_bin_tool_parser_static_analysis_bandit.static_analysis_bandit:BanditParser

5. Install your plugin

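If you set up a virtualenv, activate it before installing. The git init step is needed because setup.py uses use_scm_version=True, which expects the package to live inside a source-control checkout.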

$ touch cve_bin_tool_parser_static_analysis_bandit/__init__.py
$ git init
$ python -m pip install -e .

6. Run cve-bin-tool

In this example we implemented BanditParser, which runs the Bandit static analysis tool on Python files. We’ll test that it loads by scanning a .py file.

$ cve-bin-tool --format json --detail -- cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py

7. View Findings

Let’s view our two findings. The description field holds a JSON-encoded object describing the bug, so we decode it with jq.

$ cat output.cve-bin-tool.*.json | jq '.[] | .description = (.description | fromjson)'
{
    "vendor": "username:alice:platform:example.com",
    "product": "filepath:example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
    "version": "v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-2d8852cf-ebfd-4495-97e2-2ce23e4e557d",
    "location": 11,
    "cve_number": "CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...0...qnGmr1o",
    "severity": "LOW",
    "score": "unknown",
    "source": "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE",
    "cvss_version": "3",
    "cvss_vector": "unknown",
    "paths": "example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
    "remarks": "NewFound",
    "comments": "",
    "description": {
      "code": "10 import re\n11 import subprocess\n12 import sys\n",
      "col_offset": 0,
      "end_col_offset": 17,
      "filename": "/home/alice/Documents/python/cve-bin-tool/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
      "issue_confidence": "HIGH",
      "issue_cwe": {
        "id": 78,
        "link": "https://cwe.mitre.org/data/definitions/78.html"
      },
      "issue_severity": "LOW",
      "issue_text": "Consider possible security implications associated with the subprocess module.",
      "line_number": 11,
      "line_range": [
        11
      ],
      "more_info": "https://bandit.readthedocs.io/en/1.7.8/blacklists/blacklist_imports.html#b404-import-subprocess",
      "test_id": "B404",
      "test_name": "blacklist"
    }
  }
{
  "vendor": "username:alice:platform:example.com",
  "product": "filepath:example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
  "version": "v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-2d8852cf-ebfd-4495-97e2-2ce23e4e557d",
  "location": 11,
  "cve_number": "CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...1...qnGmr1o",
  "severity": "LOW",
  "score": "unknown",
  "source": "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE",
  "cvss_version": "3",
  "cvss_vector": "unknown",
  "paths": "example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
  "remarks": "NewFound",
  "comments": "",
  "description": {
    "code": "118         try:\n119             stdout = subprocess.check_output(\n120                 cmd,\n121             )\n122         except subprocess.CalledProcessError as error:\n",
    "col_offset": 21,
    "end_col_offset": 13,
    "filename": "/home/alice/Documents/python/cve-bin-tool/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py",
    "issue_confidence": "HIGH",
    "issue_cwe": {
      "id": 78,
      "link": "https://cwe.mitre.org/data/definitions/78.html"
    },
    "issue_severity": "LOW",
    "issue_text": "subprocess call - check for execution of untrusted input.",
    "line_number": 119,
    "line_range": [
      119,
      120,
      121
    ],
    "more_info": "https://bandit.readthedocs.io/en/1.7.8/plugins/b603_subprocess_without_shell_equals_true.html",
    "test_id": "B603",
    "test_name": "subprocess_without_shell_equals_true"
  }
}

Test Implementation

A new test class TestParsers has been introduced to verify that the expected file types are correctly mapped to their respective parsers. The test ensures that the actual valid files match the expected valid files.

Test Method

  • test_parser_match_filenames_results_in_correct_valid_files: This test compares the EXPECTED_VALID_FILES dictionary with the actual_valid_files dictionary imported from cve_bin_tool.parsers.parse. If there is any discrepancy between the two, the test will fail, indicating that the loaded file types do not match the expected registered file types.