diff --git a/cyclonedx/model/bom.py b/cyclonedx/model/bom.py index 7cb0081e9..3f6181c79 100644 --- a/cyclonedx/model/bom.py +++ b/cyclonedx/model/bom.py @@ -19,17 +19,14 @@ from collections.abc import Generator, Iterable from datetime import datetime from enum import Enum -from itertools import chain from typing import TYPE_CHECKING, Optional, Union from uuid import UUID, uuid4 -from warnings import warn import py_serializable as serializable from sortedcontainers import SortedSet from .._internal.compare import ComparableTuple as _ComparableTuple from .._internal.time import get_now_utc as _get_now_utc -from ..exception.model import LicenseExpressionAlongWithOthersException, UnknownComponentDependencyException from ..schema.deprecation import SchemaDeprecationWarning1Dot6 from ..schema.schema import ( SchemaVersion1Dot0, @@ -48,7 +45,7 @@ from .contact import OrganizationalContact, OrganizationalEntity from .definition import Definitions from .dependency import Dependable, Dependency -from .license import License, LicenseExpression, LicenseRepository, _LicenseRepositorySerializationHelper +from .license import License, LicenseRepository, _LicenseRepositorySerializationHelper from .lifecycle import Lifecycle, LifecycleRepository, _LifecycleRepositoryHelper from .service import Service from .tool import Tool, ToolRepository, _ToolRepositoryHelper @@ -804,67 +801,6 @@ def urn(self) -> str: # idea: have 'serial_number' be a string, and use it instead of this method return f'{_BOM_LINK_PREFIX}{self.serial_number}/{self.version}' - def validate(self) -> bool: - """ - Perform data-model level validations to make sure we have some known data integrity prior to attempting output - of this `Bom` - - Returns: - `bool` - - .. deprecated:: next - Deprecated without any replacement. - """ - # !! deprecated function. have this as an part of the normalization process, like the BomRefDiscrimator - # 0. Make sure all Dependable have a Dependency entry - if self.metadata.component: - self.register_dependency(target=self.metadata.component) - for _c in self.components: - self.register_dependency(target=_c) - for _s in self.services: - self.register_dependency(target=_s) - - # 1. Make sure dependencies are all in this Bom. - component_bom_refs = set(map(lambda c: c.bom_ref, self._get_all_components())) | set( - map(lambda s: s.bom_ref, self.services)) - dependency_bom_refs = set(chain( - (d.ref for d in self.dependencies), - chain.from_iterable(d.dependencies_as_bom_refs() for d in self.dependencies) - )) - dependency_diff = dependency_bom_refs - component_bom_refs - if len(dependency_diff) > 0: - raise UnknownComponentDependencyException( - 'One or more Components have Dependency references to Components/Services that are not known in this ' - f'BOM. They are: {dependency_diff}') - - # 2. if root component is set and there are other components: dependencies should exist for the Component - # this BOM is describing - if self.metadata.component and len(self.components) > 0 and not any(map( - lambda d: d.ref == self.metadata.component.bom_ref and len(d.dependencies) > 0, # type:ignore[union-attr] - self.dependencies - )): - warn( - f'The Component this BOM is describing {self.metadata.component.purl} has no defined dependencies ' - 'which means the Dependency Graph is incomplete - you should add direct dependencies to this ' - '"root" Component to complete the Dependency Graph data.', - category=UserWarning, stacklevel=1 - ) - - # 3. If a LicenseExpression is set, then there must be no other license. - # see https://github.com/CycloneDX/specification/pull/205 - elem: Union[BomMetaData, Component, Service] - for elem in chain( # type:ignore[assignment] - [self.metadata], - self.metadata.component.get_all_nested_components(include_self=True) if self.metadata.component else [], - chain.from_iterable(c.get_all_nested_components(include_self=True) for c in self.components), - self.services - ): - if len(elem.licenses) > 1 and any(isinstance(li, LicenseExpression) for li in elem.licenses): - raise LicenseExpressionAlongWithOthersException( - f'Found LicenseExpression along with others licenses in: {elem!r}') - - return True - def __comparable_tuple(self) -> _ComparableTuple: return _ComparableTuple(( self.serial_number, self.version, self.metadata, _ComparableTuple( diff --git a/cyclonedx/output/__init__.py b/cyclonedx/output/__init__.py index 95f66e0eb..c201ee92c 100644 --- a/cyclonedx/output/__init__.py +++ b/cyclonedx/output/__init__.py @@ -27,8 +27,10 @@ from itertools import chain from random import random from typing import TYPE_CHECKING, Any, Literal, Optional, Union, overload +from warnings import warn as _warn from ..schema import OutputFormat, SchemaVersion +from ..validation.model import ModelValidationErrorSeverity, ModelValidator if TYPE_CHECKING: # pragma: no cover from ..model.bom import Bom @@ -68,6 +70,20 @@ def get_bom(self) -> 'Bom': def set_bom(self, bom: 'Bom') -> None: self._bom = bom + def _prepare(self) -> None: + """Normalize dependency graph and validate model integrity before serialization.""" + bom = self._bom + if bom.metadata.component: + bom.register_dependency(target=bom.metadata.component) + for _c in bom.components: + bom.register_dependency(target=_c) + for _s in bom.services: + bom.register_dependency(target=_s) + for _err in ModelValidator().validate(bom): + if _err.severity is ModelValidationErrorSeverity.ERROR: + raise _err.data + _warn(str(_err.data), stacklevel=3) + @abstractmethod def generate(self, force_regeneration: bool = False) -> None: ... # pragma: no cover diff --git a/cyclonedx/output/json.py b/cyclonedx/output/json.py index 23d7a6666..028d902a1 100644 --- a/cyclonedx/output/json.py +++ b/cyclonedx/output/json.py @@ -68,8 +68,8 @@ def generate(self, force_regeneration: bool = False) -> None: 'specVersion': self.schema_version.to_version() } _view = SCHEMA_VERSIONS.get(self.schema_version_enum) + self._prepare() bom = self.get_bom() - bom.validate() with BomRefDiscriminator.from_bom(bom): bom_json: dict[str, Any] = json_loads( bom.as_json( # type:ignore[attr-defined] diff --git a/cyclonedx/output/xml.py b/cyclonedx/output/xml.py index 8c206349f..627a28c95 100644 --- a/cyclonedx/output/xml.py +++ b/cyclonedx/output/xml.py @@ -57,8 +57,8 @@ def generate(self, force_regeneration: bool = False) -> None: return _view = SCHEMA_VERSIONS[self.schema_version_enum] + self._prepare() bom = self.get_bom() - bom.validate() xmlns = self.get_target_namespace() with BomRefDiscriminator.from_bom(bom): self._bom_xml = '\n' + xml_dumps( # type:ignore[call-overload] diff --git a/cyclonedx/validation/model.py b/cyclonedx/validation/model.py index 1f8b60610..43dc9c86c 100644 --- a/cyclonedx/validation/model.py +++ b/cyclonedx/validation/model.py @@ -16,7 +16,107 @@ # Copyright (c) OWASP Foundation. All Rights Reserved. -# nothing here, yet. -# in the future this could be the place where model validation is done. -# like the current `model.bom.Bom.validate()` -# see also: https://github.com/CycloneDX/cyclonedx-python-lib/issues/455 +__all__ = ['ModelValidator', 'ModelValidationError', 'ModelValidationErrorSeverity'] + +from collections.abc import Iterable +from enum import Enum +from itertools import chain +from typing import TYPE_CHECKING, Any, Union + +from ..exception.model import LicenseExpressionAlongWithOthersException, UnknownComponentDependencyException +from . import ValidationError + +if TYPE_CHECKING: # pragma: no cover + from ..model.bom import Bom, BomMetaData + from ..model.component import Component + from ..model.service import Service + + +class ModelValidationErrorSeverity(str, Enum): + """Severity level of a :class:`ModelValidationError`.""" + + ERROR = 'error' + """BOM is structurally invalid and cannot be serialized correctly.""" + + WARNING = 'warning' + """BOM may have issues but can still be serialized; attention is recommended.""" + + +class ModelValidationError(ValidationError): + """Validation failed with this specific error. + + Use :attr:`~data` to access the content. + Use :attr:`~severity` to determine how critical the issue is. + """ + + def __init__(self, data: Any, + severity: ModelValidationErrorSeverity = ModelValidationErrorSeverity.ERROR) -> None: + super().__init__(data) + self.severity = severity + + +class ModelValidator: + """Perform data-model level validations to make sure we have some known data integrity.""" + + def validate(self, bom: 'Bom') -> Iterable[ModelValidationError]: + """Validate a :class:`~cyclonedx.model.bom.Bom` at the data-model level. + + Yields :class:`ModelValidationError` instances — one per issue found. + Errors with :attr:`~ModelValidationErrorSeverity.ERROR` severity indicate structural + invalidity; errors with :attr:`~ModelValidationErrorSeverity.WARNING` severity are + advisory. + + This method has no side-effects: it does not mutate the ``bom`` passed in. + + :param bom: The :class:`~cyclonedx.model.bom.Bom` to validate. + :return: An iterable of :class:`ModelValidationError` for each issue found. + """ + from ..model.license import LicenseExpression + + # Collect all components across the BOM, including nested ones. + all_components: set['Component'] = set(chain.from_iterable( + c.get_all_nested_components(include_self=True) for c in bom.components + )) + if bom.metadata.component: + all_components.update( + bom.metadata.component.get_all_nested_components(include_self=True) + ) + + # 1. Make sure every bom_ref referenced in the dependency graph exists in this BOM. + all_dependable_bom_refs = {e.bom_ref for e in chain(all_components, bom.services)} + all_dependency_bom_refs = set(chain( + (d.ref for d in bom.dependencies), + chain.from_iterable(d.dependencies_as_bom_refs() for d in bom.dependencies), + )) + dependency_diff = all_dependency_bom_refs - all_dependable_bom_refs + if dependency_diff: + yield ModelValidationError(UnknownComponentDependencyException( + 'One or more Components have Dependency references to Components/Services that are not known in this ' + f'BOM. They are: {dependency_diff}' + )) + + # 2. If the root component is set and there are other components, the root should declare + # at least one dependency — otherwise the Dependency Graph is incomplete. + # NOTE: guard on the component, not the BomRef — BomRef is falsy when value is None. + if bom.metadata.component is not None and len(bom.components) > 0 and not any( + len(d.dependencies) > 0 + for d in bom.dependencies + if d.ref == bom.metadata.component.bom_ref + ): + yield ModelValidationError( + UserWarning( + f'The Component this BOM is describing {bom.metadata.component.purl} has no defined ' + 'dependencies which means the Dependency Graph is incomplete - you should add direct ' + 'dependencies to this "root" Component to complete the Dependency Graph data.' + ), + severity=ModelValidationErrorSeverity.WARNING, + ) + + # 3. If a LicenseExpression is set, then there must be no other license. + # see https://github.com/CycloneDX/specification/pull/205 + elem: Union['BomMetaData', 'Component', 'Service'] + for elem in chain([bom.metadata], all_components, bom.services): # type: ignore[assignment] + if len(elem.licenses) > 1 and any(isinstance(li, LicenseExpression) for li in elem.licenses): + yield ModelValidationError(LicenseExpressionAlongWithOthersException( + f'Found LicenseExpression along with others licenses in: {elem!r}' + )) diff --git a/tests/test_model_bom.py b/tests/test_model_bom.py index 030ee4a56..aacd60d3c 100644 --- a/tests/test_model_bom.py +++ b/tests/test_model_bom.py @@ -32,6 +32,7 @@ from cyclonedx.model.lifecycle import LifecyclePhase, NamedLifecycle, PredefinedLifecycle from cyclonedx.model.tool import Tool from cyclonedx.output.json import JsonV1Dot7 +from cyclonedx.validation.model import ModelValidationErrorSeverity, ModelValidator from tests import reorder from tests._data.models import ( get_bom_component_licenses_invalid, @@ -254,7 +255,9 @@ def test_bom_nested_components_issue_275(self) -> None: bom = get_bom_for_issue_275_components() self.assertIsInstance(bom.metadata.component, Component) self.assertEqual(2, len(bom.components)) - bom.validate() + errors = [e for e in ModelValidator().validate(bom) + if e.severity is ModelValidationErrorSeverity.ERROR] + self.assertFalse(errors) @named_data( ['metadata_licenses', get_bom_metadata_licenses_invalid], @@ -266,8 +269,8 @@ def test_bom_nested_components_issue_275(self) -> None: ) def test_validate_with_invalid_license_constellation_throws(self, get_bom: Callable[[], Bom]) -> None: bom = get_bom() - with self.assertRaises(LicenseExpressionAlongWithOthersException): - bom.validate() + error_types = [type(e.data) for e in ModelValidator().validate(bom)] + self.assertIn(LicenseExpressionAlongWithOthersException, error_types) # def test_bom_nested_services_issue_275(self) -> None: # """regression test for issue #275 diff --git a/tests/test_real_world_examples.py b/tests/test_real_world_examples.py index 232e32915..14faa2ac0 100644 --- a/tests/test_real_world_examples.py +++ b/tests/test_real_world_examples.py @@ -23,6 +23,7 @@ from unittest.mock import patch from cyclonedx.model.bom import Bom +from cyclonedx.validation.model import ModelValidationErrorSeverity, ModelValidator from tests import OWN_DATA_DIRECTORY @@ -44,7 +45,9 @@ def test_regression_issue677(self, *_: Any, **__: Any) -> None: json = json_loads(input_json.read()) bom = Bom.from_json(json) self.assertEqual(4, len(bom.components)) - bom.validate() + errors = [e for e in ModelValidator().validate(bom) + if e.severity is ModelValidationErrorSeverity.ERROR] + self.assertFalse(errors) def test_regression_issue753(self, *_: Any, **__: Any) -> None: # tests https://github.com/CycloneDX/cyclonedx-python-lib/issues/753 @@ -52,7 +55,9 @@ def test_regression_issue753(self, *_: Any, **__: Any) -> None: json = json_loads(input_json.read()) bom = Bom.from_json(json) self.assertEqual(2, len(bom.components)) - bom.validate() + errors = [e for e in ModelValidator().validate(bom) + if e.severity is ModelValidationErrorSeverity.ERROR] + self.assertFalse(errors) def test_regression_issue_850(self, *_: Any, **__: Any) -> None: # tests https://github.com/CycloneDX/cyclonedx-python-lib/issues/850 diff --git a/tests/test_validation_model.py b/tests/test_validation_model.py new file mode 100644 index 000000000..d5351634f --- /dev/null +++ b/tests/test_validation_model.py @@ -0,0 +1,105 @@ +# This file is part of CycloneDX Python Library +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) OWASP Foundation. All Rights Reserved. + +from unittest import TestCase + +from cyclonedx.exception.model import LicenseExpressionAlongWithOthersException, UnknownComponentDependencyException +from cyclonedx.model.bom import Bom +from cyclonedx.model.component import Component +from cyclonedx.model.dependency import Dependency +from cyclonedx.model.license import DisjunctiveLicense, LicenseExpression +from cyclonedx.validation.model import ModelValidationErrorSeverity, ModelValidator + + +class TestModelValidator(TestCase): + + def test_validate_clean_bom(self) -> None: + bom = Bom() + bom.metadata.component = Component(name='root', version='1.0', bom_ref='root') + errors = list(ModelValidator().validate(bom)) + self.assertEqual(0, len(errors)) + + def test_validate_multiple_errors_have_error_severity(self) -> None: + bom = Bom() + comp = Component(name='test', version='1.0', bom_ref='test-comp') + comp.licenses.update([ + DisjunctiveLicense(id='MIT'), + LicenseExpression(value='Apache-2.0 OR MIT'), + ]) + bom.components.add(comp) + bom.dependencies.add(Dependency('test-comp', dependencies=[Dependency('non-existent-ref')])) + + errors = list(ModelValidator().validate(bom)) + + self.assertEqual(2, len(errors)) + error_types = [type(e.data) for e in errors] + self.assertIn(UnknownComponentDependencyException, error_types) + self.assertIn(LicenseExpressionAlongWithOthersException, error_types) + for error in errors: + self.assertEqual(ModelValidationErrorSeverity.ERROR, error.severity) + + def test_validate_unknown_toplevel_dependency_ref_detected(self) -> None: + """Regression: top-level d.ref values must also be validated against known BOM components.""" + bom = Bom() + comp = Component(name='real', version='1.0', bom_ref='real-comp') + bom.components.add(comp) + # 'ghost-ref' is not in the BOM at all + bom.dependencies.add(Dependency('ghost-ref')) + + errors = list(ModelValidator().validate(bom)) + + error_types = [type(e.data) for e in errors] + self.assertIn(UnknownComponentDependencyException, error_types) + + def test_validate_incomplete_dependency_graph_yields_warning(self) -> None: + """Check #2 must yield a WARNING-severity error, not a Python UserWarning.""" + import warnings as _warnings + bom = Bom() + bom.metadata.component = Component(name='root', version='1.0', bom_ref='root') + bom.components.add(Component(name='dep', version='1.0', bom_ref='dep')) + + with _warnings.catch_warnings(): + _warnings.simplefilter('error') # turn any Python warning into an error + errors = list(ModelValidator().validate(bom)) # must not raise + + warning_errors = [e for e in errors if e.severity == ModelValidationErrorSeverity.WARNING] + self.assertEqual(1, len(warning_errors)) + self.assertIsInstance(warning_errors[0].data, UserWarning) + + def test_validate_nested_root_component_license_invalid(self) -> None: + """Regression: nested components under metadata.component must be license-checked.""" + bom = Bom() + root = Component(name='root', version='1.0', bom_ref='root') + nested = Component(name='nested', version='1.0', bom_ref='nested') + nested.licenses.update([ + DisjunctiveLicense(id='MIT'), + LicenseExpression(value='Apache-2.0 OR MIT'), + ]) + root.components.add(nested) + bom.metadata.component = root + + errors = list(ModelValidator().validate(bom)) + + error_types = [type(e.data) for e in errors] + self.assertIn(LicenseExpressionAlongWithOthersException, error_types) + + def test_validate_no_side_effects(self) -> None: + bom = Bom() + bom.metadata.component = Component(name='root', version='1.0', bom_ref='root') + self.assertEqual(0, len(bom.dependencies)) + list(ModelValidator().validate(bom)) + self.assertEqual(0, len(bom.dependencies))