Skip to content

Deepchecks

zenml.integrations.deepchecks special

Deepchecks integration for ZenML.

The Deepchecks integration provides a way to validate your data in your pipelines. It includes a way to detect data anomalies and define checks to ensure quality of data.

The integration includes custom materializers to store Deepchecks SuiteResults and a visualizer that makes it easy to view the results in a notebook or in your browser.

DeepchecksIntegration (Integration)

Definition of Deepchecks integration for ZenML.

Source code in zenml/integrations/deepchecks/__init__.py
class DeepchecksIntegration(Integration):
    """ZenML integration for [Deepchecks](https://github.com/deepchecks/deepchecks).

    Declares the integration name, the pip requirements and the APT packages
    associated with it, and exposes the stack component flavors it provides.
    """

    NAME = DEEPCHECKS
    REQUIREMENTS = ["deepchecks[vision]==0.8.0", "torchvision==0.14.0"]
    APT_PACKAGES = ["ffmpeg", "libsm6", "libxext6"]

    @staticmethod
    def activate() -> None:
        """Activate the Deepchecks integration.

        Imports the integration's `materializers` and `visualizers` modules.
        The imported names are not used here; the imports are performed only
        for whatever happens when those modules are loaded.
        """
        from zenml.integrations.deepchecks import (  # noqa
            materializers,
            visualizers,
        )

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Deepchecks integration.

        The flavor class is imported inside the method rather than at module
        import time.

        Returns:
            List of stack component flavors for this integration.
        """
        from zenml.integrations.deepchecks.flavors import (
            DeepchecksDataValidatorFlavor,
        )

        integration_flavors: List[Type[Flavor]] = [
            DeepchecksDataValidatorFlavor
        ]
        return integration_flavors

activate() staticmethod

Activate the Deepchecks integration.

Source code in zenml/integrations/deepchecks/__init__.py
@staticmethod
def activate() -> None:
    """Activate the Deepchecks integration.

    Imports the integration's `materializers` and `visualizers` modules. The
    imported names are not used here; the imports are performed only for
    whatever happens when those modules are loaded.
    """
    from zenml.integrations.deepchecks import (  # noqa
        materializers,
        visualizers,
    )

flavors() classmethod

Declare the stack component flavors for the Deepchecks integration.

Returns:

Type Description
List[Type[zenml.stack.flavor.Flavor]]

List of stack component flavors for this integration.

Source code in zenml/integrations/deepchecks/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Deepchecks integration.

    The flavor class is imported inside the method rather than at module
    import time.

    Returns:
        List of stack component flavors for this integration.
    """
    from zenml.integrations.deepchecks.flavors import (
        DeepchecksDataValidatorFlavor,
    )

    integration_flavors: List[Type[Flavor]] = [DeepchecksDataValidatorFlavor]
    return integration_flavors

data_validators special

Initialization of the Deepchecks data validator for ZenML.

deepchecks_data_validator

Implementation of the Deepchecks data validator.

DeepchecksDataValidator (BaseDataValidator)

Deepchecks data validator stack component.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidator(BaseDataValidator):
    """Deepchecks data validator stack component.

    Exposes `data_validation` and `model_validation`. Both delegate to
    `_create_and_run_check_suite`, which builds and runs a Deepchecks suite
    for either tabular inputs (pandas DataFrames, sklearn classifiers) or
    computer vision inputs (PyTorch DataLoaders and Modules).
    """

    NAME: ClassVar[str] = "Deepchecks"
    FLAVOR: ClassVar[
        Type[BaseDataValidatorFlavor]
    ] = DeepchecksDataValidatorFlavor

    @staticmethod
    def _split_checks(
        check_list: Sequence[str],
    ) -> Tuple[Sequence[str], Sequence[str]]:
        """Split check identifiers into tabular and computer vision checks.

        Args:
            check_list: A list of check identifiers.

        Returns:
            List of tabular check identifiers and list of computer vision
            check identifiers. Identifiers that are neither tabular nor
            vision checks appear in neither list.
        """
        tabular_checks = [
            check
            for check in check_list
            if DeepchecksValidationCheck.is_tabular_check(check)
        ]
        vision_checks = [
            check
            for check in check_list
            if DeepchecksValidationCheck.is_vision_check(check)
        ]
        return tabular_checks, vision_checks

    @classmethod
    def _create_and_run_check_suite(
        cls,
        check_enum: Type[DeepchecksValidationCheck],
        reference_dataset: Union[pd.DataFrame, DataLoader[Any]],
        comparison_dataset: Optional[
            Union[pd.DataFrame, DataLoader[Any]]
        ] = None,
        model: Optional[Union[ClassifierMixin, Module]] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
    ) -> SuiteResult:
        """Create and run a Deepchecks check suite corresponding to the input parameters.

        This method contains generic logic common to all Deepchecks data
        validator methods that validates the input arguments and uses them to
        generate and run a Deepchecks check suite.

        The `dataset_kwargs`, `check_kwargs` and `run_kwargs` dictionaries are
        only read, never modified, by this method.

        Args:
            check_enum: ZenML enum type grouping together Deepchecks checks with
                the same characteristics. This is used to generate a default
                list of checks, if a custom list isn't provided via the
                `check_list` argument.
            reference_dataset: Primary (reference) dataset argument used during
                validation.
            comparison_dataset: Optional secondary (comparison) dataset argument
                used during comparison checks.
            model: Optional model argument used during validation.
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the list of Deepchecks checks to be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks tabular.Dataset or vision.VisionData constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys. Within one check's
                arguments, entries whose name starts with `condition_` and
                whose value is a dictionary are not passed to the constructor;
                they are applied by calling the check's matching
                `add_condition_*` method with the dictionary as keyword
                arguments.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.

        Returns:
            Deepchecks SuiteResult object with the Suite run results.

        Raises:
            TypeError: If the datasets, model and check list arguments combine
                data types and/or checks from different categories (tabular and
                computer vision).
        """
        # Detect what type of check to perform (tabular or computer vision) from
        # the dataset/model datatypes and the check list. At the same time,
        # validate the combination of data types used for dataset and model
        # arguments and the check list.
        is_tabular = False
        is_vision = False
        for dataset in [reference_dataset, comparison_dataset]:
            if dataset is None:
                continue
            if isinstance(dataset, pd.DataFrame):
                is_tabular = True
            elif isinstance(dataset, DataLoader):
                is_vision = True
            else:
                raise TypeError(
                    f"Unsupported dataset data type found: {type(dataset)}. "
                    f"Supported data types are {str(pd.DataFrame)} for tabular "
                    f"data and {str(DataLoader)} for computer vision data."
                )

        # Compare against None explicitly: model objects may define
        # `__len__`/`__bool__` (e.g. an empty `torch.nn.Sequential` is falsy)
        # and a truthiness test would silently skip their type validation.
        if model is not None:
            if isinstance(model, ClassifierMixin):
                is_tabular = True
            elif isinstance(model, Module):
                is_vision = True
            else:
                raise TypeError(
                    f"Unsupported model data type found: {type(model)}. "
                    f"Supported data types are {str(ClassifierMixin)} for "
                    f"tabular data and {str(Module)} for computer vision "
                    f"data."
                )

        if is_tabular and is_vision:
            raise TypeError(
                f"Tabular and computer vision data types used for datasets and "
                f"models cannot be mixed. They must all belong to the same "
                f"category. Supported data types for tabular data are "
                f"{str(pd.DataFrame)} for datasets and {str(ClassifierMixin)} "
                f"for models. Supported data types for computer vision data "
                f"are {str(DataLoader)} for datasets and {str(Module)} "
                f"for models."
            )

        if not check_list:
            # default to executing all the checks listed in the supplied
            # checks enum type if a custom check list is not supplied
            tabular_checks, vision_checks = cls._split_checks(
                check_enum.values()
            )
            if is_tabular:
                check_list = tabular_checks
                vision_checks = []
            else:
                check_list = vision_checks
                tabular_checks = []
        else:
            tabular_checks, vision_checks = cls._split_checks(check_list)

        if tabular_checks and vision_checks:
            raise TypeError(
                f"The check list cannot mix tabular checks "
                f"({tabular_checks}) and computer vision checks ("
                f"{vision_checks})."
            )

        if is_tabular and vision_checks:
            raise TypeError(
                f"Tabular data types used for datasets and models can only "
                f"be used with tabular validation checks. The following "
                f"computer vision checks included in the check list are "
                f"not valid: {vision_checks}."
            )

        if is_vision and tabular_checks:
            raise TypeError(
                f"Computer vision data types used for datasets and models "
                f"can only be used with computer vision validation checks. "
                f"The following tabular checks included in the check list "
                f"are not valid: {tabular_checks}."
            )

        # Pair every check identifier with the check class it resolves to.
        check_classes = [
            (check, check_enum.get_check_class(check)) for check in check_list
        ]

        # use the pipeline name and the step name to generate a unique suite
        # name
        try:
            # get pipeline name and step name
            step_env = cast(
                StepEnvironment, Environment()[STEP_ENVIRONMENT_NAME]
            )
            suite_name = f"{step_env.pipeline_name}_{step_env.step_name}"
        except KeyError:
            # if not running inside a pipeline step, use random values
            suite_name = f"suite_{random_str(5)}"

        if is_tabular:
            dataset_class = TabularData
            suite_class = TabularSuite
            full_suite = full_tabular_suite()
        else:
            dataset_class = VisionData
            suite_class = VisionSuite
            full_suite = full_vision_suite()

        train_dataset = dataset_class(reference_dataset, **dataset_kwargs)
        test_dataset = None
        if comparison_dataset is not None:
            test_dataset = dataset_class(comparison_dataset, **dataset_kwargs)
        suite = suite_class(name=suite_name)

        # Some Deepchecks checks require a minimum configuration such as
        # conditions to be configured (see https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_configure_check_conditions.html#sphx-glr-user-guide-general-customizations-examples-plot-configure-check-conditions-py)
        # for their execution to have meaning. For checks that don't have
        # custom configuration attributes explicitly specified in the
        # `check_kwargs` input parameter, we use the default check
        # instances extracted from the full suite shipped with Deepchecks.
        default_checks = {
            check.__class__: check for check in full_suite.checks.values()
        }
        for check_name, check_class in check_classes:
            extra_kwargs = check_kwargs.get(check_name, {})
            default_check = default_checks.get(check_class)

            # Separate the condition descriptions (`condition_*` entries with
            # a dict value) from the plain constructor arguments.
            condition_args = {
                arg_name: arg_value
                for arg_name, arg_value in extra_kwargs.items()
                if arg_name.startswith("condition_")
                and isinstance(arg_value, dict)
            }
            init_kwargs = {
                arg_name: arg_value
                for arg_name, arg_value in extra_kwargs.items()
                if arg_name not in condition_args
            }

            check: BaseCheck
            if extra_kwargs or default_check is None:
                # Build the check from the arguments configured for *this*
                # check only, not from the whole per-check mapping.
                check = check_class(**init_kwargs)
            else:
                check = default_check

            # attach the explicitly configured conditions to the check
            for arg_name, condition_kwargs in condition_args.items():
                condition_method = getattr(check, f"add_{arg_name}", None)
                if not condition_method or not callable(condition_method):
                    logger.warning(
                        f"Deepchecks check type {check.__class__} has no "
                        f"condition named {arg_name}. Ignoring the check "
                        f"argument."
                    )
                    continue
                condition_method(**condition_kwargs)

            suite.add(check)
        return suite.run(
            train_dataset=train_dataset,
            test_dataset=test_dataset,
            model=model,
            **run_kwargs,
        )

    def data_validation(
        self,
        dataset: Union[pd.DataFrame, DataLoader[Any]],
        comparison_dataset: Optional[Any] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ) -> SuiteResult:
        """Run one or more Deepchecks data validation checks on a dataset.

        Call this method to analyze and identify potential integrity problems
        with a single dataset (e.g. missing values, conflicting labels, mixed
        data types etc.) and dataset comparison checks (e.g. data drift
        checks). Dataset comparison checks require that a second dataset be
        supplied via the `comparison_dataset` argument.

        The `check_list` argument may be used to specify a custom set of
        Deepchecks data integrity checks to perform, identified by
        `DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
        values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        data integrity checks will be performed on the input data. See
        `DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available data drift checks will be performed on the input
        data. See `DeepchecksDataDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

        Args:
            dataset: Target dataset to be validated.
            comparison_dataset: Optional second dataset to be used for data
                comparison checks (e.g. data drift checks).
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the data validation checks to be performed.
                `DeepchecksDataIntegrityCheck` enum values should be used for
                single data validation checks and `DeepchecksDataDriftCheck`
                enum values for data comparison checks. If not supplied, the
                entire set of checks applicable to the input dataset(s)
                will be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.
            kwargs: Additional keyword arguments (unused).

        Returns:
            A Deepchecks SuiteResult with the results of the validation.
        """
        # The presence of a comparison dataset selects the default check
        # family: integrity checks for one dataset, drift checks for two.
        check_enum: Type[DeepchecksValidationCheck]
        if comparison_dataset is None:
            check_enum = DeepchecksDataIntegrityCheck
        else:
            check_enum = DeepchecksDataDriftCheck

        return self._create_and_run_check_suite(
            check_enum=check_enum,
            reference_dataset=dataset,
            comparison_dataset=comparison_dataset,
            check_list=check_list,
            dataset_kwargs=dataset_kwargs,
            check_kwargs=check_kwargs,
            run_kwargs=run_kwargs,
        )

    def model_validation(
        self,
        dataset: Union[pd.DataFrame, DataLoader[Any]],
        model: Union[ClassifierMixin, Module],
        comparison_dataset: Optional[Any] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ) -> Any:
        """Run one or more Deepchecks model validation checks.

        Call this method to perform model validation checks (e.g. confusion
        matrix validation, performance reports, model error analyses, etc).
        A second dataset is required for model performance comparison tests
        (i.e. tests that identify changes in a model behavior by comparing how
        it performs on two different datasets).

        The `check_list` argument may be used to specify a custom set of
        Deepchecks model validation checks to perform, identified by
        `DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
        values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        model validation checks will be performed on the input data. See
        `DeepchecksModelValidationCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available model comparison checks will be performed on the input
        data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

        Args:
            dataset: Target dataset to be validated.
            model: Target model to be validated.
            comparison_dataset: Optional second dataset to be used for model
                comparison checks.
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the model validation checks to be performed.
                `DeepchecksModelValidationCheck` enum values should be used for
                model validation checks and `DeepchecksModelDriftCheck` enum
                values for model comparison checks. If not supplied, the
                entire set of checks applicable to the input dataset(s)
                will be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks tabular.Dataset or vision.VisionData constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.
            kwargs: Additional keyword arguments (unused).

        Returns:
            A Deepchecks SuiteResult with the results of the validation.
        """
        # The presence of a comparison dataset selects the default check
        # family: validation checks for one dataset, drift checks for two.
        check_enum: Type[DeepchecksValidationCheck]
        if comparison_dataset is None:
            check_enum = DeepchecksModelValidationCheck
        else:
            check_enum = DeepchecksModelDriftCheck

        return self._create_and_run_check_suite(
            check_enum=check_enum,
            reference_dataset=dataset,
            comparison_dataset=comparison_dataset,
            model=model,
            check_list=check_list,
            dataset_kwargs=dataset_kwargs,
            check_kwargs=check_kwargs,
            run_kwargs=run_kwargs,
        )
FLAVOR (BaseDataValidatorFlavor)

Flavor of the Deepchecks data validator.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the Deepchecks data validator stack component."""

    @property
    def name(self) -> str:
        """Return the name of this flavor.

        Returns:
            The value of the `DEEPCHECKS_DATA_VALIDATOR_FLAVOR` constant.
        """
        flavor_name: str = DEEPCHECKS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def implementation_class(self) -> Type["DeepchecksDataValidator"]:
        """Return the class implementing this flavor.

        The implementation class is imported when the property is accessed
        rather than when this module is loaded.

        Returns:
            The `DeepchecksDataValidator` class.
        """
        from zenml.integrations.deepchecks.data_validators import (
            DeepchecksDataValidator as validator_class,
        )

        return validator_class
implementation_class: Type[DeepchecksDataValidator] property readonly

Implementation class.

Returns:

Type Description
Type[DeepchecksDataValidator]

The implementation class.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

data_validation(self, dataset, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)

Run one or more Deepchecks data validation checks on a dataset.

Call this method to analyze and identify potential integrity problems with a single dataset (e.g. missing values, conflicting labels, mixed data types etc.) and dataset comparison checks (e.g. data drift checks). Dataset comparison checks require that a second dataset be supplied via the comparison_dataset argument.

The check_list argument may be used to specify a custom set of Deepchecks data integrity checks to perform, identified by DeepchecksDataIntegrityCheck and DeepchecksDataDriftCheck enum values. If omitted:

  • if the comparison_dataset is omitted, a suite with all available data integrity checks will be performed on the input data. See DeepchecksDataIntegrityCheck for a list of Deepchecks builtin checks that are compatible with this method.

  • if the comparison_dataset is supplied, a suite with all available data drift checks will be performed on the input data. See DeepchecksDataDriftCheck for a list of Deepchecks builtin checks that are compatible with this method.

Parameters:

Name Type Description Default
dataset Union[pandas.core.frame.DataFrame, torch.utils.data.dataloader.DataLoader[Any]]

Target dataset to be validated.

required
comparison_dataset Optional[Any]

Optional second dataset to be used for data comparison checks (e.g. data drift checks).

None
check_list Optional[Sequence[str]]

Optional list of ZenML Deepchecks check identifiers specifying the data validation checks to be performed. DeepchecksDataIntegrityCheck enum values should be used for single data validation checks and DeepchecksDataDriftCheck enum values for data comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed.

None
dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

{}
check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

{}
run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

{}
kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
SuiteResult

A Deepchecks SuiteResult with the results of the validation.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def data_validation(
    self,
    dataset: Union[pd.DataFrame, DataLoader[Any]],
    comparison_dataset: Optional[Any] = None,
    check_list: Optional[Sequence[str]] = None,
    dataset_kwargs: Dict[str, Any] = {},
    check_kwargs: Dict[str, Dict[str, Any]] = {},
    run_kwargs: Dict[str, Any] = {},
    **kwargs: Any,
) -> SuiteResult:
    """Run Deepchecks data validation checks on one or two datasets.

    With a single dataset, the checks look for integrity problems (e.g.
    missing values, conflicting labels, mixed data types). When a second
    dataset is passed via `comparison_dataset`, dataset comparison checks
    (e.g. data drift checks) are run instead.

    A custom set of checks can be requested through `check_list`, using
    `DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
    values. When `check_list` is omitted, the default depends on
    `comparison_dataset`:

    * omitted: all available data integrity checks are run; see
    `DeepchecksDataIntegrityCheck` for the compatible builtin checks.

    * supplied: all available data drift checks are run; see
    `DeepchecksDataDriftCheck` for the compatible builtin checks.

    Args:
        dataset: Target dataset to be validated.
        comparison_dataset: Optional second dataset to be used for data
            comparison checks (e.g. data drift checks).
        check_list: Optional list of ZenML Deepchecks check identifiers
            specifying the data validation checks to be performed.
            `DeepchecksDataIntegrityCheck` enum values should be used for
            single data validation checks and `DeepchecksDataDriftCheck`
            enum values for data comparison checks. If not supplied, the
            entire set of checks applicable to the input dataset(s)
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
        kwargs: Additional keyword arguments (unused).

    Returns:
        A Deepchecks SuiteResult with the results of the validation.
    """
    # Two datasets mean a comparison (drift) run; one means an integrity run.
    check_enum: Type[DeepchecksValidationCheck] = (
        DeepchecksDataDriftCheck
        if comparison_dataset is not None
        else DeepchecksDataIntegrityCheck
    )

    suite_result = self._create_and_run_check_suite(
        check_enum=check_enum,
        reference_dataset=dataset,
        comparison_dataset=comparison_dataset,
        check_list=check_list,
        dataset_kwargs=dataset_kwargs,
        check_kwargs=check_kwargs,
        run_kwargs=run_kwargs,
    )
    return suite_result
model_validation(self, dataset, model, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)

Run one or more Deepchecks model validation checks.

Call this method to perform model validation checks (e.g. confusion matrix validation, performance reports, model error analyses, etc). A second dataset is required for model performance comparison tests (i.e. tests that identify changes in a model behavior by comparing how it performs on two different datasets).

The check_list argument may be used to specify a custom set of Deepchecks model validation checks to perform, identified by DeepchecksModelValidationCheck and DeepchecksModelDriftCheck enum values. If omitted:

  • if the comparison_dataset is omitted, a suite with all available model validation checks will be performed on the input data. See DeepchecksModelValidationCheck for a list of Deepchecks builtin checks that are compatible with this method.

  • if the comparison_dataset is supplied, a suite with all available model comparison checks will be performed on the input data. See DeepchecksModelDriftCheck for a list of Deepchecks builtin checks that are compatible with this method.

Parameters:

Name Type Description Default
dataset Union[pandas.core.frame.DataFrame, torch.utils.data.dataloader.DataLoader[Any]]

Target dataset to be validated.

required
model Union[sklearn.base.ClassifierMixin, torch.nn.modules.module.Module]

Target model to be validated.

required
comparison_dataset Optional[Any]

Optional second dataset to be used for model comparison checks.

None
check_list Optional[Sequence[str]]

Optional list of ZenML Deepchecks check identifiers specifying the model validation checks to be performed. DeepchecksModelValidationCheck enum values should be used for model validation checks and DeepchecksModelDriftCheck enum values for model comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed.

None
dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

{}
check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

{}
run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

{}
kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
Any

A Deepchecks SuiteResult with the results of the validation.

Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def model_validation(
    self,
    dataset: Union[pd.DataFrame, DataLoader[Any]],
    model: Union[ClassifierMixin, Module],
    comparison_dataset: Optional[Any] = None,
    check_list: Optional[Sequence[str]] = None,
    dataset_kwargs: Dict[str, Any] = {},
    check_kwargs: Dict[str, Dict[str, Any]] = {},
    run_kwargs: Dict[str, Any] = {},
    **kwargs: Any,
) -> Any:
    """Run one or more Deepchecks model validation checks.

    Call this method to perform model validation checks (e.g. confusion
    matrix validation, performance reports, model error analyses, etc).
    A second dataset is required for model performance comparison tests
    (i.e. tests that identify changes in a model behavior by comparing how
    it performs on two different datasets).

    The `check_list` argument may be used to specify a custom set of
    Deepchecks model validation checks to perform, identified by
    `DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
    values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        model validation checks will be performed on the input data. See
        `DeepchecksModelValidationCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available model comparison checks will be performed on the input
        data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

    Args:
        dataset: Target dataset to be validated.
        model: Target model to be validated.
        comparison_dataset: Optional second dataset to be used for model
            comparison checks.
        check_list: Optional list of ZenML Deepchecks check identifiers
            specifying the model validation checks to be performed.
            `DeepchecksModelValidationCheck` enum values should be used for
            model validation checks and `DeepchecksModelDriftCheck` enum
            values for model comparison checks. If not supplied, the
            entire set of checks applicable to the input dataset(s)
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks tabular.Dataset or vision.VisionData constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
        kwargs: Additional keyword arguments (unused).

    Returns:
        A Deepchecks SuiteResult with the results of the validation.
    """
    # The presence of a second dataset decides which check family applies:
    # single-dataset model validation vs. two-dataset model drift.
    check_enum: Type[DeepchecksValidationCheck] = (
        DeepchecksModelValidationCheck
        if comparison_dataset is None
        else DeepchecksModelDriftCheck
    )

    # The keyword dictionaries are forwarded untouched; `kwargs` is ignored.
    return self._create_and_run_check_suite(
        check_enum=check_enum,
        reference_dataset=dataset,
        comparison_dataset=comparison_dataset,
        model=model,
        check_list=check_list,
        dataset_kwargs=dataset_kwargs,
        check_kwargs=check_kwargs,
        run_kwargs=run_kwargs,
    )

flavors special

Deepchecks integration flavors.

deepchecks_data_validator_flavor

Deepchecks data validator flavor.

DeepchecksDataValidatorFlavor (BaseDataValidatorFlavor)

Flavor of the Deepchecks data validator.

Source code in zenml/integrations/deepchecks/flavors/deepchecks_data_validator_flavor.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
    """Data validator flavor backed by the Deepchecks data validator."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The value of `DEEPCHECKS_DATA_VALIDATOR_FLAVOR`.
        """
        return DEEPCHECKS_DATA_VALIDATOR_FLAVOR

    @property
    def implementation_class(self) -> Type["DeepchecksDataValidator"]:
        """Class that implements this flavor.

        Returns:
            The `DeepchecksDataValidator` class.
        """
        # Imported on access rather than at module import time.
        from zenml.integrations.deepchecks.data_validators import (
            DeepchecksDataValidator as validator_class,
        )

        return validator_class
implementation_class: Type[DeepchecksDataValidator] property readonly

Implementation class.

Returns:

Type Description
Type[DeepchecksDataValidator]

The implementation class.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

materializers special

Deepchecks materializers.

deepchecks_dataset_materializer

Implementation of Deepchecks dataset materializer.

DeepchecksDatasetMaterializer (BaseMaterializer)

Materializer to read data to and from Deepchecks dataset.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
class DeepchecksDatasetMaterializer(BaseMaterializer):
    """Materializer storing Deepchecks datasets through the pandas materializer."""

    ASSOCIATED_TYPES = (Dataset,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> Dataset:
        """Rebuild a Deepchecks Dataset from the stored pandas dataframe.

        Args:
            data_type: The type of the data to read.

        Returns:
            A Deepchecks Dataset constructed from the loaded dataframe.
        """
        super().load(data_type)

        # Reading is delegated to a pandas materializer on the same URI;
        # the Dataset is then constructed from the dataframe alone.
        frame = PandasMaterializer(self.uri).load(data_type)
        return Dataset(frame)

    def save(self, df: Dataset) -> None:
        """Persist the dataframe held by a Deepchecks Dataset.

        Args:
            df: A deepchecks.Dataset object.
        """
        super().save(df)

        # Only the `data` attribute of the Dataset is handed to the
        # pandas materializer for writing.
        delegate = PandasMaterializer(self.uri)
        delegate.save(df.data)
load(self, data_type)

Reads pandas dataframes and creates deepchecks.Dataset from it.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
Dataset

A Deepchecks Dataset.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def load(self, data_type: Type[Any]) -> Dataset:
    """Rebuild a Deepchecks Dataset from the stored pandas dataframe.

    Args:
        data_type: The type of the data to read.

    Returns:
        A Deepchecks Dataset constructed from the loaded dataframe.
    """
    super().load(data_type)

    # Reading is delegated to a pandas materializer on the same URI;
    # the Dataset is then constructed from the dataframe alone.
    frame = PandasMaterializer(self.uri).load(data_type)
    return Dataset(frame)
save(self, df)

Serializes pandas dataframe within a Dataset object.

Parameters:

Name Type Description Default
df Dataset

A deepchecks.Dataset object.

required
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def save(self, df: Dataset) -> None:
    """Persist the dataframe held by a Deepchecks Dataset.

    Args:
        df: A deepchecks.Dataset object.
    """
    super().save(df)

    # Only the `data` attribute of the Dataset is handed to the
    # pandas materializer for writing.
    delegate = PandasMaterializer(self.uri)
    delegate.save(df.data)

deepchecks_results_materializer

Implementation of Deepchecks suite results materializer.

DeepchecksResultMaterializer (BaseMaterializer)

Materializer to read data to and from CheckResult and SuiteResult objects.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
class DeepchecksResultMaterializer(BaseMaterializer):
    """Materializer for Deepchecks CheckResult and SuiteResult objects."""

    ASSOCIATED_TYPES = (
        CheckResult,
        SuiteResult,
    )
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA_ANALYSIS

    def load(self, data_type: Type[Any]) -> Union[CheckResult, SuiteResult]:
        """Restore a Deepchecks check or suite result from its JSON file.

        Args:
            data_type: The type of the data to read.

        Returns:
            A Deepchecks CheckResult or SuiteResult.

        Raises:
            RuntimeError: if the input data type is not supported.
        """
        super().load(data_type)

        # The file is read before the requested type is inspected.
        results_path = os.path.join(self.uri, RESULTS_FILENAME)
        payload = io_utils.read_file_contents_as_string(results_path)

        if data_type == SuiteResult:
            return SuiteResult.from_json(payload)
        if data_type == CheckResult:
            return CheckResult.from_json(payload)
        raise RuntimeError(f"Unknown data type: {data_type}")

    def save(self, result: Union[CheckResult, SuiteResult]) -> None:
        """Write a CheckResult or SuiteResult to disk as JSON.

        Args:
            result: A Deepchecks CheckResult or SuiteResult.
        """
        super().save(result)

        results_path = os.path.join(self.uri, RESULTS_FILENAME)
        io_utils.write_file_contents_as_string(
            results_path, result.to_json(True)
        )
load(self, data_type)

Reads a Deepchecks check or suite result from a serialized JSON file.

Parameters:

Name Type Description Default
data_type Type[Any]

The type of the data to read.

required

Returns:

Type Description
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

A Deepchecks CheckResult or SuiteResult.

Exceptions:

Type Description
RuntimeError

if the input data type is not supported.

Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def load(self, data_type: Type[Any]) -> Union[CheckResult, SuiteResult]:
    """Restore a Deepchecks check or suite result from its JSON file.

    Args:
        data_type: The type of the data to read.

    Returns:
        A Deepchecks CheckResult or SuiteResult.

    Raises:
        RuntimeError: if the input data type is not supported.
    """
    super().load(data_type)

    # The file is read before the requested type is inspected.
    results_path = os.path.join(self.uri, RESULTS_FILENAME)
    payload = io_utils.read_file_contents_as_string(results_path)

    if data_type == SuiteResult:
        return SuiteResult.from_json(payload)
    if data_type == CheckResult:
        return CheckResult.from_json(payload)
    raise RuntimeError(f"Unknown data type: {data_type}")
save(self, result)

Creates a JSON serialization for a CheckResult or SuiteResult.

Parameters:

Name Type Description Default
result Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

A Deepchecks CheckResult or SuiteResult.

required
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def save(self, result: Union[CheckResult, SuiteResult]) -> None:
    """Write a CheckResult or SuiteResult to disk as JSON.

    Args:
        result: A Deepchecks CheckResult or SuiteResult.
    """
    super().save(result)

    results_path = os.path.join(self.uri, RESULTS_FILENAME)
    io_utils.write_file_contents_as_string(
        results_path, result.to_json(True)
    )

steps special

Initialization of the Deepchecks Standard Steps.

deepchecks_data_drift

Implementation of the Deepchecks data drift validation step.

DeepchecksDataDriftCheckStep (BaseStep)

Deepchecks data drift validator step.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
class DeepchecksDataDriftCheckStep(BaseStep):
    """Step that runs Deepchecks data drift checks on two dataframes."""

    def entrypoint(
        self,
        reference_dataset: pd.DataFrame,
        target_dataset: pd.DataFrame,
        params: DeepchecksDataDriftCheckStepParameters,
    ) -> SuiteResult:
        """Run the data drift checks through the active data validator.

        Args:
            reference_dataset: Reference dataset for the data drift check.
            target_dataset: Target dataset to be used for the data drift
                check.
            params: The parameters for the step.

        Returns:
            A Deepchecks suite result with the validation results.
        """
        validator = cast(
            DeepchecksDataValidator,
            DeepchecksDataValidator.get_active_data_validator(),
        )
        # The cast only changes the static type of the check list.
        selected_checks = cast(Optional[Sequence[str]], params.check_list)

        # The target dataset is passed as the comparison dataset.
        return validator.data_validation(
            dataset=reference_dataset,
            comparison_dataset=target_dataset,
            check_list=selected_checks,
            dataset_kwargs=params.dataset_kwargs,
            check_kwargs=params.check_kwargs,
            run_kwargs=params.run_kwargs,
        )
PARAMETERS_CLASS (BaseParameters) pydantic-model

Parameter class for the Deepchecks data drift validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataDriftCheck]]

Optional list of DeepchecksDataDriftCheck identifiers specifying the subset of Deepchecks data drift checks to be performed. If not supplied, the entire set of data drift checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
class DeepchecksDataDriftCheckStepParameters(BaseParameters):
    """Parameter class for the Deepchecks data drift validator step.

    Attributes:
        check_list: Optional list of DeepchecksDataDriftCheck identifiers
            specifying the subset of Deepchecks data drift checks to be
            performed. If not supplied, the entire set of data drift checks will
            be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
    """

    # Stays `None` when no explicit subset of checks is requested.
    check_list: Optional[Sequence[DeepchecksDataDriftCheck]] = None
    # `default_factory=dict` produces the empty-dict default by calling
    # `dict()` instead of reusing one dict literal.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, reference_dataset, target_dataset, params)

Main entrypoint for the Deepchecks data drift validator step.

Parameters:

Name Type Description Default
reference_dataset DataFrame

Reference dataset for the data drift check.

required
target_dataset DataFrame

Target dataset to be used for the data drift check.

required
params DeepchecksDataDriftCheckStepParameters

The parameters for the step

required

Returns:

Type Description
SuiteResult

A Deepchecks suite result with the validation results.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
def entrypoint(
    self,
    reference_dataset: pd.DataFrame,
    target_dataset: pd.DataFrame,
    params: DeepchecksDataDriftCheckStepParameters,
) -> SuiteResult:
    """Run the data drift checks through the active data validator.

    Args:
        reference_dataset: Reference dataset for the data drift check.
        target_dataset: Target dataset to be used for the data drift check.
        params: The parameters for the step.

    Returns:
        A Deepchecks suite result with the validation results.
    """
    validator = cast(
        DeepchecksDataValidator,
        DeepchecksDataValidator.get_active_data_validator(),
    )
    # The cast only changes the static type of the check list.
    selected_checks = cast(Optional[Sequence[str]], params.check_list)

    # The target dataset is passed as the comparison dataset.
    return validator.data_validation(
        dataset=reference_dataset,
        comparison_dataset=target_dataset,
        check_list=selected_checks,
        dataset_kwargs=params.dataset_kwargs,
        check_kwargs=params.check_kwargs,
        run_kwargs=params.run_kwargs,
    )
DeepchecksDataDriftCheckStepParameters (BaseParameters) pydantic-model

Parameter class for the Deepchecks data drift validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataDriftCheck]]

Optional list of DeepchecksDataDriftCheck identifiers specifying the subset of Deepchecks data drift checks to be performed. If not supplied, the entire set of data drift checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
class DeepchecksDataDriftCheckStepParameters(BaseParameters):
    """Parameter class for the Deepchecks data drift validator step.

    Attributes:
        check_list: Optional list of DeepchecksDataDriftCheck identifiers
            specifying the subset of Deepchecks data drift checks to be
            performed. If not supplied, the entire set of data drift checks will
            be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
    """

    # Stays `None` when no explicit subset of checks is requested.
    check_list: Optional[Sequence[DeepchecksDataDriftCheck]] = None
    # `default_factory=dict` produces the empty-dict default by calling
    # `dict()` instead of reusing one dict literal.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_data_drift_check_step(step_name, params)

Shortcut function to create a new instance of the DeepchecksDataDriftCheckStep step.

The returned DeepchecksDataDriftCheckStep can be used in a pipeline to run data drift checks on two input pd.DataFrame and return the results as a Deepchecks SuiteResult object.

Parameters:

Name Type Description Default
step_name str

The name of the step

required
params DeepchecksDataDriftCheckStepParameters

The parameters for the step

required

Returns:

Type Description
BaseStep

a DeepchecksDataDriftCheckStep step instance

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_drift.py
def deepchecks_data_drift_check_step(
    step_name: str,
    params: DeepchecksDataDriftCheckStepParameters,
) -> BaseStep:
    """Build a named DeepchecksDataDriftCheckStep instance.

    The returned DeepchecksDataDriftCheckStep can be used in a pipeline to
    run data drift checks on two input pd.DataFrame and return the results
    as a Deepchecks SuiteResult object.

    Args:
        step_name: The name of the step.
        params: The parameters for the step.

    Returns:
        A DeepchecksDataDriftCheckStep step instance.
    """
    drift_step = DeepchecksDataDriftCheckStep(name=step_name, params=params)
    return drift_step

deepchecks_data_integrity

Implementation of the Deepchecks data integrity validation step.

DeepchecksDataIntegrityCheckStep (BaseStep)

Deepchecks data integrity validator step.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
class DeepchecksDataIntegrityCheckStep(BaseStep):
    """Step that runs Deepchecks data integrity checks on a dataframe."""

    def entrypoint(
        self,
        dataset: pd.DataFrame,
        params: DeepchecksDataIntegrityCheckStepParameters,
    ) -> SuiteResult:
        """Run the data integrity checks through the active data validator.

        Args:
            dataset: A Pandas DataFrame to validate.
            params: The parameters for the step.

        Returns:
            A Deepchecks suite result with the validation results.
        """
        validator = cast(
            DeepchecksDataValidator,
            DeepchecksDataValidator.get_active_data_validator(),
        )
        # The cast only changes the static type of the check list.
        selected_checks = cast(Optional[Sequence[str]], params.check_list)

        # No comparison dataset is passed here.
        return validator.data_validation(
            dataset=dataset,
            check_list=selected_checks,
            dataset_kwargs=params.dataset_kwargs,
            check_kwargs=params.check_kwargs,
            run_kwargs=params.run_kwargs,
        )
PARAMETERS_CLASS (BaseParameters) pydantic-model

Parameters class for the Deepchecks data integrity validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataIntegrityCheck]]

Optional list of DeepchecksDataIntegrityCheck identifiers specifying the subset of Deepchecks data integrity checks to be performed. If not supplied, the entire set of data integrity checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
class DeepchecksDataIntegrityCheckStepParameters(BaseParameters):
    """Parameters class for the Deepchecks data integrity validator step.

    Attributes:
        check_list: Optional list of DeepchecksDataIntegrityCheck identifiers
            specifying the subset of Deepchecks data integrity checks to be
            performed. If not supplied, the entire set of data integrity checks
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
    """

    # Stays `None` when no explicit subset of checks is requested.
    check_list: Optional[Sequence[DeepchecksDataIntegrityCheck]] = None
    # `default_factory=dict` produces the empty-dict default by calling
    # `dict()` instead of reusing one dict literal.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, dataset, params)

Main entrypoint for the Deepchecks data integrity validator step.

Parameters:

Name Type Description Default
dataset DataFrame

a Pandas DataFrame to validate

required
params DeepchecksDataIntegrityCheckStepParameters

The parameters for the step

required

Returns:

Type Description
SuiteResult

A Deepchecks suite result with the validation results.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
def entrypoint(
    self,
    dataset: pd.DataFrame,
    params: DeepchecksDataIntegrityCheckStepParameters,
) -> SuiteResult:
    """Run the data integrity checks through the active data validator.

    Args:
        dataset: A Pandas DataFrame to validate.
        params: The parameters for the step.

    Returns:
        A Deepchecks suite result with the validation results.
    """
    validator = cast(
        DeepchecksDataValidator,
        DeepchecksDataValidator.get_active_data_validator(),
    )
    # The cast only changes the static type of the check list.
    selected_checks = cast(Optional[Sequence[str]], params.check_list)

    # No comparison dataset is passed here.
    return validator.data_validation(
        dataset=dataset,
        check_list=selected_checks,
        dataset_kwargs=params.dataset_kwargs,
        check_kwargs=params.check_kwargs,
        run_kwargs=params.run_kwargs,
    )
DeepchecksDataIntegrityCheckStepParameters (BaseParameters) pydantic-model

Parameters class for the Deepchecks data integrity validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksDataIntegrityCheck]]

Optional list of DeepchecksDataIntegrityCheck identifiers specifying the subset of Deepchecks data integrity checks to be performed. If not supplied, the entire set of data integrity checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
class DeepchecksDataIntegrityCheckStepParameters(BaseParameters):
    """Parameters class for the Deepchecks data integrity validator step.

    Attributes:
        check_list: Optional list of DeepchecksDataIntegrityCheck identifiers
            specifying the subset of Deepchecks data integrity checks to be
            performed. If not supplied, the entire set of data integrity checks
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
    """

    # Stays `None` when no explicit subset of checks is requested.
    check_list: Optional[Sequence[DeepchecksDataIntegrityCheck]] = None
    # `default_factory=dict` produces the empty-dict default by calling
    # `dict()` instead of reusing one dict literal.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_data_integrity_check_step(step_name, params)

Shortcut function to create a new instance of the DeepchecksDataIntegrityCheckStep step.

The returned DeepchecksDataIntegrityCheckStep can be used in a pipeline to run data integrity checks on an input pd.DataFrame and return the results as a Deepchecks SuiteResult object.

Parameters:

Name Type Description Default
step_name str

The name of the step

required
params DeepchecksDataIntegrityCheckStepParameters

The parameters for the step

required

Returns:

Type Description
BaseStep

a DeepchecksDataIntegrityCheckStep step instance

Source code in zenml/integrations/deepchecks/steps/deepchecks_data_integrity.py
def deepchecks_data_integrity_check_step(
    step_name: str,
    params: DeepchecksDataIntegrityCheckStepParameters,
) -> BaseStep:
    """Build a named DeepchecksDataIntegrityCheckStep instance.

    The returned DeepchecksDataIntegrityCheckStep can be used in a pipeline to
    run data integrity checks on an input pd.DataFrame and return the results
    as a Deepchecks SuiteResult object.

    Args:
        step_name: The name of the step.
        params: The parameters for the step.

    Returns:
        A DeepchecksDataIntegrityCheckStep step instance.
    """
    step = DeepchecksDataIntegrityCheckStep(name=step_name, params=params)
    return step

deepchecks_model_drift

Implementation of the Deepchecks model drift validation step.

DeepchecksModelDriftCheckStep (BaseStep)

Deepchecks model drift step.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
class DeepchecksModelDriftCheckStep(BaseStep):
    """Step that runs Deepchecks model drift checks on two dataframes."""

    def entrypoint(
        self,
        reference_dataset: pd.DataFrame,
        target_dataset: pd.DataFrame,
        model: ClassifierMixin,
        params: DeepchecksModelDriftCheckStepParameters,
    ) -> SuiteResult:
        """Run the model drift checks through the active data validator.

        Args:
            reference_dataset: Reference dataset for the model drift check.
            target_dataset: Target dataset to be used for the model drift
                check.
            model: A scikit-learn model to validate.
            params: The parameters for the step.

        Returns:
            A Deepchecks suite result with the validation results.
        """
        validator = cast(
            DeepchecksDataValidator,
            DeepchecksDataValidator.get_active_data_validator(),
        )
        # The cast only changes the static type of the check list.
        selected_checks = cast(Optional[Sequence[str]], params.check_list)

        # The target dataset is passed as the comparison dataset.
        return validator.model_validation(
            dataset=reference_dataset,
            comparison_dataset=target_dataset,
            model=model,
            check_list=selected_checks,
            dataset_kwargs=params.dataset_kwargs,
            check_kwargs=params.check_kwargs,
            run_kwargs=params.run_kwargs,
        )
PARAMETERS_CLASS (BaseParameters) pydantic-model

Parameters class for the Deepchecks model drift validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelDriftCheck]]

Optional list of DeepchecksModelDriftCheck identifiers specifying the subset of Deepchecks model drift checks to be performed. If not supplied, the entire set of model drift checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
class DeepchecksModelDriftCheckStepParameters(BaseParameters):
    """Parameters class for the Deepchecks model drift validator step.

    Attributes:
        check_list: Optional list of DeepchecksModelDriftCheck identifiers
            specifying the subset of Deepchecks model drift checks to be
            performed. If not supplied, the entire set of model drift checks
            will be performed. Defaults to `None`.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            Defaults to an empty dictionary.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys. Defaults to an empty
            dictionary.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method. Defaults to an empty dictionary.
    """

    check_list: Optional[Sequence[DeepchecksModelDriftCheck]] = None
    # `default_factory=dict` builds a fresh empty dictionary for each
    # instance instead of sharing one mutable default.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, reference_dataset, target_dataset, model, params)

Main entrypoint for the Deepchecks model drift step.

Parameters:

Name Type Description Default
reference_dataset DataFrame

Reference dataset for the model drift check.

required
target_dataset DataFrame

Target dataset to be used for the model drift check.

required
model ClassifierMixin

a scikit-learn model to validate

required
params DeepchecksModelDriftCheckStepParameters

the parameters for the step

required

Returns:

Type Description
SuiteResult

A Deepchecks suite result with the validation results.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
def entrypoint(
    self,
    reference_dataset: pd.DataFrame,
    target_dataset: pd.DataFrame,
    model: ClassifierMixin,
    params: DeepchecksModelDriftCheckStepParameters,
) -> SuiteResult:
    """Run the Deepchecks model drift checks on a pair of datasets.

    The work is delegated to the active Deepchecks data validator, which is
    called with both datasets, the model and the settings held in `params`.

    Args:
        reference_dataset: Dataset forwarded as the `dataset` argument of
            the model validation call.
        target_dataset: Dataset forwarded as the `comparison_dataset`
            argument of the model validation call.
        model: The scikit-learn model to validate.
        params: Step parameters supplying the check list and the dataset,
            check and run keyword arguments.

    Returns:
        The Deepchecks suite result produced by the data validator.
    """
    active_validator = cast(
        DeepchecksDataValidator,
        DeepchecksDataValidator.get_active_data_validator(),
    )
    # The cast only changes the static type seen by the type checker; the
    # configured check list is forwarded unchanged at runtime.
    selected_checks = cast(Optional[Sequence[str]], params.check_list)

    suite_result = active_validator.model_validation(
        dataset=reference_dataset,
        comparison_dataset=target_dataset,
        model=model,
        check_list=selected_checks,
        dataset_kwargs=params.dataset_kwargs,
        check_kwargs=params.check_kwargs,
        run_kwargs=params.run_kwargs,
    )
    return suite_result
DeepchecksModelDriftCheckStepParameters (BaseParameters) pydantic-model

Parameters class for the Deepchecks model drift validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelDriftCheck]]

Optional list of DeepchecksModelDriftCheck identifiers specifying the subset of Deepchecks model drift checks to be performed. If not supplied, the entire set of model drift checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
class DeepchecksModelDriftCheckStepParameters(BaseParameters):
    """Parameters class for the Deepchecks model drift validator step.

    Attributes:
        check_list: Optional list of DeepchecksModelDriftCheck identifiers
            specifying the subset of Deepchecks model drift checks to be
            performed. If not supplied, the entire set of model drift checks
            will be performed. Defaults to `None`.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            Defaults to an empty dictionary.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys. Defaults to an empty
            dictionary.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method. Defaults to an empty dictionary.
    """

    check_list: Optional[Sequence[DeepchecksModelDriftCheck]] = None
    # `default_factory=dict` builds a fresh empty dictionary for each
    # instance instead of sharing one mutable default.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_model_drift_check_step(step_name, params)

Shortcut function to create a new instance of the DeepchecksModelDriftCheckStep step.

The returned DeepchecksModelDriftCheckStep can be used in a pipeline to run model drift checks on two input pd.DataFrame datasets and an input scikit-learn ClassifierMixin model and return the results as a Deepchecks SuiteResult object.

Parameters:

Name Type Description Default
step_name str

The name of the step

required
params DeepchecksModelDriftCheckStepParameters

The parameters for the step

required

Returns:

Type Description
BaseStep

a DeepchecksModelDriftCheckStep step instance

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_drift.py
def deepchecks_model_drift_check_step(
    step_name: str,
    params: DeepchecksModelDriftCheckStepParameters,
) -> BaseStep:
    """Build a named DeepchecksModelDriftCheckStep from the given parameters.

    The resulting step can be placed in a pipeline, where it runs model
    drift checks against two input pd.DataFrame datasets and an input
    scikit-learn ClassifierMixin model, and yields a Deepchecks SuiteResult
    object.

    Args:
        step_name: Name to assign to the new step.
        params: Parameters the new step is configured with.

    Returns:
        The newly created DeepchecksModelDriftCheckStep instance.
    """
    drift_step = DeepchecksModelDriftCheckStep(
        name=step_name,
        params=params,
    )
    return drift_step

deepchecks_model_validation

Implementation of the Deepchecks model validation step.

DeepchecksModelValidationCheckStep (BaseStep)

Deepchecks model validation step.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
class DeepchecksModelValidationCheckStep(BaseStep):
    """Step that runs Deepchecks model validation checks on one dataset."""

    def entrypoint(
        self,
        dataset: pd.DataFrame,
        model: ClassifierMixin,
        params: DeepchecksModelValidationCheckStepParameters,
    ) -> SuiteResult:
        """Run the Deepchecks model validation checks on a dataset.

        The work is delegated to the active Deepchecks data validator,
        which is called with the dataset, the model and the settings held
        in `params`.

        Args:
            dataset: The Pandas DataFrame used for the validation.
            model: The scikit-learn model to validate.
            params: Step parameters supplying the check list and the
                dataset, check and run keyword arguments.

        Returns:
            The Deepchecks suite result produced by the data validator.
        """
        active_validator = cast(
            DeepchecksDataValidator,
            DeepchecksDataValidator.get_active_data_validator(),
        )
        # The cast only changes the static type seen by the type checker;
        # the configured check list is forwarded unchanged at runtime.
        selected_checks = cast(Optional[Sequence[str]], params.check_list)

        suite_result = active_validator.model_validation(
            dataset=dataset,
            model=model,
            check_list=selected_checks,
            dataset_kwargs=params.dataset_kwargs,
            check_kwargs=params.check_kwargs,
            run_kwargs=params.run_kwargs,
        )
        return suite_result
PARAMETERS_CLASS (BaseParameters) pydantic-model

Parameters class for the Deepchecks model validation validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelValidationCheck]]

Optional list of DeepchecksModelValidationCheck identifiers specifying the subset of Deepchecks model validation checks to be performed. If not supplied, the entire set of model validation checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
class DeepchecksModelValidationCheckStepParameters(BaseParameters):
    """Parameters class for the Deepchecks model validation validator step.

    Attributes:
        check_list: Optional list of DeepchecksModelValidationCheck identifiers
            specifying the subset of Deepchecks model validation checks to be
            performed. If not supplied, the entire set of model validation checks
            will be performed. Defaults to `None`.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            Defaults to an empty dictionary.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys. Defaults to an empty
            dictionary.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method. Defaults to an empty dictionary.
    """

    check_list: Optional[Sequence[DeepchecksModelValidationCheck]] = None
    # `default_factory=dict` builds a fresh empty dictionary for each
    # instance instead of sharing one mutable default.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
entrypoint(self, dataset, model, params)

Main entrypoint for the Deepchecks model validation step.

Parameters:

Name Type Description Default
dataset DataFrame

a Pandas DataFrame to use for the validation

required
model ClassifierMixin

a scikit-learn model to validate

required
params DeepchecksModelValidationCheckStepParameters

the parameters for the step

required

Returns:

Type Description
SuiteResult

A Deepchecks suite result with the validation results.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
def entrypoint(
    self,
    dataset: pd.DataFrame,
    model: ClassifierMixin,
    params: DeepchecksModelValidationCheckStepParameters,
) -> SuiteResult:
    """Run the Deepchecks model validation checks on a dataset.

    The work is delegated to the active Deepchecks data validator, which is
    called with the dataset, the model and the settings held in `params`.

    Args:
        dataset: The Pandas DataFrame used for the validation.
        model: The scikit-learn model to validate.
        params: Step parameters supplying the check list and the dataset,
            check and run keyword arguments.

    Returns:
        The Deepchecks suite result produced by the data validator.
    """
    active_validator = cast(
        DeepchecksDataValidator,
        DeepchecksDataValidator.get_active_data_validator(),
    )
    # The cast only changes the static type seen by the type checker; the
    # configured check list is forwarded unchanged at runtime.
    selected_checks = cast(Optional[Sequence[str]], params.check_list)

    suite_result = active_validator.model_validation(
        dataset=dataset,
        model=model,
        check_list=selected_checks,
        dataset_kwargs=params.dataset_kwargs,
        check_kwargs=params.check_kwargs,
        run_kwargs=params.run_kwargs,
    )
    return suite_result
DeepchecksModelValidationCheckStepParameters (BaseParameters) pydantic-model

Parameters class for the Deepchecks model validation validator step.

Attributes:

Name Type Description
check_list Optional[Sequence[zenml.integrations.deepchecks.validation_checks.DeepchecksModelValidationCheck]]

Optional list of DeepchecksModelValidationCheck identifiers specifying the subset of Deepchecks model validation checks to be performed. If not supplied, the entire set of model validation checks will be performed.

dataset_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor.

check_kwargs Dict[str, Dict[str, Any]]

Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys.

run_kwargs Dict[str, Any]

Additional keyword arguments to be passed to the Deepchecks Suite run method.

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
class DeepchecksModelValidationCheckStepParameters(BaseParameters):
    """Parameters class for the Deepchecks model validation validator step.

    Attributes:
        check_list: Optional list of DeepchecksModelValidationCheck identifiers
            specifying the subset of Deepchecks model validation checks to be
            performed. If not supplied, the entire set of model validation checks
            will be performed. Defaults to `None`.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            Defaults to an empty dictionary.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys. Defaults to an empty
            dictionary.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method. Defaults to an empty dictionary.
    """

    check_list: Optional[Sequence[DeepchecksModelValidationCheck]] = None
    # `default_factory=dict` builds a fresh empty dictionary for each
    # instance instead of sharing one mutable default.
    dataset_kwargs: Dict[str, Any] = Field(default_factory=dict)
    check_kwargs: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    run_kwargs: Dict[str, Any] = Field(default_factory=dict)
deepchecks_model_validation_check_step(step_name, params)

Shortcut function to create a new instance of the DeepchecksModelValidationCheckStep step.

The returned DeepchecksModelValidationCheckStep can be used in a pipeline to run model validation checks on an input pd.DataFrame dataset and an input scikit-learn ClassifierMixin model and return the results as a Deepchecks SuiteResult object.

Parameters:

Name Type Description Default
step_name str

The name of the step

required
params DeepchecksModelValidationCheckStepParameters

The parameters for the step

required

Returns:

Type Description
BaseStep

a DeepchecksModelValidationCheckStep step instance

Source code in zenml/integrations/deepchecks/steps/deepchecks_model_validation.py
def deepchecks_model_validation_check_step(
    step_name: str,
    params: DeepchecksModelValidationCheckStepParameters,
) -> BaseStep:
    """Build a named DeepchecksModelValidationCheckStep from the given parameters.

    The resulting step can be placed in a pipeline, where it runs model
    validation checks against an input pd.DataFrame dataset and an input
    scikit-learn ClassifierMixin model, and yields a Deepchecks SuiteResult
    object.

    Args:
        step_name: Name to assign to the new step.
        params: Parameters the new step is configured with.

    Returns:
        The newly created DeepchecksModelValidationCheckStep instance.
    """
    validation_step = DeepchecksModelValidationCheckStep(
        name=step_name,
        params=params,
    )
    return validation_step

validation_checks

Definition of the Deepchecks validation check types.

DeepchecksDataDriftCheck (DeepchecksValidationCheck)

Categories of Deepchecks data drift checks.

This list reflects the set of train-test validation checks provided by Deepchecks:

All these checks inherit from deepchecks.tabular.TrainTestCheck or deepchecks.vision.TrainTestCheck and require two datasets as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataDriftCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks data drift checks.

    This list reflects the set of train-test validation checks provided by
    Deepchecks:

      * [for tabular data](https://docs.deepchecks.com/stable/checks_gallery/tabular.html#train-test-validation)
      * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#train-test-validation)

    All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
    `deepchecks.vision.TrainTestCheck` and require two datasets as input.

    Every member value is computed by passing the referenced Deepchecks check
    class to `resolve_class`. Members prefixed with `TABULAR_` reference
    classes from `tabular_checks`; members prefixed with `VISION_` reference
    classes from `vision_checks`.
    """

    # Checks for tabular data.
    TABULAR_CATEGORY_MISMATCH_TRAIN_TEST = resolve_class(
        tabular_checks.CategoryMismatchTrainTest
    )
    TABULAR_DATASET_SIZE_COMPARISON = resolve_class(
        tabular_checks.DatasetsSizeComparison
    )
    TABULAR_DATE_TRAIN_TEST_LEAKAGE_DUPLICATES = resolve_class(
        tabular_checks.DateTrainTestLeakageDuplicates
    )
    TABULAR_DATE_TRAIN_TEST_LEAKAGE_OVERLAP = resolve_class(
        tabular_checks.DateTrainTestLeakageOverlap
    )
    TABULAR_DOMINANT_FREQUENCY_CHANGE = resolve_class(
        tabular_checks.DominantFrequencyChange
    )
    TABULAR_FEATURE_LABEL_CORRELATION_CHANGE = resolve_class(
        tabular_checks.FeatureLabelCorrelationChange
    )
    TABULAR_INDEX_LEAKAGE = resolve_class(tabular_checks.IndexTrainTestLeakage)
    TABULAR_NEW_LABEL_TRAIN_TEST = resolve_class(
        tabular_checks.NewLabelTrainTest
    )
    TABULAR_STRING_MISMATCH_COMPARISON = resolve_class(
        tabular_checks.StringMismatchComparison
    )
    TABULAR_TRAIN_TEST_FEATURE_DRIFT = resolve_class(
        tabular_checks.TrainTestFeatureDrift
    )
    TABULAR_TRAIN_TEST_LABEL_DRIFT = resolve_class(
        tabular_checks.TrainTestLabelDrift
    )
    TABULAR_TRAIN_TEST_SAMPLES_MIX = resolve_class(
        tabular_checks.TrainTestSamplesMix
    )
    TABULAR_WHOLE_DATASET_DRIFT = resolve_class(
        tabular_checks.WholeDatasetDrift
    )

    # Checks for computer vision data.
    VISION_FEATURE_LABEL_CORRELATION_CHANGE = resolve_class(
        vision_checks.FeatureLabelCorrelationChange
    )
    VISION_HEATMAP_COMPARISON = resolve_class(vision_checks.HeatmapComparison)
    VISION_IMAGE_DATASET_DRIFT = resolve_class(vision_checks.ImageDatasetDrift)
    VISION_IMAGE_PROPERTY_DRIFT = resolve_class(
        vision_checks.ImagePropertyDrift
    )
    VISION_NEW_LABELS = resolve_class(vision_checks.NewLabels)
    VISION_SIMILAR_IMAGE_LEAKAGE = resolve_class(
        vision_checks.SimilarImageLeakage
    )
    VISION_TRAIN_TEST_LABEL_DRIFT = resolve_class(
        vision_checks.TrainTestLabelDrift
    )

DeepchecksDataIntegrityCheck (DeepchecksValidationCheck)

Categories of Deepchecks data integrity checks.

This list reflects the set of data integrity checks provided by Deepchecks:

All these checks inherit from deepchecks.tabular.SingleDatasetCheck or deepchecks.vision.SingleDatasetCheck and require a single dataset as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataIntegrityCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks data integrity checks.

    This list reflects the set of data integrity checks provided by Deepchecks:

      * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#data-integrity)
      * [for computer vision](https://docs.deepchecks.com/en/stable/checks_gallery/vision.html#data-integrity)

    All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
    `deepchecks.vision.SingleDatasetCheck` and require a single dataset as input.

    Every member value is computed by passing the referenced Deepchecks check
    class to `resolve_class`.
    """

    # Checks for tabular data.
    TABULAR_COLUMNS_INFO = resolve_class(tabular_checks.ColumnsInfo)
    TABULAR_CONFLICTING_LABELS = resolve_class(
        tabular_checks.ConflictingLabels
    )
    TABULAR_DATA_DUPLICATES = resolve_class(tabular_checks.DataDuplicates)
    # NOTE(review): unlike its siblings, this class is referenced by its bare
    # name rather than through `tabular_checks` - presumably it is imported
    # directly at module level; confirm against the file's imports.
    TABULAR_FEATURE_FEATURE_CORRELATION = resolve_class(
        FeatureFeatureCorrelation
    )
    TABULAR_FEATURE_LABEL_CORRELATION = resolve_class(
        tabular_checks.FeatureLabelCorrelation
    )
    TABULAR_IDENTIFIER_LEAKAGE = resolve_class(
        tabular_checks.IdentifierLeakage
    )
    TABULAR_IS_SINGLE_VALUE = resolve_class(tabular_checks.IsSingleValue)
    TABULAR_MIXED_DATA_TYPES = resolve_class(tabular_checks.MixedDataTypes)
    TABULAR_MIXED_NULLS = resolve_class(tabular_checks.MixedNulls)
    TABULAR_OUTLIER_SAMPLE_DETECTION = resolve_class(
        tabular_checks.OutlierSampleDetection
    )
    TABULAR_SPECIAL_CHARS = resolve_class(tabular_checks.SpecialCharacters)
    TABULAR_STRING_LENGTH_OUT_OF_BOUNDS = resolve_class(
        tabular_checks.StringLengthOutOfBounds
    )
    TABULAR_STRING_MISMATCH = resolve_class(tabular_checks.StringMismatch)

    # Checks for computer vision data.
    VISION_IMAGE_PROPERTY_OUTLIERS = resolve_class(
        vision_checks.ImagePropertyOutliers
    )
    VISION_LABEL_PROPERTY_OUTLIERS = resolve_class(
        vision_checks.LabelPropertyOutliers
    )

DeepchecksModelDriftCheck (DeepchecksValidationCheck)

Categories of Deepchecks model drift checks.

This list includes a subset of the model evaluation checks provided by Deepchecks that require two datasets and a mandatory model as input:

All these checks inherit from deepchecks.tabular.TrainTestCheck or deepchecks.vision.TrainTestCheck and require two datasets and a mandatory model as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelDriftCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks model drift checks.

    This list includes a subset of the model evaluation checks provided by
    Deepchecks that require two datasets and a mandatory model as input:

      * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
      * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)

    All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
    `deepchecks.vision.TrainTestCheck` and require two datasets and a mandatory
    model as input.

    Every member value is computed by passing the referenced Deepchecks check
    class to `resolve_class`.
    """

    # Checks for tabular data.
    TABULAR_BOOSTING_OVERFIT = resolve_class(tabular_checks.BoostingOverfit)
    TABULAR_MODEL_ERROR_ANALYSIS = resolve_class(
        tabular_checks.ModelErrorAnalysis
    )
    TABULAR_PERFORMANCE_REPORT = resolve_class(
        tabular_checks.PerformanceReport
    )
    TABULAR_SIMPLE_MODEL_COMPARISON = resolve_class(
        tabular_checks.SimpleModelComparison
    )
    TABULAR_TRAIN_TEST_PREDICTION_DRIFT = resolve_class(
        tabular_checks.TrainTestPredictionDrift
    )
    TABULAR_UNUSED_FEATURES = resolve_class(tabular_checks.UnusedFeatures)

    # Checks for computer vision data.
    VISION_CLASS_PERFORMANCE = resolve_class(vision_checks.ClassPerformance)
    VISION_MODEL_ERROR_ANALYSIS = resolve_class(
        vision_checks.ModelErrorAnalysis
    )
    VISION_SIMPLE_MODEL_COMPARISON = resolve_class(
        vision_checks.SimpleModelComparison
    )
    VISION_TRAIN_TEST_PREDICTION_DRIFT = resolve_class(
        vision_checks.TrainTestPredictionDrift
    )

DeepchecksModelValidationCheck (DeepchecksValidationCheck)

Categories of Deepchecks model validation checks.

This list includes a subset of the model evaluation checks provided by Deepchecks that require a single dataset and a mandatory model as input:

All these checks inherit from deepchecks.tabular.SingleDatasetCheck or deepchecks.vision.SingleDatasetCheck and require a dataset and a mandatory model as input.

Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelValidationCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks model validation checks.

    This list includes a subset of the model evaluation checks provided by
    Deepchecks that require a single dataset and a mandatory model as input:

      * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
      * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)

    All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
    `deepchecks.vision.SingleDatasetCheck` and require a dataset and a
    mandatory model as input.

    Every member value is computed by passing the referenced Deepchecks check
    class to `resolve_class`.
    """

    # Checks for tabular data.
    TABULAR_CALIBRATION_SCORE = resolve_class(tabular_checks.CalibrationScore)
    TABULAR_CONFUSION_MATRIX_REPORT = resolve_class(
        tabular_checks.ConfusionMatrixReport
    )
    TABULAR_MODEL_INFERENCE_TIME = resolve_class(
        tabular_checks.ModelInferenceTime
    )
    TABULAR_REGRESSION_ERROR_DISTRIBUTION = resolve_class(
        tabular_checks.RegressionErrorDistribution
    )
    TABULAR_REGRESSION_SYSTEMATIC_ERROR = resolve_class(
        tabular_checks.RegressionSystematicError
    )
    TABULAR_ROC_REPORT = resolve_class(tabular_checks.RocReport)
    TABULAR_SEGMENT_PERFORMANCE = resolve_class(
        tabular_checks.SegmentPerformance
    )

    # Checks for computer vision data.
    VISION_CONFUSION_MATRIX_REPORT = resolve_class(
        vision_checks.ConfusionMatrixReport
    )
    VISION_IMAGE_SEGMENT_PERFORMANCE = resolve_class(
        vision_checks.ImageSegmentPerformance
    )
    VISION_MEAN_AVERAGE_PRECISION_REPORT = resolve_class(
        vision_checks.MeanAveragePrecisionReport
    )
    VISION_MEAN_AVERAGE_RECALL_REPORT = resolve_class(
        vision_checks.MeanAverageRecallReport
    )
    VISION_ROBUSTNESS_REPORT = resolve_class(vision_checks.RobustnessReport)
    VISION_SINGLE_DATASET_SCALAR_PERFORMANCE = resolve_class(
        vision_checks.SingleDatasetScalarPerformance
    )

DeepchecksValidationCheck (StrEnum)

Base class for all Deepchecks categories of validation checks.

This base class defines some conventions used for all enum values used to identify the various validation checks that can be performed with Deepchecks:

  • enum values represent fully formed class paths pointing to Deepchecks BaseCheck subclasses
  • all tabular data checks are located under the deepchecks.tabular.checks module sub-tree
  • all computer vision data checks are located under the deepchecks.vision.checks module sub-tree
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksValidationCheck(StrEnum):
    """Base class for all Deepchecks categories of validation checks.

    This base class defines some conventions used for all enum values used to
    identify the various validation checks that can be performed with
    Deepchecks:

      * enum values represent fully formed class paths pointing to Deepchecks
      BaseCheck subclasses
      * all tabular data checks are located under the
      `deepchecks.tabular.checks` module sub-tree
      * all computer vision data checks are located under the
      `deepchecks.vision.checks` module sub-tree
    """

    @classmethod
    def validate_check_name(cls, check_name: str) -> None:
        """Validate a Deepchecks check identifier.

        Only the identifier's prefix is inspected here; whether it resolves
        to an actual class is verified by `get_check_class`.

        Args:
            check_name: Identifies a builtin Deepchecks check. The identifier
                must be formatted as `deepchecks.{tabular|vision}.checks.<...>.<class-name>`.

        Raises:
            ValueError: If the check identifier does not follow the convention
                used by ZenML to identify Deepchecks builtin checks.
        """
        if not re.match(
            r"^deepchecks\.(tabular|vision)\.checks\.",
            check_name,
        ):
            raise ValueError(
                f"The supplied Deepcheck check identifier does not follow the "
                f"convention used by ZenML: `{check_name}`. The identifier "
                f"must be formatted as `deepchecks.<tabular|vision>.checks...` "
                f"and must be resolvable to a valid Deepchecks BaseCheck "
                f"subclass."
            )

    @classmethod
    def is_tabular_check(cls, check_name: str) -> bool:
        """Check if a validation check is applicable to tabular data.

        Args:
            check_name: Identifies a builtin Deepchecks check.

        Returns:
            True if the check is applicable to tabular data, otherwise False.

        Raises:
            ValueError: If the check identifier does not follow the convention
                used by ZenML to identify Deepchecks builtin checks.
        """
        cls.validate_check_name(check_name)
        return check_name.startswith("deepchecks.tabular.")

    @classmethod
    def is_vision_check(cls, check_name: str) -> bool:
        """Check if a validation check is applicable to computer vision data.

        Args:
            check_name: Identifies a builtin Deepchecks check.

        Returns:
            True if the check is applicable to computer vision data, otherwise
            False.

        Raises:
            ValueError: If the check identifier does not follow the convention
                used by ZenML to identify Deepchecks builtin checks.
        """
        cls.validate_check_name(check_name)
        return check_name.startswith("deepchecks.vision.")

    @classmethod
    def get_check_class(cls, check_name: str) -> Type[BaseCheck]:
        """Get the Deepchecks check class associated with an enum value or a custom check name.

        Args:
            check_name: Identifies a builtin Deepchecks check. The identifier
                must be formatted as `deepchecks.{tabular|vision}.checks.<class-name>`
                and must be resolvable to a valid Deepchecks BaseCheck class.

        Returns:
            The Deepchecks check class associated with this enum value.

        Raises:
            ValueError: If the check name could not be converted to a valid
                Deepchecks check class. This can happen for example if the enum
                values fall out of sync with the Deepchecks code base or if a
                custom check name is supplied that cannot be resolved to a valid
                Deepchecks BaseCheck class.
        """
        cls.validate_check_name(check_name)

        try:
            check_class = import_class_by_path(check_name)
        except (AttributeError, ImportError) as e:
            # Resolving a dotted path can fail while importing the module
            # part (ImportError) as well as while looking up the class name
            # on it (AttributeError). Both are reported as the documented
            # ValueError, with the original error kept as the cause.
            raise ValueError(
                f"Could not map the `{check_name}` check identifier to a valid "
                f"Deepchecks check class."
            ) from e

        # `issubclass` raises TypeError when its first argument is not a
        # class, so reject non-class objects before calling it.
        if not (
            isinstance(check_class, type)
            and issubclass(check_class, BaseCheck)
        ):
            raise ValueError(
                f"The `{check_name}` check identifier is mapped to an invalid "
                f"data type. Expected a {str(BaseCheck)} subclass, but instead "
                f"got: {str(check_class)}."
            )

        # Identifiers outside this enum are accepted, but flagged.
        if check_name not in cls.values():
            logger.warning(
                f"You are using a custom Deepchecks check identifier that is "
                f"not listed in the `{str(cls)}` enum type. This could lead "
                f"to unexpected behavior."
            )

        return check_class

    @property
    def check_class(self) -> Type[BaseCheck]:
        """Convert the enum value to a valid Deepchecks check class.

        Returns:
            The Deepchecks check class associated with the enum value.
        """
        return self.get_check_class(self.value)

visualizers special

Deepchecks visualizer.

deepchecks_visualizer

Implementation of the Deepchecks visualizer.

DeepchecksVisualizer (BaseVisualizer)

The implementation of a Deepchecks Visualizer.

Source code in zenml/integrations/deepchecks/visualizers/deepchecks_visualizer.py
class DeepchecksVisualizer(BaseVisualizer):
    """The implementation of a Deepchecks Visualizer."""

    # This is a concrete implementation, so it must not be marked abstract.
    def visualize(self, object: StepView, *args: Any, **kwargs: Any) -> None:
        """Method to visualize components.

        Generates a report for every data analysis artifact among the
        outputs of the step.

        Args:
            object: StepView fetched from run.get_step().
            *args: Additional arguments (unused).
            **kwargs: Additional keyword arguments (unused).
        """
        for artifact_view in object.outputs.values():
            # filter out anything but data analysis artifacts
            if artifact_view.type == ArtifactType.DATA_ANALYSIS:
                artifact = artifact_view.read()
                self.generate_report(artifact)

    def generate_report(self, result: Union[CheckResult, SuiteResult]) -> None:
        """Generate a Deepchecks Report.

        The result is always printed. Inside a notebook it is then displayed
        inline; otherwise it is saved to a temporary HTML file that is opened
        in the web browser. The temporary file is not deleted afterwards.

        Args:
            result: A Deepchecks CheckResult or SuiteResult.
        """
        from pathlib import Path

        print(result)

        if Environment.in_notebook():
            result.show()
            return

        logger.warning(
            "The magic functions are only usable in a Jupyter notebook."
        )
        # `delete=False` keeps the file on disk after the `with` block so
        # that the browser can still load it.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".html", encoding="utf-8"
        ) as f:
            result.save_as_html(f)
            report_path = f.name

        # `as_uri()` builds a well-formed `file://` URL on every platform;
        # prefixing the path with "file:///" by hand yields four slashes on
        # POSIX and unescaped backslashes on Windows.
        url = Path(report_path).resolve().as_uri()
        logger.info("Opening %s in a new browser..", report_path)
        webbrowser.open(url, new=2)
generate_report(self, result)

Generate a Deepchecks Report.

Parameters:

Name Type Description Default
result Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult]

A Deepchecks CheckResult or SuiteResult.

required
Source code in zenml/integrations/deepchecks/visualizers/deepchecks_visualizer.py
def generate_report(self, result: Union[CheckResult, SuiteResult]) -> None:
    """Generate a Deepchecks Report.

    The result is always printed. Inside a notebook it is then displayed
    inline; otherwise it is saved to a temporary HTML file that is opened in
    the web browser. The temporary file is not deleted afterwards.

    Args:
        result: A Deepchecks CheckResult or SuiteResult.
    """
    from pathlib import Path

    print(result)

    if Environment.in_notebook():
        result.show()
        return

    logger.warning(
        "The magic functions are only usable in a Jupyter notebook."
    )
    # `delete=False` keeps the file on disk after the `with` block so that
    # the browser can still load it.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".html", encoding="utf-8"
    ) as f:
        result.save_as_html(f)
        report_path = f.name

    # `as_uri()` builds a well-formed `file://` URL on every platform;
    # prefixing the path with "file:///" by hand yields four slashes on POSIX
    # and unescaped backslashes on Windows.
    url = Path(report_path).resolve().as_uri()
    logger.info("Opening %s in a new browser..", report_path)
    webbrowser.open(url, new=2)
visualize(self, object, *args, **kwargs)

Method to visualize components.

Parameters:

Name Type Description Default
object StepView

StepView fetched from run.get_step().

required
*args Any

Additional arguments (unused).

()
**kwargs Any

Additional keyword arguments (unused).

{}
Source code in zenml/integrations/deepchecks/visualizers/deepchecks_visualizer.py
def visualize(self, object: StepView, *args: Any, **kwargs: Any) -> None:
    """Method to visualize components.

    Generates a report for every data analysis artifact among the outputs of
    the step. This is a concrete implementation, so it is not marked as
    abstract.

    Args:
        object: StepView fetched from run.get_step().
        *args: Additional arguments (unused).
        **kwargs: Additional keyword arguments (unused).
    """
    for artifact_view in object.outputs.values():
        # filter out anything but data analysis artifacts
        if artifact_view.type == ArtifactType.DATA_ANALYSIS:
            artifact = artifact_view.read()
            self.generate_report(artifact)