Deepchecks
zenml.integrations.deepchecks
special
Deepchecks integration for ZenML.
The Deepchecks integration provides a way to validate your data in your pipelines. It includes a way to detect data anomalies and define checks to ensure quality of data.
The integration includes custom materializers to store and visualize Deepchecks `SuiteResults`.
DeepchecksIntegration (Integration)
Definition of Deepchecks integration for ZenML.
Source code in zenml/integrations/deepchecks/__init__.py
class DeepchecksIntegration(Integration):
    """Definition of [Deepchecks](https://github.com/deepchecks/deepchecks) integration for ZenML.

    Declares the pip and apt packages the integration depends on and exposes
    the stack component flavors it contributes.
    """

    NAME = DEEPCHECKS

    # Python packages required by the integration.
    REQUIREMENTS = [
        "deepchecks[vision]~=0.18.0",
        "torchvision>=0.14.0",
        # Both OpenCV distributions are pinned to the same version.
        "opencv-python==4.5.5.64",
        "opencv-python-headless==4.5.5.64",
        # Excluded release, see https://github.com/jd/tenacity/issues/471
        "tenacity!=8.4.0",
        # Deepchecks needs pandas, but its 0.18.0 release is not yet
        # compatible with pandas>=2.2.0, hence the upper bound.
        "pandas<2.2.0",
    ]

    # System packages required by the integration.
    APT_PACKAGES = ["ffmpeg", "libsm6", "libxext6"]

    # Requirements that are left in place when the integration is uninstalled.
    REQUIREMENTS_IGNORED_ON_UNINSTALL = ["pandas", "torchvision", "tenacity"]

    @classmethod
    def activate(cls) -> None:
        """Activate the Deepchecks integration.

        Imports the Deepchecks materializers module (presumably so that the
        materializers get registered as an import side effect - confirm).
        """
        from zenml.integrations.deepchecks import materializers  # noqa

    @classmethod
    def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
        """Collect the requirements of this integration.

        Args:
            target_os: The target operating system to get the requirements for.

        Returns:
            The integration's own requirements followed by the requirements
            of the pandas integration for the same target OS.
        """
        from zenml.integrations.pandas import PandasIntegration

        pandas_requirements = PandasIntegration.get_requirements(
            target_os=target_os
        )
        return cls.REQUIREMENTS + pandas_requirements

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Deepchecks integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # Imported inside the method, mirroring the other lazy imports of
        # this class.
        from zenml.integrations.deepchecks.flavors import (
            DeepchecksDataValidatorFlavor,
        )

        integration_flavors: List[Type[Flavor]] = [
            DeepchecksDataValidatorFlavor
        ]
        return integration_flavors
activate()
classmethod
Activate the Deepchecks integration.
Source code in zenml/integrations/deepchecks/__init__.py
@classmethod
def activate(cls) -> None:
    """Activate the Deepchecks integration.

    Imports the Deepchecks materializers module (presumably so that the
    materializers get registered as an import side effect - confirm).
    """
    from zenml.integrations.deepchecks import materializers  # noqa
flavors()
classmethod
Declare the stack component flavors for the Deepchecks integration.
Returns:
Type | Description |
---|---|
List[Type[zenml.stack.flavor.Flavor]] |
List of stack component flavors for this integration. |
Source code in zenml/integrations/deepchecks/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Deepchecks integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # The flavor class is imported inside the method rather than at module
    # level.
    from zenml.integrations.deepchecks.flavors import (
        DeepchecksDataValidatorFlavor,
    )

    integration_flavors: List[Type[Flavor]] = [DeepchecksDataValidatorFlavor]
    return integration_flavors
get_requirements(target_os=None)
classmethod
Method to get the requirements for the integration.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
target_os |
Optional[str] |
The target operating system to get the requirements for. |
None |
Returns:
Type | Description |
---|---|
List[str] |
A list of requirements. |
Source code in zenml/integrations/deepchecks/__init__.py
@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Collect the requirements of this integration.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        The integration's own requirements followed by the requirements of
        the pandas integration for the same target OS.
    """
    from zenml.integrations.pandas import PandasIntegration

    pandas_requirements = PandasIntegration.get_requirements(
        target_os=target_os
    )
    return cls.REQUIREMENTS + pandas_requirements
data_validators
special
Initialization of the Deepchecks data validator for ZenML.
deepchecks_data_validator
Implementation of the Deepchecks data validator.
DeepchecksDataValidator (BaseDataValidator)
Deepchecks data validator stack component.
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidator(BaseDataValidator):
    """Deepchecks data validator stack component.

    Builds Deepchecks check suites from ZenML check identifiers and runs
    them against tabular (pandas / scikit-learn) or computer vision
    (PyTorch) datasets and models.
    """

    NAME: ClassVar[str] = "Deepchecks"
    FLAVOR: ClassVar[Type[BaseDataValidatorFlavor]] = (
        DeepchecksDataValidatorFlavor
    )

    @staticmethod
    def _split_checks(
        check_list: Sequence[str],
    ) -> Tuple[Sequence[str], Sequence[str]]:
        """Split a list of check identifiers in two lists, one for tabular and one for computer vision checks.

        Args:
            check_list: A list of check identifiers.

        Returns:
            List of tabular check identifiers and list of computer vision
            check identifiers.
        """
        tabular_checks = [
            check
            for check in check_list
            if DeepchecksValidationCheck.is_tabular_check(check)
        ]
        vision_checks = [
            check
            for check in check_list
            if DeepchecksValidationCheck.is_vision_check(check)
        ]
        return tabular_checks, vision_checks

    # NOTE: the `{}` defaults below are kept for interface compatibility.
    # They are only read or unpacked in this method, never mutated.
    @classmethod
    def _create_and_run_check_suite(
        cls,
        check_enum: Type[DeepchecksValidationCheck],
        reference_dataset: Union[pd.DataFrame, DataLoader[Any]],
        comparison_dataset: Optional[
            Union[pd.DataFrame, DataLoader[Any]]
        ] = None,
        models: Optional[List[Union[ClassifierMixin, Module]]] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
    ) -> SuiteResult:
        """Create and run a Deepchecks check suite corresponding to the input parameters.

        This method contains generic logic common to all Deepchecks data
        validator methods that validates the input arguments and uses them to
        generate and run a Deepchecks check suite.

        Args:
            check_enum: ZenML enum type grouping together Deepchecks checks with
                the same characteristics. This is used to generate a default
                list of checks, if a custom list isn't provided via the
                `check_list` argument.
            reference_dataset: Primary (reference) dataset argument used during
                validation.
            comparison_dataset: Optional secondary (comparison) dataset argument
                used during comparison checks.
            models: Optional model argument used during validation. When more
                than one model is supplied, a model comparison suite is used
                and all models must be of the same type.
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the list of Deepchecks checks to be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks tabular.Dataset or vision.VisionData constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys. Entries named
                `condition_<name>` whose value is a dictionary are not passed
                to the constructor; they are applied by calling the check's
                `add_condition_<name>` method with that dictionary.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.

        Returns:
            Deepchecks SuiteResult object with the Suite run results.

        Raises:
            TypeError: If the datasets, model and check list arguments combine
                data types and/or checks from different categories (tabular and
                computer vision), if an unsupported dataset or model type is
                supplied, or if multiple models of different types are given.
        """
        # Detect what type of check to perform (tabular or computer vision) from
        # the dataset/model datatypes and the check list. At the same time,
        # validate the combination of data types used for dataset and model
        # arguments and the check list.
        is_tabular = False
        is_vision = False
        is_multi_model = False
        for dataset in [reference_dataset, comparison_dataset]:
            if dataset is None:
                continue
            if isinstance(dataset, pd.DataFrame):
                is_tabular = True
            elif isinstance(dataset, DataLoader):
                is_vision = True
            else:
                raise TypeError(
                    f"Unsupported dataset data type found: {type(dataset)}. "
                    f"Supported data types are {str(pd.DataFrame)} for tabular "
                    f"data and {str(DataLoader)} for computer vision data."
                )

        if models:
            # More than one model switches the run to a model comparison
            # suite, which only makes sense for models of the same type.
            if len(models) > 1:
                is_multi_model = True
                if len({type(model) for model in models}) > 1:
                    raise TypeError(
                        "Models used for comparison checks must be of the same type."
                    )
            # All models share one type at this point, so inspecting the
            # first one is enough to pick the data category.
            model = models[0]
            if isinstance(model, ClassifierMixin):
                is_tabular = True
            elif isinstance(model, Module):
                is_vision = True
            else:
                raise TypeError(
                    f"Unsupported model data type found: {type(model)}. "
                    f"Supported data types are {str(ClassifierMixin)} for "
                    f"tabular data and {str(Module)} for computer vision "
                    f"data."
                )

        if is_tabular and is_vision:
            raise TypeError(
                f"Tabular and computer vision data types used for datasets and "
                f"models cannot be mixed. They must all belong to the same "
                f"category. Supported data types for tabular data are "
                f"{str(pd.DataFrame)} for datasets and {str(ClassifierMixin)} "
                f"for models. Supported data types for computer vision data "
                f"are {str(DataLoader)} for datasets and {str(Module)} "
                f"for models."
            )

        if not check_list:
            # default to executing all the checks listed in the supplied
            # checks enum type if a custom check list is not supplied
            # don't include the TABULAR_PERFORMANCE_BIAS check enum value
            # as it requires a protected feature name to be set
            checks_to_exclude = [
                DeepchecksModelValidationCheck.TABULAR_PERFORMANCE_BIAS
            ]
            check_enum_values = [
                check.value
                for check in check_enum
                if check not in checks_to_exclude
            ]

            tabular_checks, vision_checks = cls._split_checks(
                check_enum_values
            )
            # Keep only the checks of the detected category; the other list
            # is cleared so the mix validations below cannot trigger.
            if is_tabular:
                check_list = tabular_checks
                vision_checks = []
            else:
                check_list = vision_checks
                tabular_checks = []
        else:
            tabular_checks, vision_checks = cls._split_checks(check_list)

        if tabular_checks and vision_checks:
            raise TypeError(
                f"The check list cannot mix tabular checks "
                f"({tabular_checks}) and computer vision checks ("
                f"{vision_checks})."
            )

        if is_tabular and vision_checks:
            raise TypeError(
                f"Tabular data types used for datasets and models can only "
                f"be used with tabular validation checks. The following "
                f"computer vision checks included in the check list are "
                f"not valid: {vision_checks}."
            )

        if is_vision and tabular_checks:
            raise TypeError(
                f"Computer vision data types used for datasets and models "
                f"can only be used with computer vision validation checks. "
                f"The following tabular checks included in the check list "
                f"are not valid: {tabular_checks}."
            )

        # Pair every check identifier with the check class it resolves to.
        check_classes = [
            (check, check_enum.get_check_class(check)) for check in check_list
        ]

        # use the pipeline name and the step name to generate a unique suite
        # name
        try:
            # get pipeline name and step name
            step_context = get_step_context()
            pipeline_name = step_context.pipeline.name
            step_name = step_context.step_run.name
            suite_name = f"{pipeline_name}_{step_name}"
        except RuntimeError:
            # if not running inside a pipeline step, use random values
            suite_name = f"suite_{random_str(5)}"

        if is_tabular:
            dataset_class = TabularData
            suite_class = TabularSuite
            full_suite = full_tabular_suite()
        else:
            dataset_class = VisionData
            suite_class = VisionSuite
            full_suite = full_vision_suite()

        # if is_multi_model is True, we need to use the ModelComparisonSuite
        if is_multi_model:
            suite_class = ModelComparisonSuite

        train_dataset = dataset_class(reference_dataset, **dataset_kwargs)
        test_dataset = None
        if comparison_dataset is not None:
            test_dataset = dataset_class(comparison_dataset, **dataset_kwargs)
        suite = suite_class(name=suite_name)

        # Some Deepchecks checks require a minimum configuration such as
        # conditions to be configured (see https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_configure_check_conditions.html#sphx-glr-user-guide-general-customizations-examples-plot-configure-check-conditions-py)
        # for their execution to have meaning. For checks that don't have
        # custom configuration attributes explicitly specified in the
        # `check_kwargs` input parameter, we use the default check
        # instances extracted from the full suite shipped with Deepchecks.
        default_checks = {
            check.__class__: check for check in full_suite.checks.values()
        }
        for check_name, check_class in check_classes:
            extra_kwargs = check_kwargs.get(check_name, {})
            default_check = default_checks.get(check_class)

            # Separate the condition definitions (`condition_*` entries with
            # a dict value) from the plain constructor arguments.
            condition_args = {
                arg_name: arg_value
                for arg_name, arg_value in extra_kwargs.items()
                if arg_name.startswith("condition_")
                and isinstance(arg_value, dict)
            }
            constructor_kwargs = {
                arg_name: arg_value
                for arg_name, arg_value in extra_kwargs.items()
                if arg_name not in condition_args
            }

            check: BaseCheck
            if extra_kwargs or not default_check:
                # Instantiate with the arguments configured for THIS check
                # only, not with the whole per-check `check_kwargs` mapping.
                check = check_class(**constructor_kwargs)
            else:
                check = default_check

            # apply the condition kwargs extracted from the check kwargs
            for arg_name, condition_kwargs in condition_args.items():
                condition_method = getattr(check, f"add_{arg_name}", None)
                if not condition_method or not callable(condition_method):
                    logger.warning(
                        f"Deepchecks check type {check.__class__} has no "
                        f"condition named {arg_name}. Ignoring the check "
                        f"argument."
                    )
                    continue
                condition_method(**condition_kwargs)

            # if the check is supported by the suite, add it
            if isinstance(check, suite.supported_checks()):
                suite.add(check)
            else:
                logger.warning(
                    f"Check {check_name} is not supported by the {suite_class} "
                    "suite. Ignoring the check."
                )

        if isinstance(suite, ModelComparisonSuite):
            # NOTE: `run_kwargs` is not forwarded in the model comparison
            # case.
            return suite.run(
                models=models,
                train_datasets=train_dataset,
                test_datasets=test_dataset,
            )
        else:
            return suite.run(
                train_dataset=train_dataset,
                test_dataset=test_dataset,
                model=models[0] if models else None,
                **run_kwargs,
            )

    def data_validation(
        self,
        dataset: Union[pd.DataFrame, DataLoader[Any]],
        comparison_dataset: Optional[Any] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ) -> SuiteResult:
        """Run one or more Deepchecks data validation checks on a dataset.

        Call this method to analyze and identify potential integrity problems
        with a single dataset (e.g. missing values, conflicting labels, mixed
        data types etc.) and dataset comparison checks (e.g. data drift
        checks). Dataset comparison checks require that a second dataset be
        supplied via the `comparison_dataset` argument.

        The `check_list` argument may be used to specify a custom set of
        Deepchecks data integrity checks to perform, identified by
        `DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
        values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        data integrity checks will be performed on the input data. See
        `DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available data drift checks will be performed on the input
        data. See `DeepchecksDataDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

        Args:
            dataset: Target dataset to be validated.
            comparison_dataset: Optional second dataset to be used for data
                comparison checks (e.g. data drift checks).
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the data validation checks to be performed.
                `DeepchecksDataIntegrityCheck` enum values should be used for
                single data validation checks and `DeepchecksDataDriftCheck`
                enum values for data comparison checks. If not supplied, the
                entire set of checks applicable to the input dataset(s)
                will be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.
            kwargs: Additional keyword arguments (unused).

        Returns:
            A Deepchecks SuiteResult with the results of the validation.
        """
        check_enum: Type[DeepchecksValidationCheck]
        if comparison_dataset is None:
            check_enum = DeepchecksDataIntegrityCheck
        else:
            check_enum = DeepchecksDataDriftCheck

        return self._create_and_run_check_suite(
            check_enum=check_enum,
            reference_dataset=dataset,
            comparison_dataset=comparison_dataset,
            check_list=check_list,
            dataset_kwargs=dataset_kwargs,
            check_kwargs=check_kwargs,
            run_kwargs=run_kwargs,
        )

    def model_validation(
        self,
        dataset: Union[pd.DataFrame, DataLoader[Any]],
        model: Union[ClassifierMixin, Module],
        comparison_dataset: Optional[Any] = None,
        check_list: Optional[Sequence[str]] = None,
        dataset_kwargs: Dict[str, Any] = {},
        check_kwargs: Dict[str, Dict[str, Any]] = {},
        run_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ) -> Any:
        """Run one or more Deepchecks model validation checks.

        Call this method to perform model validation checks (e.g. confusion
        matrix validation, performance reports, model error analyses, etc).
        A second dataset is required for model performance comparison tests
        (i.e. tests that identify changes in a model behavior by comparing how
        it performs on two different datasets).

        The `check_list` argument may be used to specify a custom set of
        Deepchecks model validation checks to perform, identified by
        `DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
        values. If omitted:

        * if the `comparison_dataset` is omitted, a suite with all available
        model validation checks will be performed on the input data. See
        `DeepchecksModelValidationCheck` for a list of Deepchecks builtin
        checks that are compatible with this method.

        * if the `comparison_dataset` is supplied, a suite with all
        available model comparison checks will be performed on the input
        data. See `DeepchecksModelDriftCheck` for a list of Deepchecks
        builtin checks that are compatible with this method.

        Args:
            dataset: Target dataset to be validated.
            model: Target model to be validated.
            comparison_dataset: Optional second dataset to be used for model
                comparison checks.
            check_list: Optional list of ZenML Deepchecks check identifiers
                specifying the model validation checks to be performed.
                `DeepchecksModelValidationCheck` enum values should be used for
                model validation checks and `DeepchecksModelDriftCheck` enum
                values for model comparison checks. If not supplied, the
                entire set of checks applicable to the input dataset(s)
                will be performed.
            dataset_kwargs: Additional keyword arguments to be passed to the
                Deepchecks tabular.Dataset or vision.VisionData constructor.
            check_kwargs: Additional keyword arguments to be passed to the
                Deepchecks check object constructors. Arguments are grouped for
                each check and indexed using the full check class name or
                check enum value as dictionary keys.
            run_kwargs: Additional keyword arguments to be passed to the
                Deepchecks Suite `run` method.
            kwargs: Additional keyword arguments (unused).

        Returns:
            A Deepchecks SuiteResult with the results of the validation.
        """
        check_enum: Type[DeepchecksValidationCheck]
        if comparison_dataset is None:
            check_enum = DeepchecksModelValidationCheck
        else:
            check_enum = DeepchecksModelDriftCheck

        return self._create_and_run_check_suite(
            check_enum=check_enum,
            reference_dataset=dataset,
            comparison_dataset=comparison_dataset,
            models=[model],
            check_list=check_list,
            dataset_kwargs=dataset_kwargs,
            check_kwargs=check_kwargs,
            run_kwargs=run_kwargs,
        )
FLAVOR (BaseDataValidatorFlavor)
Flavor of the Deepchecks data validator.
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor of the Deepchecks data validator.

    Exposes the flavor's name, documentation links, logo and the class that
    implements the data validator.
    """

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        flavor_name = DEEPCHECKS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def docs_url(self) -> Optional[str]:
        """A url to point at docs explaining this flavor.

        Returns:
            A flavor docs url.
        """
        # Delegates to the default URL generator available on the instance.
        default_docs_url = self.generate_default_docs_url()
        return default_docs_url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """A url to point at SDK docs explaining this flavor.

        Returns:
            A flavor SDK docs url.
        """
        # Delegates to the default SDK URL generator available on the
        # instance.
        default_sdk_docs_url = self.generate_default_sdk_docs_url()
        return default_sdk_docs_url

    @property
    def logo_url(self) -> str:
        """A url to represent the flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/data_validator/deepchecks.png"

    @property
    def implementation_class(self) -> Type["DeepchecksDataValidator"]:
        """Implementation class.

        Returns:
            The implementation class.
        """
        # The implementation is imported only when the property is accessed.
        from zenml.integrations.deepchecks.data_validators import (
            DeepchecksDataValidator,
        )

        validator_class = DeepchecksDataValidator
        return validator_class
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor docs url. |
implementation_class: Type[DeepchecksDataValidator]
property
readonly
Implementation class.
Returns:
Type | Description |
---|---|
Type[DeepchecksDataValidator] |
The implementation class. |
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:
Type | Description |
---|---|
str |
The flavor logo. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor SDK docs url. |
data_validation(self, dataset, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)
Run one or more Deepchecks data validation checks on a dataset.
Call this method to analyze and identify potential integrity problems
with a single dataset (e.g. missing values, conflicting labels, mixed
data types etc.) and dataset comparison checks (e.g. data drift
checks). Dataset comparison checks require that a second dataset be
supplied via the comparison_dataset
argument.
The check_list
argument may be used to specify a custom set of
Deepchecks data integrity checks to perform, identified by
DeepchecksDataIntegrityCheck
and DeepchecksDataDriftCheck
enum
values. If omitted:
- if the `comparison_dataset` is omitted, a suite with all available data integrity checks will be performed on the input data. See `DeepchecksDataIntegrityCheck` for a list of Deepchecks builtin checks that are compatible with this method.
- if the `comparison_dataset` is supplied, a suite with all available data drift checks will be performed on the input data. See `DeepchecksDataDriftCheck` for a list of Deepchecks builtin checks that are compatible with this method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
Union[pandas.DataFrame, torch.utils.data.dataloader.DataLoader[Any]] |
Target dataset to be validated. |
required |
comparison_dataset |
Optional[Any] |
Optional second dataset to be used for data comparison checks (e.g. data drift checks). |
None |
check_list |
Optional[Sequence[str]] |
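Optional list of ZenML Deepchecks check identifiers specifying the data validation checks to be performed. `DeepchecksDataIntegrityCheck` enum values should be used for single data validation checks and `DeepchecksDataDriftCheck` enum values for data comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed. |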
None |
dataset_kwargs |
Dict[str, Any] |
Additional keyword arguments to be passed to the Deepchecks `tabular.Dataset` or `vision.VisionData` constructor. |
{} |
check_kwargs |
Dict[str, Dict[str, Any]] |
Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
{} |
run_kwargs |
Dict[str, Any] |
Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
{} |
kwargs |
Any |
Additional keyword arguments (unused). |
{} |
Returns:
Type | Description |
---|---|
deepchecks.core.suite.SuiteResult |
A Deepchecks SuiteResult with the results of the validation. |
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def data_validation(
    self,
    dataset: Union[pd.DataFrame, DataLoader[Any]],
    comparison_dataset: Optional[Any] = None,
    check_list: Optional[Sequence[str]] = None,
    dataset_kwargs: Dict[str, Any] = {},
    check_kwargs: Dict[str, Dict[str, Any]] = {},
    run_kwargs: Dict[str, Any] = {},
    **kwargs: Any,
) -> SuiteResult:
    """Run one or more Deepchecks data validation checks on a dataset.

    Use this method to look for integrity problems in a single dataset
    (e.g. missing values, conflicting labels, mixed data types) or, when a
    second dataset is supplied via `comparison_dataset`, to run dataset
    comparison checks (e.g. data drift checks).

    A custom set of checks can be selected through `check_list`, using
    `DeepchecksDataIntegrityCheck` and `DeepchecksDataDriftCheck` enum
    values. When `check_list` is omitted:

    * without a `comparison_dataset`, the default checks are taken from
    `DeepchecksDataIntegrityCheck`.

    * with a `comparison_dataset`, the default checks are taken from
    `DeepchecksDataDriftCheck`.

    Args:
        dataset: Target dataset to be validated.
        comparison_dataset: Optional second dataset to be used for data
            comparison checks (e.g. data drift checks).
        check_list: Optional list of ZenML Deepchecks check identifiers
            specifying the data validation checks to be performed.
            `DeepchecksDataIntegrityCheck` enum values should be used for
            single data validation checks and `DeepchecksDataDriftCheck`
            enum values for data comparison checks. If not supplied, the
            entire set of checks applicable to the input dataset(s)
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks `tabular.Dataset` or `vision.VisionData` constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
        kwargs: Additional keyword arguments (unused).

    Returns:
        A Deepchecks SuiteResult with the results of the validation.
    """
    # A single dataset means integrity checks; two datasets mean drift
    # checks.
    check_enum: Type[DeepchecksValidationCheck] = (
        DeepchecksDataIntegrityCheck
        if comparison_dataset is None
        else DeepchecksDataDriftCheck
    )

    suite_result = self._create_and_run_check_suite(
        check_enum=check_enum,
        reference_dataset=dataset,
        comparison_dataset=comparison_dataset,
        check_list=check_list,
        dataset_kwargs=dataset_kwargs,
        check_kwargs=check_kwargs,
        run_kwargs=run_kwargs,
    )
    return suite_result
model_validation(self, dataset, model, comparison_dataset=None, check_list=None, dataset_kwargs={}, check_kwargs={}, run_kwargs={}, **kwargs)
Run one or more Deepchecks model validation checks.
Call this method to perform model validation checks (e.g. confusion matrix validation, performance reports, model error analyses, etc). A second dataset is required for model performance comparison tests (i.e. tests that identify changes in a model behavior by comparing how it performs on two different datasets).
The check_list
argument may be used to specify a custom set of
Deepchecks model validation checks to perform, identified by
DeepchecksModelValidationCheck
and DeepchecksModelDriftCheck
enum
values. If omitted:
- if the `comparison_dataset` is omitted, a suite with all available model validation checks will be performed on the input data. See `DeepchecksModelValidationCheck` for a list of Deepchecks builtin checks that are compatible with this method.
- if the `comparison_dataset` is supplied, a suite with all available model comparison checks will be performed on the input data. See `DeepchecksModelDriftCheck` for a list of Deepchecks builtin checks that are compatible with this method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
Union[pandas.DataFrame, torch.utils.data.dataloader.DataLoader[Any]] |
Target dataset to be validated. |
required |
model |
Union[sklearn.base.ClassifierMixin, torch.nn.Module] |
Target model to be validated. |
required |
comparison_dataset |
Optional[Any] |
Optional second dataset to be used for model comparison checks. |
None |
check_list |
Optional[Sequence[str]] |
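Optional list of ZenML Deepchecks check identifiers specifying the model validation checks to be performed. `DeepchecksModelValidationCheck` enum values should be used for model validation checks and `DeepchecksModelDriftCheck` enum values for model comparison checks. If not supplied, the entire set of checks applicable to the input dataset(s) will be performed. |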
None |
dataset_kwargs |
Dict[str, Any] |
Additional keyword arguments to be passed to the Deepchecks tabular.Dataset or vision.VisionData constructor. |
{} |
check_kwargs |
Dict[str, Dict[str, Any]] |
Additional keyword arguments to be passed to the Deepchecks check object constructors. Arguments are grouped for each check and indexed using the full check class name or check enum value as dictionary keys. |
{} |
run_kwargs |
Dict[str, Any] |
Additional keyword arguments to be passed to the Deepchecks Suite `run` method. |
{} |
kwargs |
Any |
Additional keyword arguments (unused). |
{} |
Returns:
Type | Description |
---|---|
Any |
A Deepchecks SuiteResult with the results of the validation. |
Source code in zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py
def model_validation(
    self,
    dataset: Union[pd.DataFrame, DataLoader[Any]],
    model: Union[ClassifierMixin, Module],
    comparison_dataset: Optional[Any] = None,
    check_list: Optional[Sequence[str]] = None,
    dataset_kwargs: Dict[str, Any] = {},
    check_kwargs: Dict[str, Dict[str, Any]] = {},
    run_kwargs: Dict[str, Any] = {},
    **kwargs: Any,
) -> Any:
    """Run one or more Deepchecks model validation checks.

    Use this method to perform model validation checks (e.g. confusion
    matrix validation, performance reports, model error analyses). A second
    dataset is required for model performance comparison tests, i.e. tests
    that identify changes in a model's behavior by comparing how it performs
    on two different datasets.

    A custom set of checks can be selected through `check_list`, using
    `DeepchecksModelValidationCheck` and `DeepchecksModelDriftCheck` enum
    values. When `check_list` is omitted:

    * without a `comparison_dataset`, the default checks are taken from
    `DeepchecksModelValidationCheck`.

    * with a `comparison_dataset`, the default checks are taken from
    `DeepchecksModelDriftCheck`.

    Args:
        dataset: Target dataset to be validated.
        model: Target model to be validated.
        comparison_dataset: Optional second dataset to be used for model
            comparison checks.
        check_list: Optional list of ZenML Deepchecks check identifiers
            specifying the model validation checks to be performed.
            `DeepchecksModelValidationCheck` enum values should be used for
            model validation checks and `DeepchecksModelDriftCheck` enum
            values for model comparison checks. If not supplied, the
            entire set of checks applicable to the input dataset(s)
            will be performed.
        dataset_kwargs: Additional keyword arguments to be passed to the
            Deepchecks tabular.Dataset or vision.VisionData constructor.
        check_kwargs: Additional keyword arguments to be passed to the
            Deepchecks check object constructors. Arguments are grouped for
            each check and indexed using the full check class name or
            check enum value as dictionary keys.
        run_kwargs: Additional keyword arguments to be passed to the
            Deepchecks Suite `run` method.
        kwargs: Additional keyword arguments (unused).

    Returns:
        A Deepchecks SuiteResult with the results of the validation.
    """
    # A single dataset means validation checks; two datasets mean model
    # drift checks.
    check_enum: Type[DeepchecksValidationCheck] = (
        DeepchecksModelValidationCheck
        if comparison_dataset is None
        else DeepchecksModelDriftCheck
    )

    # The shared suite runner expects a list of models.
    suite_result = self._create_and_run_check_suite(
        check_enum=check_enum,
        reference_dataset=dataset,
        comparison_dataset=comparison_dataset,
        models=[model],
        check_list=check_list,
        dataset_kwargs=dataset_kwargs,
        check_kwargs=check_kwargs,
        run_kwargs=run_kwargs,
    )
    return suite_result
flavors
special
Deepchecks integration flavors.
deepchecks_data_validator_flavor
Deepchecks data validator flavor.
DeepchecksDataValidatorFlavor (BaseDataValidatorFlavor)
Flavor of the Deepchecks data validator.
Source code in zenml/integrations/deepchecks/flavors/deepchecks_data_validator_flavor.py
class DeepchecksDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor of the Deepchecks data validator.

    Exposes the flavor's name, documentation links, logo and the class that
    implements the data validator.
    """

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        flavor_name = DEEPCHECKS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def docs_url(self) -> Optional[str]:
        """A url to point at docs explaining this flavor.

        Returns:
            A flavor docs url.
        """
        # Delegates to the default URL generator available on the instance.
        default_docs_url = self.generate_default_docs_url()
        return default_docs_url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """A url to point at SDK docs explaining this flavor.

        Returns:
            A flavor SDK docs url.
        """
        # Delegates to the default SDK URL generator available on the
        # instance.
        default_sdk_docs_url = self.generate_default_sdk_docs_url()
        return default_sdk_docs_url

    @property
    def logo_url(self) -> str:
        """A url to represent the flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/data_validator/deepchecks.png"

    @property
    def implementation_class(self) -> Type["DeepchecksDataValidator"]:
        """Implementation class.

        Returns:
            The implementation class.
        """
        # The implementation is imported only when the property is accessed.
        from zenml.integrations.deepchecks.data_validators import (
            DeepchecksDataValidator,
        )

        validator_class = DeepchecksDataValidator
        return validator_class
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor docs url. |
implementation_class: Type[DeepchecksDataValidator]
property
readonly
Implementation class.
Returns:
Type | Description |
---|---|
Type[DeepchecksDataValidator] |
The implementation class. |
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:
Type | Description |
---|---|
str |
The flavor logo. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor SDK docs url. |
materializers
special
Deepchecks materializers.
deepchecks_dataset_materializer
Implementation of Deepchecks dataset materializer.
DeepchecksDatasetMaterializer (PandasMaterializer)
Materializer to read data to and from Deepchecks dataset.
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
class DeepchecksDatasetMaterializer(PandasMaterializer):
    """Materializer for Deepchecks `Dataset` objects.

    All work is delegated to the parent pandas materializer, operating on the
    dataframe exposed as `dataset.data`. On load, the dataframe is wrapped in
    a `Dataset` built with no other constructor arguments.
    """

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Dataset,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> Dataset:
        """Load the stored dataframe and wrap it in a Deepchecks `Dataset`.

        Args:
            data_type: The type of the data to read; forwarded to the parent
                materializer.

        Returns:
            A Deepchecks Dataset built from the loaded dataframe.
        """
        return Dataset(super().load(data_type))

    def save(self, dataset: Dataset) -> None:
        """Store the dataframe held by a `Dataset` object.

        Args:
            dataset: A deepchecks.Dataset object.
        """
        frame = dataset.data
        super().save(frame)

    def save_visualizations(
        self, dataset: Dataset
    ) -> Dict[str, VisualizationType]:
        """Produce the parent materializer's visualizations for the dataframe.

        Args:
            dataset: The Deepchecks dataset to save visualizations for.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        frame = dataset.data
        return super().save_visualizations(frame)

    def extract_metadata(self, dataset: Dataset) -> Dict[str, "MetadataType"]:
        """Collect the parent materializer's metadata for the dataframe.

        Args:
            dataset: The `Dataset` object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        frame = dataset.data
        return super().extract_metadata(frame)
extract_metadata(self, dataset)
Extract metadata from the given Dataset
object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
deepchecks.tabular.Dataset |
The Dataset object to extract metadata from. |
required |
Returns:
Type | Description |
---|---|
Dict[str, MetadataType] |
The extracted metadata as a dictionary. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def extract_metadata(self, dataset: Dataset) -> Dict[str, "MetadataType"]:
    """Collect the parent materializer's metadata for the dataframe.

    Args:
        dataset: The `Dataset` object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    frame = dataset.data
    return super().extract_metadata(frame)
load(self, data_type)
Reads pandas dataframes and creates deepchecks.Dataset
from it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
The type of the data to read. |
required |
Returns:
Type | Description |
---|---|
deepchecks.tabular.Dataset |
A Deepchecks Dataset. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def load(self, data_type: Type[Any]) -> Dataset:
    """Load the stored dataframe and wrap it in a Deepchecks `Dataset`.

    Args:
        data_type: The type of the data to read; forwarded to the parent
            materializer.

    Returns:
        A Deepchecks Dataset built from the loaded dataframe.
    """
    return Dataset(super().load(data_type))
save(self, dataset)
Serializes pandas dataframe within a Dataset
object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
deepchecks.tabular.Dataset |
A deepchecks.Dataset object. |
required |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def save(self, dataset: Dataset) -> None:
    """Store the dataframe held by a `Dataset` object.

    Args:
        dataset: A deepchecks.Dataset object.
    """
    frame = dataset.data
    super().save(frame)
save_visualizations(self, dataset)
Saves visualizations for the given Deepchecks dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
deepchecks.tabular.Dataset |
The Deepchecks dataset to save visualizations for. |
required |
Returns:
Type | Description |
---|---|
Dict[str, zenml.enums.VisualizationType] |
A dictionary of visualization URIs and their types. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_dataset_materializer.py
def save_visualizations(
    self, dataset: Dataset
) -> Dict[str, VisualizationType]:
    """Produce the parent materializer's visualizations for the dataframe.

    Args:
        dataset: The Deepchecks dataset to save visualizations for.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    frame = dataset.data
    return super().save_visualizations(frame)
deepchecks_results_materializer
Implementation of Deepchecks suite results materializer.
DeepchecksResultMaterializer (BaseMaterializer)
Materializer to read data to and from CheckResult and SuiteResult objects.
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
class DeepchecksResultMaterializer(BaseMaterializer):
    """Materializer for Deepchecks `CheckResult` and `SuiteResult` objects.

    Results are stored as JSON in `RESULTS_FILENAME` and rendered to HTML in
    `HTML_FILENAME`, both located under the materializer's `uri`.
    """

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (
        CheckResult,
        SuiteResult,
    )
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = (
        ArtifactType.DATA_ANALYSIS
    )

    def load(self, data_type: Type[Any]) -> Union[CheckResult, SuiteResult]:
        """Rebuild a check or suite result from the stored JSON file.

        Args:
            data_type: The type of the data to read.

        Returns:
            A Deepchecks CheckResult or SuiteResult.

        Raises:
            RuntimeError: if the input data type is not supported.
        """
        # The file is read before the type is inspected, so a missing file
        # surfaces ahead of the unsupported-type error.
        serialized = io_utils.read_file_contents_as_string(
            os.path.join(self.uri, RESULTS_FILENAME)
        )
        if data_type == SuiteResult:
            return SuiteResult.from_json(serialized)
        if data_type == CheckResult:
            return CheckResult.from_json(serialized)
        raise RuntimeError(f"Unknown data type: {data_type}")

    def save(self, result: Union[CheckResult, SuiteResult]) -> None:
        """Write the JSON form of a check or suite result.

        Args:
            result: A Deepchecks CheckResult or SuiteResult.
        """
        target = os.path.join(self.uri, RESULTS_FILENAME)
        # NOTE(review): the positional `True` is presumably Deepchecks'
        # `with_display` flag; confirm against the Deepchecks API.
        io_utils.write_file_contents_as_string(target, result.to_json(True))

    def save_visualizations(
        self, result: Union[CheckResult, SuiteResult]
    ) -> Dict[str, VisualizationType]:
        """Render the result to an HTML file and report where it was written.

        Args:
            result: The Deepchecks result to save visualizations for.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        # Backslashes are replaced so the returned path uses forward slashes.
        html_path = os.path.join(self.uri, HTML_FILENAME).replace("\\", "/")
        with fileio.open(html_path, "w") as html_file:
            result.save_as_html(html_file)
        return {html_path: VisualizationType.HTML}

    def extract_metadata(
        self, result: Union[CheckResult, SuiteResult]
    ) -> Dict[str, "MetadataType"]:
        """Summarize a result as a name plus a pass/fail value.

        Args:
            result: The Deepchecks result to extract metadata from.

        Returns:
            The extracted metadata as a dictionary; empty when the result is
            neither a CheckResult nor a SuiteResult.
        """
        if isinstance(result, CheckResult):
            check_metadata = {
                "deepchecks_check_name": result.get_header(),
                "deepchecks_check_passed": result.passed_conditions(),
            }
            return check_metadata
        if isinstance(result, SuiteResult):
            suite_metadata = {
                "deepchecks_suite_name": result.name,
                "deepchecks_suite_passed": result.passed(),
            }
            return suite_metadata
        return {}
extract_metadata(self, result)
Extract metadata from the given Deepchecks result.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
result |
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] |
The Deepchecks result to extract metadata from. |
required |
Returns:
Type | Description |
---|---|
Dict[str, MetadataType] |
The extracted metadata as a dictionary. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def extract_metadata(
    self, result: Union[CheckResult, SuiteResult]
) -> Dict[str, "MetadataType"]:
    """Summarize a result as a name plus a pass/fail value.

    Args:
        result: The Deepchecks result to extract metadata from.

    Returns:
        The extracted metadata as a dictionary; empty when the result is
        neither a CheckResult nor a SuiteResult.
    """
    if isinstance(result, CheckResult):
        check_metadata = {
            "deepchecks_check_name": result.get_header(),
            "deepchecks_check_passed": result.passed_conditions(),
        }
        return check_metadata
    if isinstance(result, SuiteResult):
        suite_metadata = {
            "deepchecks_suite_name": result.name,
            "deepchecks_suite_passed": result.passed(),
        }
        return suite_metadata
    return {}
load(self, data_type)
Reads a Deepchecks check or suite result from a serialized JSON file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
The type of the data to read. |
required |
Returns:
Type | Description |
---|---|
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] |
A Deepchecks CheckResult or SuiteResult. |
Exceptions:
Type | Description |
---|---|
RuntimeError |
if the input data type is not supported. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def load(self, data_type: Type[Any]) -> Union[CheckResult, SuiteResult]:
    """Rebuild a check or suite result from the stored JSON file.

    Args:
        data_type: The type of the data to read.

    Returns:
        A Deepchecks CheckResult or SuiteResult.

    Raises:
        RuntimeError: if the input data type is not supported.
    """
    # The file is read before the type is inspected, so a missing file
    # surfaces ahead of the unsupported-type error.
    serialized = io_utils.read_file_contents_as_string(
        os.path.join(self.uri, RESULTS_FILENAME)
    )
    if data_type == SuiteResult:
        return SuiteResult.from_json(serialized)
    if data_type == CheckResult:
        return CheckResult.from_json(serialized)
    raise RuntimeError(f"Unknown data type: {data_type}")
save(self, result)
Creates a JSON serialization for a CheckResult or SuiteResult.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
result |
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] |
A Deepchecks CheckResult or SuiteResult. |
required |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def save(self, result: Union[CheckResult, SuiteResult]) -> None:
    """Write the JSON form of a check or suite result.

    Args:
        result: A Deepchecks CheckResult or SuiteResult.
    """
    target = os.path.join(self.uri, RESULTS_FILENAME)
    # NOTE(review): the positional `True` is presumably Deepchecks'
    # `with_display` flag; confirm against the Deepchecks API.
    io_utils.write_file_contents_as_string(target, result.to_json(True))
save_visualizations(self, result)
Saves visualizations for the given Deepchecks result.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
result |
Union[deepchecks.core.check_result.CheckResult, deepchecks.core.suite.SuiteResult] |
The Deepchecks result to save visualizations for. |
required |
Returns:
Type | Description |
---|---|
Dict[str, zenml.enums.VisualizationType] |
A dictionary of visualization URIs and their types. |
Source code in zenml/integrations/deepchecks/materializers/deepchecks_results_materializer.py
def save_visualizations(
    self, result: Union[CheckResult, SuiteResult]
) -> Dict[str, VisualizationType]:
    """Render the result to an HTML file and report where it was written.

    Args:
        result: The Deepchecks result to save visualizations for.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    # Backslashes are replaced so the returned path uses forward slashes.
    html_path = os.path.join(self.uri, HTML_FILENAME).replace("\\", "/")
    with fileio.open(html_path, "w") as html_file:
        result.save_as_html(html_file)
    return {html_path: VisualizationType.HTML}
steps
special
Initialization of the Deepchecks Standard Steps.
deepchecks_data_drift
Implementation of the Deepchecks data drift validation step.
deepchecks_data_integrity
Implementation of the Deepchecks data integrity validation step.
deepchecks_model_drift
Implementation of the Deepchecks model drift validation step.
deepchecks_model_validation
Implementation of the Deepchecks model validation step.
validation_checks
Definition of the Deepchecks validation check types.
DeepchecksDataDriftCheck (DeepchecksValidationCheck)
Categories of Deepchecks data drift checks.
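This list reflects the set of train-test validation checks provided by Deepchecks:
- for tabular data: https://docs.deepchecks.com/stable/checks_gallery/tabular.html#train-test-validation
- for computer vision: https://docs.deepchecks.com/stable/checks_gallery/vision.html#train-test-validation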
All these checks inherit from deepchecks.tabular.TrainTestCheck
or
deepchecks.vision.TrainTestCheck
and require two datasets as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataDriftCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks data drift checks.

    This list reflects the set of train-test validation checks provided by
    Deepchecks:

    * [for tabular data](https://docs.deepchecks.com/stable/checks_gallery/tabular.html#train-test-validation)
    * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#train-test-validation)

    All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
    `deepchecks.vision.TrainTestCheck` and require two datasets as input.

    Each member's value is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding check class.
    """

    # Tabular data checks (classes taken from `tabular_checks`).
    TABULAR_CATEGORY_MISMATCH_TRAIN_TEST = source_utils.resolve(
        tabular_checks.CategoryMismatchTrainTest
    ).import_path
    TABULAR_DATASET_SIZE_COMPARISON = source_utils.resolve(
        tabular_checks.DatasetsSizeComparison
    ).import_path
    TABULAR_DATE_TRAIN_TEST_LEAKAGE_DUPLICATES = source_utils.resolve(
        tabular_checks.DateTrainTestLeakageDuplicates
    ).import_path
    TABULAR_DATE_TRAIN_TEST_LEAKAGE_OVERLAP = source_utils.resolve(
        tabular_checks.DateTrainTestLeakageOverlap
    ).import_path
    TABULAR_FEATURE_LABEL_CORRELATION_CHANGE = source_utils.resolve(
        tabular_checks.FeatureLabelCorrelationChange
    ).import_path
    TABULAR_INDEX_LEAKAGE = source_utils.resolve(
        tabular_checks.IndexTrainTestLeakage
    ).import_path
    TABULAR_NEW_LABEL_TRAIN_TEST = source_utils.resolve(
        tabular_checks.NewLabelTrainTest
    ).import_path
    TABULAR_STRING_MISMATCH_COMPARISON = source_utils.resolve(
        tabular_checks.StringMismatchComparison
    ).import_path
    TABULAR_TRAIN_TEST_FEATURE_DRIFT = source_utils.resolve(
        tabular_checks.TrainTestFeatureDrift
    ).import_path
    TABULAR_FEATURE_DRIFT = source_utils.resolve(
        tabular_checks.FeatureDrift
    ).import_path
    TABULAR_TRAIN_TEST_LABEL_DRIFT = source_utils.resolve(
        tabular_checks.TrainTestLabelDrift
    ).import_path
    TABULAR_LABEL_DRIFT = source_utils.resolve(
        tabular_checks.LabelDrift
    ).import_path
    TABULAR_TRAIN_TEST_SAMPLES_MIX = source_utils.resolve(
        tabular_checks.TrainTestSamplesMix
    ).import_path
    TABULAR_WHOLE_DATASET_DRIFT = source_utils.resolve(
        tabular_checks.WholeDatasetDrift
    ).import_path
    TABULAR_NEW_CATEGORY_TRAIN_TEST = source_utils.resolve(
        tabular_checks.NewCategoryTrainTest
    ).import_path
    TABULAR_MULTIVARIATE_DRIFT = source_utils.resolve(
        tabular_checks.MultivariateDrift
    ).import_path
    # Computer vision checks (classes taken from `vision_checks`).
    VISION_PROPERTY_LABEL_CORRELATION_CHANGE = source_utils.resolve(
        vision_checks.PropertyLabelCorrelationChange
    ).import_path
    VISION_HEATMAP_COMPARISON = source_utils.resolve(
        vision_checks.HeatmapComparison
    ).import_path
    VISION_LABEL_DRIFT = source_utils.resolve(
        vision_checks.LabelDrift
    ).import_path
    VISION_IMAGE_DATASET_DRIFT = source_utils.resolve(
        vision_checks.ImageDatasetDrift
    ).import_path
    VISION_IMAGE_PROPERTY_DRIFT = source_utils.resolve(
        vision_checks.ImagePropertyDrift
    ).import_path
    VISION_NEW_LABELS = source_utils.resolve(
        vision_checks.NewLabels
    ).import_path
DeepchecksDataIntegrityCheck (DeepchecksValidationCheck)
Categories of Deepchecks data integrity checks.
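This list reflects the set of data integrity checks provided by Deepchecks:
- for tabular data: https://docs.deepchecks.com/stable/tabular/auto_checks/data_integrity/index.html
- for computer vision: https://docs.deepchecks.com/stable/vision/auto_checks/data_integrity/index.html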
All these checks inherit from deepchecks.tabular.SingleDatasetCheck
or
deepchecks.vision.SingleDatasetCheck
and require a single dataset as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksDataIntegrityCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks data integrity checks.

    This list reflects the set of data integrity checks provided by Deepchecks:

    * [for tabular data](https://docs.deepchecks.com/stable/tabular/auto_checks/data_integrity/index.html)
    * [for computer vision](https://docs.deepchecks.com/stable/vision/auto_checks/data_integrity/index.html)

    All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
    `deepchecks.vision.SingleDatasetCheck` and require a single dataset as input.

    Each member's value is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding check class.
    """

    # Tabular data checks.
    TABULAR_COLUMNS_INFO = source_utils.resolve(
        tabular_checks.ColumnsInfo
    ).import_path
    TABULAR_CONFLICTING_LABELS = source_utils.resolve(
        tabular_checks.ConflictingLabels
    ).import_path
    TABULAR_DATA_DUPLICATES = source_utils.resolve(
        tabular_checks.DataDuplicates
    ).import_path
    # NOTE(review): unlike its siblings this class is referenced by bare name
    # rather than through `tabular_checks`; presumably it is imported
    # separately at module level -- confirm.
    TABULAR_FEATURE_FEATURE_CORRELATION = source_utils.resolve(
        FeatureFeatureCorrelation
    ).import_path
    TABULAR_FEATURE_LABEL_CORRELATION = source_utils.resolve(
        tabular_checks.FeatureLabelCorrelation
    ).import_path
    TABULAR_IDENTIFIER_LABEL_CORRELATION = source_utils.resolve(
        tabular_checks.IdentifierLabelCorrelation
    ).import_path
    TABULAR_IS_SINGLE_VALUE = source_utils.resolve(
        tabular_checks.IsSingleValue
    ).import_path
    TABULAR_MIXED_DATA_TYPES = source_utils.resolve(
        tabular_checks.MixedDataTypes
    ).import_path
    TABULAR_MIXED_NULLS = source_utils.resolve(
        tabular_checks.MixedNulls
    ).import_path
    TABULAR_OUTLIER_SAMPLE_DETECTION = source_utils.resolve(
        tabular_checks.OutlierSampleDetection
    ).import_path
    TABULAR_SPECIAL_CHARS = source_utils.resolve(
        tabular_checks.SpecialCharacters
    ).import_path
    TABULAR_STRING_LENGTH_OUT_OF_BOUNDS = source_utils.resolve(
        tabular_checks.StringLengthOutOfBounds
    ).import_path
    TABULAR_STRING_MISMATCH = source_utils.resolve(
        tabular_checks.StringMismatch
    ).import_path
    TABULAR_CLASS_IMBALANCE = source_utils.resolve(
        tabular_checks.ClassImbalance
    ).import_path
    TABULAR_PERCENT_OF_NULLS = source_utils.resolve(
        tabular_checks.PercentOfNulls
    ).import_path
    # Computer vision checks.
    VISION_IMAGE_PROPERTY_OUTLIERS = source_utils.resolve(
        vision_checks.ImagePropertyOutliers
    ).import_path
    VISION_LABEL_PROPERTY_OUTLIERS = source_utils.resolve(
        vision_checks.LabelPropertyOutliers
    ).import_path
    VISION_PROPERTY_LABEL_CORRELATION = source_utils.resolve(
        vision_checks.PropertyLabelCorrelation
    ).import_path
DeepchecksModelDriftCheck (DeepchecksValidationCheck)
Categories of Deepchecks model drift checks.
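This list includes a subset of the model evaluation checks provided by Deepchecks that require two datasets and a mandatory model as input:
- for tabular data: https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation
- for computer vision: https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation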
All these checks inherit from deepchecks.tabular.TrainTestCheck
or
deepchecks.vision.TrainTestCheck
and require two datasets and a mandatory
model as input.
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelDriftCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks model drift checks.

    This list includes a subset of the model evaluation checks provided by
    Deepchecks that require two datasets and a mandatory model as input:

    * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
    * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)

    All these checks inherit from `deepchecks.tabular.TrainTestCheck` or
    `deepchecks.vision.TrainTestCheck` and require two datasets and a mandatory
    model as input.

    Each member's value is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding check class.
    """

    # Tabular data checks.
    TABULAR_BOOSTING_OVERFIT = source_utils.resolve(
        tabular_checks.BoostingOverfit
    ).import_path
    TABULAR_SIMPLE_MODEL_COMPARISON = source_utils.resolve(
        tabular_checks.SimpleModelComparison
    ).import_path
    TABULAR_TRAIN_TEST_PREDICTION_DRIFT = source_utils.resolve(
        tabular_checks.TrainTestPredictionDrift
    ).import_path
    TABULAR_PREDICTION_DRIFT = source_utils.resolve(
        tabular_checks.PredictionDrift
    ).import_path
    TABULAR_UNUSED_FEATURES = source_utils.resolve(
        tabular_checks.UnusedFeatures
    ).import_path
    # Computer vision checks.
    VISION_CLASS_PERFORMANCE = source_utils.resolve(
        vision_checks.ClassPerformance
    ).import_path
    VISION_SIMPLE_MODEL_COMPARISON = source_utils.resolve(
        vision_checks.SimpleModelComparison
    ).import_path
    VISION_TRAIN_TEST_PREDICTION_DRIFT = source_utils.resolve(
        vision_checks.TrainTestPredictionDrift
    ).import_path
    VISION_PREDICTION_DRIFT = source_utils.resolve(
        vision_checks.PredictionDrift
    ).import_path
DeepchecksModelValidationCheck (DeepchecksValidationCheck)
Categories of Deepchecks model validation checks.
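This list includes a subset of the model evaluation checks provided by Deepchecks that require a single dataset and a mandatory model as input:
- for tabular data: https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation
- for computer vision: https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation
All these checks inherit from deepchecks.tabular.SingleDatasetCheck
or
deepchecks.vision.SingleDatasetCheck and require a dataset and a mandatory
model as input.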
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksModelValidationCheck(DeepchecksValidationCheck):
    """Categories of Deepchecks model validation checks.

    This list includes a subset of the model evaluation checks provided by
    Deepchecks that require a single dataset and a mandatory model as input:

    * [for tabular data](https://docs.deepchecks.com/en/stable/checks_gallery/tabular.html#model-evaluation)
    * [for computer vision](https://docs.deepchecks.com/stable/checks_gallery/vision.html#model-evaluation)

    All these checks inherit from `deepchecks.tabular.SingleDatasetCheck` or
    `deepchecks.vision.SingleDatasetCheck` and require a dataset and a mandatory
    model as input.

    Each member's value is the `import_path` of the source that
    `source_utils.resolve` returns for the corresponding check class.
    """

    # Tabular data checks.
    TABULAR_CALIBRATION_SCORE = source_utils.resolve(
        tabular_checks.CalibrationScore
    ).import_path
    TABULAR_CONFUSION_MATRIX_REPORT = source_utils.resolve(
        tabular_checks.ConfusionMatrixReport
    ).import_path
    TABULAR_MODEL_INFERENCE_TIME = source_utils.resolve(
        tabular_checks.ModelInferenceTime
    ).import_path
    TABULAR_MODEL_INFO = source_utils.resolve(
        tabular_checks.ModelInfo
    ).import_path
    # Reached through the `model_evaluation` submodule, unlike its siblings.
    TABULAR_PERFORMANCE_BIAS = source_utils.resolve(
        tabular_checks.model_evaluation.PerformanceBias
    ).import_path
    TABULAR_REGRESSION_ERROR_DISTRIBUTION = source_utils.resolve(
        tabular_checks.RegressionErrorDistribution
    ).import_path
    TABULAR_REGRESSION_SYSTEMATIC_ERROR = source_utils.resolve(
        tabular_checks.RegressionSystematicError
    ).import_path
    TABULAR_ROC_REPORT = source_utils.resolve(
        tabular_checks.RocReport
    ).import_path
    TABULAR_SEGMENT_PERFORMANCE = source_utils.resolve(
        tabular_checks.SegmentPerformance
    ).import_path
    TABULAR_WEAK_SEGMENT_PERFORMANCE = source_utils.resolve(
        tabular_checks.WeakSegmentsPerformance
    ).import_path
    TABULAR_SINGLE_DATASET_PERFORMANCE = source_utils.resolve(
        tabular_checks.SingleDatasetPerformance
    ).import_path
    # NOTE(review): the names of the next two checks suggest a train/test
    # pair or several models rather than a single dataset and model --
    # confirm that they belong in this single-dataset category.
    TABULAR_TRAIN_TEST_PERFORMANCE = source_utils.resolve(
        tabular_checks.TrainTestPerformance
    ).import_path
    TABULAR_MULTI_MODEL_PERFORMANCE_REPORT = source_utils.resolve(
        tabular_checks.MultiModelPerformanceReport
    ).import_path
    # Computer vision checks.
    VISION_CONFUSION_MATRIX_REPORT = source_utils.resolve(
        vision_checks.ConfusionMatrixReport
    ).import_path
    VISION_MEAN_AVERAGE_PRECISION_REPORT = source_utils.resolve(
        vision_checks.MeanAveragePrecisionReport
    ).import_path
    VISION_MEAN_AVERAGE_RECALL_REPORT = source_utils.resolve(
        vision_checks.MeanAverageRecallReport
    ).import_path
    VISION_SINGLE_DATASET_PERFORMANCE = source_utils.resolve(
        vision_checks.SingleDatasetPerformance
    ).import_path
    VISION_WEAK_SEGMENT_PERFORMANCE = source_utils.resolve(
        vision_checks.WeakSegmentsPerformance
    ).import_path
DeepchecksValidationCheck (StrEnum)
Base class for all Deepchecks categories of validation checks.
This base class defines some conventions used for all enum values used to identify the various validation checks that can be performed with Deepchecks:
- enum values represent fully formed class paths pointing to Deepchecks BaseCheck subclasses
- all tabular data checks are located under the deepchecks.tabular.checks module sub-tree
- all computer vision data checks are located under the deepchecks.vision.checks module sub-tree
Source code in zenml/integrations/deepchecks/validation_checks.py
class DeepchecksValidationCheck(StrEnum):
    """Base class for all Deepchecks categories of validation checks.

    This base class defines some conventions used for all enum values used to
    identify the various validation checks that can be performed with
    Deepchecks:

      * enum values represent fully formed class paths pointing to Deepchecks
      BaseCheck subclasses
      * all tabular data checks are located under the
      `deepchecks.tabular.checks` module sub-tree
      * all computer vision data checks are located under the
      `deepchecks.vision.checks` module sub-tree
    """

    @classmethod
    def validate_check_name(cls, check_name: str) -> None:
        """Validate a Deepchecks check identifier.

        Only the identifier's prefix is checked here; whether it resolves to
        an actual class is verified by `get_check_class`.

        Args:
            check_name: Identifies a builtin Deepchecks check. The identifier
                must be formatted as `deepchecks.{tabular|vision}.checks.<...>.<class-name>`.

        Raises:
            ValueError: If the check identifier does not follow the convention
                used by ZenML to identify Deepchecks builtin checks.
        """
        if not re.match(
            r"^deepchecks\.(tabular|vision)\.checks\.",
            check_name,
        ):
            raise ValueError(
                f"The supplied Deepcheck check identifier does not follow the "
                f"convention used by ZenML: `{check_name}`. The identifier "
                f"must be formatted as `deepchecks.<tabular|vision>.checks...` "
                f"and must be resolvable to a valid Deepchecks BaseCheck "
                f"subclass."
            )

    @classmethod
    def is_tabular_check(cls, check_name: str) -> bool:
        """Check if a validation check is applicable to tabular data.

        Args:
            check_name: Identifies a builtin Deepchecks check.

        Returns:
            True if the check is applicable to tabular data, otherwise False.

        Raises:
            ValueError: If the check identifier does not follow the naming
                convention enforced by `validate_check_name`.
        """
        cls.validate_check_name(check_name)
        return check_name.startswith("deepchecks.tabular.")

    @classmethod
    def is_vision_check(cls, check_name: str) -> bool:
        """Check if a validation check is applicable to computer vision data.

        Args:
            check_name: Identifies a builtin Deepchecks check.

        Returns:
            True if the check is applicable to computer vision data, otherwise
            False.

        Raises:
            ValueError: If the check identifier does not follow the naming
                convention enforced by `validate_check_name`.
        """
        cls.validate_check_name(check_name)
        return check_name.startswith("deepchecks.vision.")

    @classmethod
    def get_check_class(cls, check_name: str) -> Type[BaseCheck]:
        """Get the Deepchecks check class associated with an enum value or a custom check name.

        Args:
            check_name: Identifies a builtin Deepchecks check. The identifier
                must be formatted as `deepchecks.{tabular|vision}.checks.<class-name>`
                and must be resolvable to a valid Deepchecks BaseCheck class.

        Returns:
            The Deepchecks check class associated with this enum value.

        Raises:
            ValueError: If the check name could not be converted to a valid
                Deepchecks check class. This can happen for example if the enum
                values fall out of sync with the Deepchecks code base or if a
                custom check name is supplied that cannot be resolved to a valid
                Deepchecks BaseCheck class.
        """
        cls.validate_check_name(check_name)

        try:
            check_class: Type[BaseCheck] = (
                source_utils.load_and_validate_class(
                    check_name, expected_class=BaseCheck
                )
            )
        except (AttributeError, ImportError, TypeError) as e:
            # Every resolution failure is reported as the documented
            # ValueError. NOTE(review): the loader is expected to raise
            # ImportError for an unknown module, AttributeError for a missing
            # attribute and TypeError for a target that is not a BaseCheck
            # subclass -- confirm against `source_utils`. The original error
            # is chained so the root cause stays visible in the traceback.
            raise ValueError(
                f"Could not map the `{check_name}` check identifier to a valid "
                f"Deepchecks check class."
            ) from e

        # Identifiers outside this enum are accepted, but flagged to the user.
        if check_name not in cls.values():
            logger.warning(
                f"You are using a custom Deepchecks check identifier that is "
                f"not listed in the `{str(cls)}` enum type. This could lead "
                f"to unexpected behavior."
            )
        return check_class

    @property
    def check_class(self) -> Type[BaseCheck]:
        """Convert the enum value to a valid Deepchecks check class.

        Returns:
            The Deepchecks check class associated with the enum value.
        """
        return self.get_check_class(self.value)