Evidently
zenml.integrations.evidently
special
Initialization of the Evidently integration.
The Evidently integration provides a way to monitor your models in production. It includes a way to detect data drift and different kinds of model performance issues.
The results of Evidently calculations can either be exported as an interactive dashboard (visualized as an html file or in your Jupyter notebook), or as a JSON file.
EvidentlyIntegration (Integration)
Evidently integration for ZenML.
Source code in zenml/integrations/evidently/__init__.py
class EvidentlyIntegration(Integration):
    """[Evidently](https://github.com/evidentlyai/evidently) integration for ZenML.

    Declares the integration name, the pinned `evidently` requirement and the
    stack component flavors contributed by the integration.
    """

    NAME = EVIDENTLY
    REQUIREMENTS = ["evidently==0.1.54dev0"]

    @staticmethod
    def activate() -> None:
        """Activate the Evidently integration.

        Imports the integration's `materializers` and `visualizers`
        sub-packages. The imported names are not used here (hence `# noqa`);
        presumably the imports are needed for their import-time side effects.
        """
        from zenml.integrations.evidently import materializers  # noqa
        from zenml.integrations.evidently import visualizers  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Evidently integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # Imported inside the method rather than at module level.
        from zenml.integrations.evidently.flavors import (
            EvidentlyDataValidatorFlavor,
        )

        return [EvidentlyDataValidatorFlavor]
activate()
staticmethod
Activate the Evidently integration.
Source code in zenml/integrations/evidently/__init__.py
@staticmethod
def activate() -> None:
    """Activate the Evidently integration.

    Imports the integration's `materializers` and `visualizers`
    sub-packages. The imported names are not used here (hence `# noqa`);
    presumably the imports are needed for their import-time side effects.
    """
    from zenml.integrations.evidently import materializers  # noqa
    from zenml.integrations.evidently import visualizers  # noqa
flavors()
classmethod
Declare the stack component flavors for the Evidently integration.
Returns:
Type | Description |
---|---|
List[Type[zenml.stack.flavor.Flavor]] |
List of stack component flavors for this integration. |
Source code in zenml/integrations/evidently/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Evidently integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # Imported inside the method rather than at module level.
    from zenml.integrations.evidently.flavors import (
        EvidentlyDataValidatorFlavor,
    )

    return [EvidentlyDataValidatorFlavor]
data_validators
special
Initialization of the Evidently data validator for ZenML.
evidently_data_validator
Implementation of the Evidently data validator.
EvidentlyDataValidator (BaseDataValidator)
Evidently data validator stack component.
Source code in zenml/integrations/evidently/data_validators/evidently_data_validator.py
class EvidentlyDataValidator(BaseDataValidator):
    """Evidently data validator stack component.

    Builds Evidently `Profile` and `Dashboard` objects for a dataset (and an
    optional comparison dataset) through `data_profiling`.
    """

    NAME: ClassVar[str] = "Evidently"
    FLAVOR: ClassVar[
        Type[BaseDataValidatorFlavor]
    ] = EvidentlyDataValidatorFlavor

    @classmethod
    def _unpack_options(
        cls, option_list: Sequence[Tuple[str, Dict[str, Any]]]
    ) -> Sequence[Any]:
        """Unpack Evidently options.

        Implements de-serialization for [Evidently options](https://docs.evidentlyai.com/user-guide/customization)
        that can be passed as constructor arguments when creating Profile and
        Dashboard objects. The convention used is that each item in the list
        consists of two elements:

        * a string containing the full class path of a `dataclass` based
        class with Evidently options
        * a dictionary with kwargs used as parameters for the option instance

        Example:

        ```python
        options = [
            (
                "evidently.options.ColorOptions",{
                    "primary_color": "#5a86ad",
                    "fill_color": "#fff4f2",
                    "zero_line_color": "#016795",
                    "current_data_color": "#c292a1",
                    "reference_data_color": "#017b92",
                }
            ),
        ]
        ```

        This is the same as saying:

        ```python
        from evidently.options import ColorOptions

        color_scheme = ColorOptions()
        color_scheme.primary_color = "#5a86ad"
        color_scheme.fill_color = "#fff4f2"
        color_scheme.zero_line_color = "#016795"
        color_scheme.current_data_color = "#c292a1"
        color_scheme.reference_data_color = "#017b92"
        ```

        Args:
            option_list: list of packed Evidently options

        Returns:
            A list of unpacked Evidently options

        Raises:
            ValueError: if one of the passed Evidently class paths cannot be
                resolved to an actual class.
        """
        options = []
        for option_clspath, option_args in option_list:
            try:
                option_cls = load_source_path_class(option_clspath)
            except AttributeError as e:
                # Chain the original error explicitly so the failed lookup
                # is reported as the cause of the ValueError.
                raise ValueError(
                    f"Could not map the `{option_clspath}` Evidently option "
                    f"class path to a valid class."
                ) from e
            option = option_cls(**option_args)
            options.append(option)

        return options

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        column_mapping: Optional[ColumnMapping] = None,
        verbose_level: int = 1,
        profile_options: Sequence[Tuple[str, Dict[str, Any]]] = [],
        dashboard_options: Sequence[Tuple[str, Dict[str, Any]]] = [],
        **kwargs: Any,
    ) -> Tuple[Profile, Dashboard]:
        """Analyze a dataset and generate a data profile with Evidently.

        The method takes in an optional list of Evidently options to be passed
        to the profile constructor (`profile_options`) and the dashboard
        constructor (`dashboard_options`). Each element in the list must be
        composed of two items: the first is a full class path of an Evidently
        option `dataclass`, the second is a dictionary of kwargs with the actual
        option parameters, e.g.:

        ```python
        options = [
            (
                "evidently.options.ColorOptions",{
                    "primary_color": "#5a86ad",
                    "fill_color": "#fff4f2",
                    "zero_line_color": "#016795",
                    "current_data_color": "#c292a1",
                    "reference_data_color": "#017b92",
                }
            ),
        ]
        ```

        Args:
            dataset: Target dataset to be profiled. It is passed to Evidently
                as the reference data.
            comparison_dataset: Optional dataset to be used for data profiles
                that require a baseline for comparison (e.g. data drift
                profiles). It is passed to Evidently as the current data.
            profile_list: Optional list identifying the categories of Evidently
                data profiles to be generated.
            column_mapping: Properties of the DataFrame columns used
            verbose_level: Level of verbosity for the Evidently dashboards. Use
                0 for a brief dashboard, 1 for a detailed dashboard.
            profile_options: Optional list of options to pass to the
                profile constructor.
            dashboard_options: Optional list of options to pass to the
                dashboard constructor.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            The Evidently Profile and Dashboard objects corresponding to the set
            of generated profiles.
        """
        sections, tabs = get_profile_sections_and_tabs(
            profile_list, verbose_level
        )
        # The list defaults in the signature are only iterated by
        # `_unpack_options`, never mutated.
        unpacked_profile_options = self._unpack_options(profile_options)
        unpacked_dashboard_options = self._unpack_options(dashboard_options)

        dashboard = Dashboard(tabs=tabs, options=unpacked_dashboard_options)
        dashboard.calculate(
            reference_data=dataset,
            current_data=comparison_dataset,
            column_mapping=column_mapping,
        )
        profile = Profile(sections=sections, options=unpacked_profile_options)
        profile.calculate(
            reference_data=dataset,
            current_data=comparison_dataset,
            column_mapping=column_mapping,
        )
        return profile, dashboard
FLAVOR (BaseDataValidatorFlavor)
Evidently data validator flavor.
Source code in zenml/integrations/evidently/data_validators/evidently_data_validator.py
class EvidentlyDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the Evidently data validator."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The value of `EVIDENTLY_DATA_VALIDATOR_FLAVOR`.
        """
        flavor_name = EVIDENTLY_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def implementation_class(self) -> Type["EvidentlyDataValidator"]:
        """Class implementing this flavor.

        Returns:
            The `EvidentlyDataValidator` class.
        """
        # Resolved on access instead of at module import time.
        from zenml.integrations.evidently.data_validators import (
            EvidentlyDataValidator as validator_cls,
        )

        return validator_cls
implementation_class: Type[EvidentlyDataValidator]
property
readonly
Implementation class.
Returns:
Type | Description |
---|---|
Type[EvidentlyDataValidator] |
The implementation class. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
data_profiling(self, dataset, comparison_dataset=None, profile_list=None, column_mapping=None, verbose_level=1, profile_options=[], dashboard_options=[], **kwargs)
Analyze a dataset and generate a data profile with Evidently.
The method takes in an optional list of Evidently options to be passed
to the profile constructor (`profile_options`) and the dashboard
constructor (`dashboard_options`). Each element in the list must be
composed of two items: the first is a full class path of an Evidently
option `dataclass`, the second is a dictionary of kwargs with the actual
option parameters, e.g.:
options = [
(
"evidently.options.ColorOptions",{
"primary_color": "#5a86ad",
"fill_color": "#fff4f2",
"zero_line_color": "#016795",
"current_data_color": "#c292a1",
"reference_data_color": "#017b92",
}
),
]
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
DataFrame |
Target dataset to be profiled. |
required |
comparison_dataset |
Optional[pandas.core.frame.DataFrame] |
Optional dataset to be used for data profiles that require a baseline for comparison (e.g data drift profiles). |
None |
profile_list |
Optional[Sequence[str]] |
Optional list identifying the categories of Evidently data profiles to be generated. |
None |
column_mapping |
Optional[evidently.pipeline.column_mapping.ColumnMapping] |
Properties of the DataFrame columns used |
None |
verbose_level |
int |
Level of verbosity for the Evidently dashboards. Use 0 for a brief dashboard, 1 for a detailed dashboard. |
1 |
profile_options |
Sequence[Tuple[str, Dict[str, Any]]] |
Optional list of options to pass to the profile constructor. |
[] |
dashboard_options |
Sequence[Tuple[str, Dict[str, Any]]] |
Optional list of options to pass to the dashboard constructor. |
[] |
**kwargs |
Any |
Extra keyword arguments (unused). |
{} |
Returns:
Type | Description |
---|---|
Tuple[evidently.model_profile.model_profile.Profile, evidently.dashboard.dashboard.Dashboard] |
The Evidently Profile and Dashboard objects corresponding to the set of generated profiles. |
Source code in zenml/integrations/evidently/data_validators/evidently_data_validator.py
def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    column_mapping: Optional[ColumnMapping] = None,
    verbose_level: int = 1,
    profile_options: Sequence[Tuple[str, Dict[str, Any]]] = [],
    dashboard_options: Sequence[Tuple[str, Dict[str, Any]]] = [],
    **kwargs: Any,
) -> Tuple[Profile, Dashboard]:
    """Build an Evidently profile and dashboard for a dataset.

    Both `profile_options` and `dashboard_options` are lists of packed
    Evidently options. Every entry is a pair: the full class path of an
    Evidently option `dataclass`, followed by a dictionary of kwargs used to
    instantiate it, e.g.:

    ```python
    options = [
        (
            "evidently.options.ColorOptions",{
                "primary_color": "#5a86ad",
                "fill_color": "#fff4f2",
                "zero_line_color": "#016795",
                "current_data_color": "#c292a1",
                "reference_data_color": "#017b92",
            }
        ),
    ]
    ```

    Args:
        dataset: Target dataset to be profiled; handed to Evidently as the
            reference data.
        comparison_dataset: Optional dataset for profiles that need a
            baseline for comparison (e.g. data drift profiles); handed to
            Evidently as the current data.
        profile_list: Optional list identifying the categories of Evidently
            data profiles to be generated.
        column_mapping: Properties of the DataFrame columns used.
        verbose_level: Level of verbosity for the Evidently dashboards. Use
            0 for a brief dashboard, 1 for a detailed dashboard.
        profile_options: Optional list of options to pass to the
            profile constructor.
        dashboard_options: Optional list of options to pass to the
            dashboard constructor.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        The Evidently Profile and Dashboard objects corresponding to the set
        of generated profiles.
    """
    sections, tabs = get_profile_sections_and_tabs(profile_list, verbose_level)
    profile_opts = self._unpack_options(profile_options)
    dashboard_opts = self._unpack_options(dashboard_options)

    # Dashboard and profile are calculated from exactly the same inputs.
    calculation_inputs = {
        "reference_data": dataset,
        "current_data": comparison_dataset,
        "column_mapping": column_mapping,
    }

    dashboard = Dashboard(tabs=tabs, options=dashboard_opts)
    dashboard.calculate(**calculation_inputs)

    profile = Profile(sections=sections, options=profile_opts)
    profile.calculate(**calculation_inputs)

    return profile, dashboard
get_profile_sections_and_tabs(profile_list, verbose_level=1)
Get the profile sections and dashboard tabs for a profile list.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_list |
Optional[Sequence[str]] |
List of identifiers for Evidently profiles. |
required |
verbose_level |
int |
Verbosity level for the rendered dashboard. Use 0 for a brief dashboard, 1 for a detailed dashboard. |
1 |
Returns:
Type | Description |
---|---|
Tuple[List[evidently.model_profile.sections.base_profile_section.ProfileSection], List[evidently.dashboard.tabs.base_tab.Tab]] |
A tuple of two lists of profile sections and tabs. |
Exceptions:
Type | Description |
---|---|
ValueError |
if the profile_section is not supported. |
Source code in zenml/integrations/evidently/data_validators/evidently_data_validator.py
def get_profile_sections_and_tabs(
    profile_list: Optional[Sequence[str]],
    verbose_level: int = 1,
) -> Tuple[List[ProfileSection], List[Tab]]:
    """Resolve profile identifiers into profile sections and dashboard tabs.

    Args:
        profile_list: List of identifiers for Evidently profiles. When it is
            `None` or empty, every key of `profile_mapper` is used.
        verbose_level: Verbosity level for the rendered dashboard. Use
            0 for a brief dashboard, 1 for a detailed dashboard.

    Returns:
        A tuple of two lists of profile sections and tabs.

    Raises:
        ValueError: if one of the requested profile identifiers is not
            supported.
    """
    profile_list = profile_list or list(profile_mapper.keys())
    try:
        # Sections are built first, then one tab per requested profile.
        sections = [profile_mapper[profile]() for profile in profile_list]
        tabs = [
            dashboard_mapper[profile](verbose_level=verbose_level)
            for profile in profile_list
        ]
    except KeyError as e:
        nl = "\n"
        raise ValueError(
            f"Invalid profile sections: {profile_list} \n\n"
            f"Valid and supported options are: {nl}- "
            f'{f"{nl}- ".join(list(profile_mapper.keys()))}'
        ) from e
    return (sections, tabs)
flavors
special
Evidently integration flavors.
evidently_data_validator_flavor
Evidently data validator flavor.
EvidentlyDataValidatorFlavor (BaseDataValidatorFlavor)
Evidently data validator flavor.
Source code in zenml/integrations/evidently/flavors/evidently_data_validator_flavor.py
class EvidentlyDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the Evidently data validator."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The value of `EVIDENTLY_DATA_VALIDATOR_FLAVOR`.
        """
        flavor_name = EVIDENTLY_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def implementation_class(self) -> Type["EvidentlyDataValidator"]:
        """Class implementing this flavor.

        Returns:
            The `EvidentlyDataValidator` class.
        """
        # Resolved on access instead of at module import time.
        from zenml.integrations.evidently.data_validators import (
            EvidentlyDataValidator as validator_cls,
        )

        return validator_cls
implementation_class: Type[EvidentlyDataValidator]
property
readonly
Implementation class.
Returns:
Type | Description |
---|---|
Type[EvidentlyDataValidator] |
The implementation class. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
materializers
special
Evidently materializers.
evidently_profile_materializer
Implementation of Evidently profile materializer.
EvidentlyProfileMaterializer (BaseMaterializer)
Materializer to read data to and from an Evidently Profile.
Source code in zenml/integrations/evidently/materializers/evidently_profile_materializer.py
class EvidentlyProfileMaterializer(BaseMaterializer):
    """Materializer to read data to and from an Evidently Profile."""

    ASSOCIATED_TYPES = (Profile,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA_ANALYSIS

    def load(self, data_type: Type[Any]) -> Profile:
        """Reads an Evidently Profile object from a json file.

        The file is expected to hold a dictionary. Its optional
        `section_types` entry lists class paths of profile sections; each one
        is imported, instantiated without arguments and given the stored
        result found under the section's `part_id()` key.

        Args:
            data_type: The type of the data to read.

        Returns:
            The Evidently Profile

        Raises:
            TypeError: if the json file contains an invalid data type.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        contents = yaml_utils.read_json(filepath)
        # isinstance is the idiomatic type check and also accepts dict
        # subclasses.
        if not isinstance(contents, dict):
            raise TypeError(
                f"Contents {contents} was type {type(contents)} but expected "
                f"dictionary"
            )

        section_types = contents.pop("section_types", [])
        sections = []
        for section_type in section_types:
            section_cls = import_class_by_path(section_type)
            section = section_cls()
            # Restore the serialized result instead of recalculating it.
            section._result = contents[section.part_id()]
            sections.append(section)

        return Profile(sections=sections)

    def save(self, data: Profile) -> None:
        """Serialize an Evidently Profile to a json file.

        Args:
            data: The Evidently Profile to be serialized.
        """
        super().save(data)

        contents = data.object()
        # include the list of profile sections in the serialized dictionary,
        # so we'll be able to re-create them during de-serialization
        contents["section_types"] = [
            resolve_class(stage.__class__) for stage in data.stages
        ]

        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        yaml_utils.write_json(filepath, contents, encoder=NumpyEncoder)
load(self, data_type)
Reads an Evidently Profile object from a json file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
The type of the data to read. |
required |
Returns:
Type | Description |
---|---|
Profile |
The Evidently Profile |
Exceptions:
Type | Description |
---|---|
TypeError |
if the json file contains an invalid data type. |
Source code in zenml/integrations/evidently/materializers/evidently_profile_materializer.py
def load(self, data_type: Type[Any]) -> Profile:
    """Reads an Evidently Profile object from a json file.

    The file is expected to hold a dictionary. Its optional `section_types`
    entry lists class paths of profile sections; each one is imported,
    instantiated without arguments and given the stored result found under
    the section's `part_id()` key.

    Args:
        data_type: The type of the data to read.

    Returns:
        The Evidently Profile

    Raises:
        TypeError: if the json file contains an invalid data type.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    contents = yaml_utils.read_json(filepath)
    # isinstance is the idiomatic type check and also accepts dict subclasses.
    if not isinstance(contents, dict):
        raise TypeError(
            f"Contents {contents} was type {type(contents)} but expected "
            f"dictionary"
        )

    section_types = contents.pop("section_types", [])
    sections = []
    for section_type in section_types:
        section_cls = import_class_by_path(section_type)
        section = section_cls()
        # Restore the serialized result instead of recalculating it.
        section._result = contents[section.part_id()]
        sections.append(section)

    return Profile(sections=sections)
save(self, data)
Serialize an Evidently Profile to a json file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Profile |
The Evidently Profile to be serialized. |
required |
Source code in zenml/integrations/evidently/materializers/evidently_profile_materializer.py
def save(self, data: Profile) -> None:
    """Write an Evidently Profile to a json file.

    Args:
        data: The Evidently Profile to be serialized.
    """
    super().save(data)

    serialized = data.object()
    # Store the class path of every profile stage next to the results so the
    # sections can be re-created during de-serialization.
    section_paths = [resolve_class(stage.__class__) for stage in data.stages]
    serialized["section_types"] = section_paths

    target_path = os.path.join(self.uri, DEFAULT_FILENAME)
    yaml_utils.write_json(target_path, serialized, encoder=NumpyEncoder)
steps
special
Initialization of the Evidently Standard Steps.
evidently_profile
Implementation of the Evidently Profile Step.
EvidentlyColumnMapping (BaseModel)
pydantic-model
Column mapping configuration for Evidently.
This class is a 1-to-1 serializable analog of Evidently's ColumnMapping data type that can be used as a step configuration field (see https://docs.evidentlyai.com/features/dashboards/column_mapping).
Attributes:
Name | Type | Description |
---|---|---|
target |
Optional[str] |
target column |
prediction |
Union[str, Sequence[str]] |
prediction column(s) |
datetime |
Optional[str] |
datetime column |
id |
Optional[str] |
id column |
numerical_features |
Optional[List[str]] |
numerical features |
categorical_features |
Optional[List[str]] |
categorical features |
datetime_features |
Optional[List[str]] |
datetime features |
target_names |
Optional[List[str]] |
target column names |
task |
Optional[Literal['classification', 'regression']] |
model task (regression or classification) |
Source code in zenml/integrations/evidently/steps/evidently_profile.py
class EvidentlyColumnMapping(BaseModel):
    """Column mapping configuration for Evidently.

    This class is a 1-to-1 serializable analog of Evidently's
    ColumnMapping data type that can be used as a step configuration field
    (see https://docs.evidentlyai.com/features/dashboards/column_mapping).

    Attributes:
        target: target column
        prediction: prediction column(s)
        datetime: datetime column
        id: id column
        numerical_features: numerical features
        categorical_features: categorical features
        datetime_features: datetime features
        target_names: target column names
        task: model task (regression or classification)
    """

    target: Optional[str] = None
    prediction: Optional[Union[str, Sequence[str]]] = None
    datetime: Optional[str] = None
    id: Optional[str] = None
    numerical_features: Optional[List[str]] = None
    categorical_features: Optional[List[str]] = None
    datetime_features: Optional[List[str]] = None
    target_names: Optional[List[str]] = None
    task: Optional[Literal["classification", "regression"]] = None

    def to_evidently_column_mapping(self) -> ColumnMapping:
        """Convert this Pydantic object to an Evidently ColumnMapping object.

        Every field that is unset (or otherwise falsy) on this object keeps
        the value a freshly constructed `ColumnMapping` already has.

        Returns:
            An Evidently column mapping converted from this Pydantic object.
        """
        column_mapping = ColumnMapping()

        # preserve the Evidently defaults where possible
        column_mapping.target = self.target or column_mapping.target
        column_mapping.prediction = self.prediction or column_mapping.prediction
        column_mapping.datetime = self.datetime or column_mapping.datetime
        column_mapping.id = self.id or column_mapping.id
        column_mapping.numerical_features = (
            self.numerical_features or column_mapping.numerical_features
        )
        # Previously omitted, which silently dropped the configured
        # categorical features.
        column_mapping.categorical_features = (
            self.categorical_features or column_mapping.categorical_features
        )
        column_mapping.datetime_features = (
            self.datetime_features or column_mapping.datetime_features
        )
        column_mapping.target_names = (
            self.target_names or column_mapping.target_names
        )
        column_mapping.task = self.task or column_mapping.task

        return column_mapping
to_evidently_column_mapping(self)
Convert this Pydantic object to an Evidently ColumnMapping object.
Returns:
Type | Description |
---|---|
ColumnMapping |
An Evidently column mapping converted from this Pydantic object. |
Source code in zenml/integrations/evidently/steps/evidently_profile.py
def to_evidently_column_mapping(self) -> ColumnMapping:
    """Convert this Pydantic object to an Evidently ColumnMapping object.

    Every field that is unset (or otherwise falsy) on this object keeps the
    value a freshly constructed `ColumnMapping` already has.

    Returns:
        An Evidently column mapping converted from this Pydantic object.
    """
    column_mapping = ColumnMapping()

    # preserve the Evidently defaults where possible
    column_mapping.target = self.target or column_mapping.target
    column_mapping.prediction = self.prediction or column_mapping.prediction
    column_mapping.datetime = self.datetime or column_mapping.datetime
    column_mapping.id = self.id or column_mapping.id
    column_mapping.numerical_features = (
        self.numerical_features or column_mapping.numerical_features
    )
    # Previously omitted, which silently dropped the configured categorical
    # features.
    column_mapping.categorical_features = (
        self.categorical_features or column_mapping.categorical_features
    )
    column_mapping.datetime_features = (
        self.datetime_features or column_mapping.datetime_features
    )
    column_mapping.target_names = (
        self.target_names or column_mapping.target_names
    )
    column_mapping.task = self.task or column_mapping.task

    return column_mapping
EvidentlyProfileParameters (BaseParameters)
pydantic-model
Parameters class for Evidently profile steps.
Attributes:
Name | Type | Description |
---|---|---|
column_mapping |
Optional[zenml.integrations.evidently.steps.evidently_profile.EvidentlyColumnMapping] |
properties of the DataFrame columns used |
ignored_cols |
Optional[List[str]] |
columns to ignore during the Evidently profile step |
profile_sections |
Optional[Sequence[str]] |
a list identifying the Evidently profile sections to be used. The following are valid options supported by Evidently: - "datadrift" - "categoricaltargetdrift" - "numericaltargetdrift" - "classificationmodelperformance" - "regressionmodelperformance" - "probabilisticmodelperformance" |
verbose_level |
int |
Verbosity level for the Evidently dashboards. Use 0 for a brief dashboard, 1 for a detailed dashboard. |
profile_options |
Sequence[Tuple[str, Dict[str, Any]]] |
Optional list of options to pass to the profile constructor. See `EvidentlyDataValidator._unpack_options`. |
dashboard_options |
Sequence[Tuple[str, Dict[str, Any]]] |
Optional list of options to pass to the dashboard constructor. See `EvidentlyDataValidator._unpack_options`. |
Source code in zenml/integrations/evidently/steps/evidently_profile.py
class EvidentlyProfileParameters(BaseParameters):
    """Configuration accepted by Evidently profile steps.

    Attributes:
        column_mapping: properties of the DataFrame columns used
        ignored_cols: columns to ignore during the Evidently profile step
        profile_sections: a list identifying the Evidently profile sections
            to be used. The following are valid options supported by
            Evidently:
            - "datadrift"
            - "categoricaltargetdrift"
            - "numericaltargetdrift"
            - "classificationmodelperformance"
            - "regressionmodelperformance"
            - "probabilisticmodelperformance"
        verbose_level: Verbosity level for the Evidently dashboards. Use
            0 for a brief dashboard, 1 for a detailed dashboard.
        profile_options: Optional list of options to pass to the
            profile constructor. See `EvidentlyDataValidator._unpack_options`.
        dashboard_options: Optional list of options to pass to the
            dashboard constructor. See `EvidentlyDataValidator._unpack_options`.
    """

    column_mapping: Optional[EvidentlyColumnMapping] = None
    ignored_cols: Optional[List[str]] = None
    profile_sections: Optional[Sequence[str]] = None
    verbose_level: int = 1
    # Each instance gets its own fresh, empty list of packed options.
    profile_options: Sequence[Tuple[str, Dict[str, Any]]] = Field(default_factory=list)
    dashboard_options: Sequence[Tuple[str, Dict[str, Any]]] = Field(default_factory=list)
EvidentlyProfileStep (BaseStep)
Step implementation implementing an Evidently Profile Step.
Source code in zenml/integrations/evidently/steps/evidently_profile.py
class EvidentlyProfileStep(BaseStep):
    """Step that generates an Evidently profile and dashboard."""

    def entrypoint(
        self,
        reference_dataset: pd.DataFrame,
        comparison_dataset: pd.DataFrame,
        params: EvidentlyProfileParameters,
    ) -> Output(  # type:ignore[valid-type]
        profile=Profile, dashboard=str
    ):
        """Profile two datasets with the active Evidently data validator.

        Args:
            reference_dataset: a Pandas DataFrame
            comparison_dataset: a Pandas DataFrame of new data you wish to
                compare against the reference data
            params: the parameters for the step

        Raises:
            ValueError: If ignored_cols is an empty list
            ValueError: If column is not found in reference or comparison
                dataset

        Returns:
            profile: Evidently Profile generated by the data validator
            dashboard: HTML report extracted from the generated Evidently
                Dashboard
        """
        data_validator = cast(
            EvidentlyDataValidator,
            EvidentlyDataValidator.get_active_data_validator(),
        )

        if params.ignored_cols is not None:
            # An explicit empty list is rejected rather than read as
            # "ignore nothing".
            if not params.ignored_cols:
                raise ValueError(
                    f"Expects None or list of columns in strings, but got {params.ignored_cols}"
                )

            ignored = set(params.ignored_cols)
            present_in_both = ignored.issubset(
                set(reference_dataset.columns)
            ) and ignored.issubset(set(comparison_dataset.columns))
            if not present_in_both:
                raise ValueError(
                    "Column is not found in reference or comparison datasets"
                )

            # Drop the ignored columns from both datasets before profiling.
            reference_dataset = reference_dataset.drop(
                labels=list(params.ignored_cols), axis=1
            )
            comparison_dataset = comparison_dataset.drop(
                labels=list(params.ignored_cols), axis=1
            )

        column_mapping = (
            params.column_mapping.to_evidently_column_mapping()
            if params.column_mapping
            else None
        )

        profile, dashboard = data_validator.data_profiling(
            dataset=reference_dataset,
            comparison_dataset=comparison_dataset,
            profile_list=params.profile_sections,
            column_mapping=column_mapping,
            verbose_level=params.verbose_level,
            profile_options=params.profile_options,
            dashboard_options=params.dashboard_options,
        )
        html_report = dashboard.html()
        return [profile, html_report]
PARAMETERS_CLASS (BaseParameters)
pydantic-model
Parameters class for Evidently profile steps.
Attributes:
Name | Type | Description |
---|---|---|
column_mapping |
Optional[zenml.integrations.evidently.steps.evidently_profile.EvidentlyColumnMapping] |
properties of the DataFrame columns used |
ignored_cols |
Optional[List[str]] |
columns to ignore during the Evidently profile step |
profile_sections |
Optional[Sequence[str]] |
a list identifying the Evidently profile sections to be used. The following are valid options supported by Evidently: - "datadrift" - "categoricaltargetdrift" - "numericaltargetdrift" - "classificationmodelperformance" - "regressionmodelperformance" - "probabilisticmodelperformance" |
verbose_level |
int |
Verbosity level for the Evidently dashboards. Use 0 for a brief dashboard, 1 for a detailed dashboard. |
profile_options |
Sequence[Tuple[str, Dict[str, Any]]] |
Optional list of options to pass to the profile constructor. See `EvidentlyDataValidator._unpack_options`. |
dashboard_options |
Sequence[Tuple[str, Dict[str, Any]]] |
Optional list of options to pass to the dashboard constructor. See `EvidentlyDataValidator._unpack_options`. |
Source code in zenml/integrations/evidently/steps/evidently_profile.py
class EvidentlyProfileParameters(BaseParameters):
    """Configuration accepted by Evidently profile steps.

    Attributes:
        column_mapping: properties of the DataFrame columns used
        ignored_cols: columns to ignore during the Evidently profile step
        profile_sections: a list identifying the Evidently profile sections
            to be used. The following are valid options supported by
            Evidently:
            - "datadrift"
            - "categoricaltargetdrift"
            - "numericaltargetdrift"
            - "classificationmodelperformance"
            - "regressionmodelperformance"
            - "probabilisticmodelperformance"
        verbose_level: Verbosity level for the Evidently dashboards. Use
            0 for a brief dashboard, 1 for a detailed dashboard.
        profile_options: Optional list of options to pass to the
            profile constructor. See `EvidentlyDataValidator._unpack_options`.
        dashboard_options: Optional list of options to pass to the
            dashboard constructor. See `EvidentlyDataValidator._unpack_options`.
    """

    column_mapping: Optional[EvidentlyColumnMapping] = None
    ignored_cols: Optional[List[str]] = None
    profile_sections: Optional[Sequence[str]] = None
    verbose_level: int = 1
    # Each instance gets its own fresh, empty list of packed options.
    profile_options: Sequence[Tuple[str, Dict[str, Any]]] = Field(default_factory=list)
    dashboard_options: Sequence[Tuple[str, Dict[str, Any]]] = Field(default_factory=list)
entrypoint(self, reference_dataset, comparison_dataset, params)
Main entrypoint for the Evidently profile step.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reference_dataset |
DataFrame |
a Pandas DataFrame |
required |
comparison_dataset |
DataFrame |
a Pandas DataFrame of new data you wish to compare against the reference data |
required |
params |
EvidentlyProfileParameters |
the parameters for the step |
required |
Exceptions:
Type | Description |
---|---|
ValueError |
If ignored_cols is an empty list |
ValueError |
If column is not found in reference or comparison dataset |
Returns:
Type | Description |
---|---|
profile |
Evidently Profile generated for the data drift |
dashboard |
HTML report extracted from an Evidently Dashboard generated for the data drift |
Source code in zenml/integrations/evidently/steps/evidently_profile.py
def entrypoint(
    self,
    reference_dataset: pd.DataFrame,
    comparison_dataset: pd.DataFrame,
    params: EvidentlyProfileParameters,
) -> Output(  # type:ignore[valid-type]
    profile=Profile, dashboard=str
):
    """Profile a comparison dataset against a reference dataset with Evidently.

    The columns listed in `params.ignored_cols` (if any) are removed from
    both datasets before the active Evidently data validator is asked to
    build the profile and the dashboard.

    Args:
        reference_dataset: a Pandas DataFrame
        comparison_dataset: a Pandas DataFrame of new data you wish to
            compare against the reference data
        params: the parameters for the step

    Raises:
        ValueError: If ignored_cols is an empty list
        ValueError: If column is not found in reference or comparison
            dataset

    Returns:
        profile: Evidently Profile returned by the data validator
        dashboard: HTML report extracted from the Evidently Dashboard
            returned by the data validator
    """
    validator = cast(
        EvidentlyDataValidator,
        EvidentlyDataValidator.get_active_data_validator(),
    )

    # `None` means "keep every column"; an empty list is rejected explicitly.
    if params.ignored_cols is not None:
        if not params.ignored_cols:
            raise ValueError(
                f"Expects None or list of columns in strings, but got {params.ignored_cols}"
            )

        ignored = set(params.ignored_cols)
        # Every ignored column has to exist in both frames before dropping.
        present_in_both = ignored.issubset(
            set(reference_dataset.columns)
        ) and ignored.issubset(set(comparison_dataset.columns))
        if not present_in_both:
            raise ValueError(
                "Column is not found in reference or comparison datasets"
            )

        drop_labels = list(params.ignored_cols)
        reference_dataset = reference_dataset.drop(labels=drop_labels, axis=1)
        comparison_dataset = comparison_dataset.drop(
            labels=drop_labels, axis=1
        )

    # Only convert the mapping when the caller supplied one.
    evidently_mapping = (
        params.column_mapping.to_evidently_column_mapping()
        if params.column_mapping
        else None
    )

    profile, dashboard = validator.data_profiling(
        dataset=reference_dataset,
        comparison_dataset=comparison_dataset,
        profile_list=params.profile_sections,
        column_mapping=evidently_mapping,
        verbose_level=params.verbose_level,
        profile_options=params.profile_options,
        dashboard_options=params.dashboard_options,
    )
    # The dashboard is handed on as its rendered HTML string.
    return [profile, dashboard.html()]
evidently_profile_step(step_name, params)
Shortcut function to create a new instance of the EvidentlyProfileStep step.
The returned EvidentlyProfileStep can be used in a pipeline to run model drift analyses on two input pd.DataFrame datasets and return the results as an Evidently profile object and a rendered dashboard object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
step_name |
str |
The name of the step |
required |
params |
EvidentlyProfileParameters |
The parameters for the step |
required |
Returns:
Type | Description |
---|---|
BaseStep |
An EvidentlyProfileStep step instance |
Source code in zenml/integrations/evidently/steps/evidently_profile.py
def evidently_profile_step(
    step_name: str,
    params: EvidentlyProfileParameters,
) -> BaseStep:
    """Build a named EvidentlyProfileStep from the given parameters.

    The returned EvidentlyProfileStep can be used in a pipeline to
    run model drift analyses on two input pd.DataFrame datasets and return the
    results as an Evidently profile object and a rendered dashboard object.

    Args:
        step_name: The name of the step
        params: The parameters for the step

    Returns:
        An EvidentlyProfileStep step instance
    """
    profile_step = EvidentlyProfileStep(name=step_name, params=params)
    return profile_step
visualizers
special
Initialization for Evidently visualizer.
evidently_visualizer
Implementation of the Evidently visualizer.
EvidentlyVisualizer (BaseVisualizer)
The implementation of an Evidently Visualizer.
Source code in zenml/integrations/evidently/visualizers/evidently_visualizer.py
class EvidentlyVisualizer(BaseVisualizer):
    """The implementation of an Evidently Visualizer."""

    # NOTE: `visualize` is the concrete implementation, so it must not be
    # marked `@abstractmethod`.
    def visualize(self, object: StepView, *args: Any, **kwargs: Any) -> None:
        """Display every string data artifact produced by a step.

        Args:
            object: StepView fetched from run.get_step().
            *args: Additional arguments.
            **kwargs: Additional keyword arguments.
        """
        for artifact_view in object.outputs.values():
            # Only string-typed data artifacts are displayed; everything
            # else is skipped.
            if (
                artifact_view.type == ArtifactType.DATA
                and artifact_view.data_type == "builtins.str"
            ):
                artifact = artifact_view.read()
                self.generate_facet(artifact)

    def generate_facet(self, html_: str) -> None:
        """Display an HTML report inline or in a new browser tab.

        Inside a notebook (Jupyter or Google Colab) the HTML is rendered
        inline. Otherwise it is written to a temporary `.html` file, which is
        not deleted afterwards, and opened with the default web browser.

        Args:
            html_: HTML represented as a string.
        """
        if Environment.in_notebook() or Environment.in_google_colab():
            # `IPython.display` is the public import location for these names.
            from IPython.display import HTML, display

            display(HTML(html_))
            return

        from pathlib import Path

        logger.warning(
            "The magic functions are only usable in a Jupyter notebook."
        )
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".html", encoding="utf-8"
        ) as f:
            f.write(html_)
        # The file is closed (and therefore flushed) before the browser is
        # launched, so the browser never reads a partially written report.
        report_uri = Path(f.name).resolve().as_uri()
        logger.info("Opening %s in a new browser..", f.name)
        webbrowser.open(report_uri, new=2)
generate_facet(self, html_)
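Display the given HTML report: inline when running in a notebook, otherwise by writing it to a temporary file and opening that file in a web browser.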
Parameters:
Name | Type | Description | Default |
---|---|---|---|
html_ |
str |
HTML represented as a string. |
required |
Source code in zenml/integrations/evidently/visualizers/evidently_visualizer.py
def generate_facet(self, html_: str) -> None:
    """Display an HTML report inline or in a new browser tab.

    Inside a notebook (Jupyter or Google Colab) the HTML is rendered inline.
    Otherwise it is written to a temporary `.html` file, which is not deleted
    afterwards, and opened with the default web browser.

    Args:
        html_: HTML represented as a string.
    """
    if Environment.in_notebook() or Environment.in_google_colab():
        # `IPython.display` is the public import location for these names.
        from IPython.display import HTML, display

        display(HTML(html_))
        return

    from pathlib import Path

    logger.warning(
        "The magic functions are only usable in a Jupyter notebook."
    )
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".html", encoding="utf-8"
    ) as f:
        f.write(html_)
    # The file is closed (and therefore flushed) before the browser is
    # launched, so the browser never reads a partially written report.
    report_uri = Path(f.name).resolve().as_uri()
    logger.info("Opening %s in a new browser..", f.name)
    webbrowser.open(report_uri, new=2)
visualize(self, object, *args, **kwargs)
Method to visualize components.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
object |
StepView |
StepView fetched from run.get_step(). |
required |
*args |
Any |
Additional arguments. |
() |
**kwargs |
Any |
Additional keyword arguments. |
{} |
Source code in zenml/integrations/evidently/visualizers/evidently_visualizer.py
def visualize(self, object: StepView, *args: Any, **kwargs: Any) -> None:
    """Display every string data artifact produced by a step.

    This is the concrete implementation, so it is intentionally not marked
    `@abstractmethod`.

    Args:
        object: StepView fetched from run.get_step().
        *args: Additional arguments.
        **kwargs: Additional keyword arguments.
    """
    for artifact_view in object.outputs.values():
        # Only string-typed data artifacts are displayed; everything else
        # is skipped.
        if (
            artifact_view.type == ArtifactType.DATA
            and artifact_view.data_type == "builtins.str"
        ):
            artifact = artifact_view.read()
            self.generate_facet(artifact)