Whylogs
zenml.integrations.whylogs
special
Initialization of the whylogs integration.
WhylogsIntegration (Integration)
Definition of whylogs integration for ZenML.
Source code in zenml/integrations/whylogs/__init__.py
class WhylogsIntegration(Integration):
    """Definition of [whylogs](https://github.com/whylabs/whylogs) integration for ZenML."""

    NAME = WHYLOGS
    REQUIREMENTS = ["whylogs[viz]~=1.0.5", "whylogs[whylabs]~=1.0.5"]
    REQUIREMENTS_IGNORED_ON_UNINSTALL = ["pandas"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Imports the whylogs `materializers` and `secret_schemas` modules. The
        imported names are not used here (hence the `noqa` markers);
        presumably the import itself is what registers them with ZenML.
        """
        from zenml.integrations.whylogs import materializers  # noqa
        from zenml.integrations.whylogs import secret_schemas  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the whylogs integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # Imported lazily, inside the method, rather than at module level.
        from zenml.integrations.whylogs.flavors import (
            WhylogsDataValidatorFlavor,
        )

        return [WhylogsDataValidatorFlavor]

    @classmethod
    def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
        """Method to get the requirements for the integration.

        The result is this integration's own `REQUIREMENTS` followed by the
        requirements reported by the pandas integration.

        Args:
            target_os: The target operating system to get the requirements for.

        Returns:
            A list of requirements.
        """
        from zenml.integrations.pandas import PandasIntegration

        return cls.REQUIREMENTS + (
            PandasIntegration.get_requirements(target_os=target_os)
        )
activate()
classmethod
Activates the integration.
Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def activate(cls) -> None:
    """Activate the whylogs integration.

    Imports the `materializers` and `secret_schemas` modules of the whylogs
    integration, in that order. The names are unused here, which is why the
    import carries a `noqa` marker.
    """
    from zenml.integrations.whylogs import (  # noqa
        materializers,
        secret_schemas,
    )
flavors()
classmethod
Declare the stack component flavors for the whylogs integration.
Returns:
Type | Description |
---|---|
List[Type[zenml.stack.flavor.Flavor]] |
List of stack component flavors for this integration. |
Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the whylogs integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # Imported lazily, inside the method, rather than at module level.
    from zenml.integrations.whylogs.flavors import (
        WhylogsDataValidatorFlavor,
    )

    return [WhylogsDataValidatorFlavor]
get_requirements(target_os=None)
classmethod
Method to get the requirements for the integration.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
target_os |
Optional[str] |
The target operating system to get the requirements for. |
None |
Returns:
Type | Description |
---|---|
List[str] |
A list of requirements. |
Source code in zenml/integrations/whylogs/__init__.py
@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Collect the package requirements of the whylogs integration.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        The integration's own requirements followed by the requirements of
        the pandas integration for the same target OS.
    """
    from zenml.integrations.pandas import PandasIntegration

    pandas_requirements = PandasIntegration.get_requirements(
        target_os=target_os
    )
    return cls.REQUIREMENTS + pandas_requirements
constants
Whylogs integration constants.
data_validators
special
Initialization of the whylogs data validator for ZenML.
whylogs_data_validator
Implementation of the whylogs data validator.
WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)
Whylogs data validator stack component.
Attributes:
Name | Type | Description |
---|---|---|
authentication_secret |
Optional ZenML secret with Whylabs credentials. If configured, all the data profiles returned by all pipeline steps will automatically be uploaded to Whylabs in addition to being stored in the ZenML Artifact Store. |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidator(BaseDataValidator, AuthenticationMixin):
    """Whylogs data validator stack component.

    Attributes:
        authentication_secret: Optional ZenML secret with Whylabs credentials.
            If configured, all the data profiles returned by all pipeline steps
            will automatically be uploaded to Whylabs in addition to being
            stored in the ZenML Artifact Store.
    """

    NAME: ClassVar[str] = "whylogs"
    FLAVOR: ClassVar[Type[BaseDataValidatorFlavor]] = (
        WhylogsDataValidatorFlavor
    )

    @property
    def config(self) -> WhylogsDataValidatorConfig:
        """Returns the `WhylogsDataValidatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(WhylogsDataValidatorConfig, self._config)

    @property
    def settings_class(self) -> Optional[Type["BaseSettings"]]:
        """Settings class for the Whylogs data validator.

        Returns:
            The settings class.
        """
        return WhylogsDataValidatorSettings

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        **kwargs: Any,
    ) -> DatasetProfileView:
        """Analyze a dataset and generate a data profile with whylogs.

        Args:
            dataset: Target dataset to be profiled.
            comparison_dataset: Optional dataset to be used for data profiles
                that require a baseline for comparison (e.g. data drift
                profiles). Not read by this implementation.
            profile_list: Optional list identifying the categories of whylogs
                data profiles to be generated (unused).
            dataset_timestamp: timestamp to associate with the generated
                dataset profile (Optional). The current UTC time is used if
                not supplied.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            A whylogs profile view object.
        """
        results = why.log(pandas=dataset)
        profile = results.profile()
        if dataset_timestamp is None:
            # Use a timezone-aware UTC value: a naive `utcnow()` result is
            # treated as *local* time by anything that later converts it
            # with `astimezone()` or `timestamp()`, shifting the recorded
            # time on machines that are not running in UTC.
            dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
        profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
        return profile.view()

    def upload_profile_view(
        self,
        profile_view: DatasetProfileView,
        dataset_id: Optional[str] = None,
    ) -> None:
        """Upload a whylogs data profile view to Whylabs, if configured to do so.

        Nothing is uploaded when no authentication secret is available.

        Args:
            profile_view: Whylogs profile view to upload.
            dataset_id: Optional dataset identifier to use for the uploaded
                data profile. If omitted, a dataset identifier will be retrieved
                using other means, in order:
                    * the default dataset identifier configured in the Data
                    Validator secret
                    * a dataset ID will be generated automatically based on the
                    current pipeline/step information.

        Raises:
            ValueError: If the dataset ID was not provided and could not be
                retrieved or inferred from other sources.
        """
        secret = self.get_typed_authentication_secret(
            expected_schema_type=WhylabsSecretSchema
        )
        if not secret:
            return

        dataset_id = dataset_id or secret.whylabs_default_dataset_id

        if not dataset_id:
            # use the current pipeline name and the step name to generate a
            # unique dataset name
            try:
                # get pipeline name and step name
                step_context = get_step_context()
                pipeline_name = step_context.pipeline.name
                step_name = step_context.step_run.name
                dataset_id = f"{pipeline_name}_{step_name}"
            except RuntimeError as e:
                # Chain the original error so the traceback shows why the
                # step context was unavailable.
                raise ValueError(
                    "A dataset ID was not specified and could not be "
                    "generated from the current pipeline and step name."
                ) from e

        # Instantiate WhyLabs Writer
        writer = WhyLabsWriter(
            org_id=secret.whylabs_default_org_id,
            api_key=secret.whylabs_api_key,
            dataset_id=dataset_id,
        )

        # pass a profile view to the writer's write method
        writer.write(profile=profile_view)
        logger.info(
            f"Uploaded data profile for dataset {dataset_id} to Whylabs."
        )
config: WhylogsDataValidatorConfig
property
readonly
Returns the `WhylogsDataValidatorConfig` config.
Returns:
Type | Description |
---|---|
WhylogsDataValidatorConfig |
The configuration. |
settings_class: Optional[Type[BaseSettings]]
property
readonly
Settings class for the Whylogs data validator.
Returns:
Type | Description |
---|---|
Optional[Type[BaseSettings]] |
The settings class. |
FLAVOR (BaseDataValidatorFlavor)
Whylogs data validator flavor.
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the whylogs data validator stack component."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The whylogs data validator flavor name constant.
        """
        flavor_name = WHYLOGS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def docs_url(self) -> Optional[str]:
        """Link to the documentation explaining this flavor.

        Returns:
            The default docs URL generated for this flavor.
        """
        url = self.generate_default_docs_url()
        return url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """Link to the SDK documentation explaining this flavor.

        Returns:
            The default SDK docs URL generated for this flavor.
        """
        url = self.generate_default_sdk_docs_url()
        return url

    @property
    def logo_url(self) -> str:
        """Link to the logo representing the flavor in the dashboard.

        Returns:
            The URL of the flavor logo image.
        """
        logo = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/data_validator/whylogs.png"
        return logo

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Configuration class associated with this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Imported on access, inside the property, rather than at module
        # level.
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator as implementation,
        )

        return implementation
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]
property
readonly
Returns the `WhylogsDataValidatorConfig` config class.
Returns:
Type | Description |
---|---|
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] |
The config class. |
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor docs url. |
implementation_class: Type[WhylogsDataValidator]
property
readonly
Implementation class for this flavor.
Returns:
Type | Description |
---|---|
Type[WhylogsDataValidator] |
The implementation class. |
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:
Type | Description |
---|---|
str |
The flavor logo. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor SDK docs url. |
data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)
Analyze a dataset and generate a data profile with whylogs.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
pandas.DataFrame |
Target dataset to be profiled. |
required |
comparison_dataset |
Optional[pandas.DataFrame] |
Optional dataset to be used for data profiles that require a baseline for comparison (e.g data drift profiles). |
None |
profile_list |
Optional[Sequence[str]] |
Optional list identifying the categories of whylogs data profiles to be generated (unused). |
None |
dataset_timestamp |
Optional[datetime.datetime] |
timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied. |
None |
**kwargs |
Any |
Extra keyword arguments (unused). |
{} |
Returns:
Type | Description |
---|---|
whylogs.core.DatasetProfileView |
A whylogs profile view object. |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    **kwargs: Any,
) -> DatasetProfileView:
    """Analyze a dataset and generate a data profile with whylogs.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional dataset to be used for data profiles
            that require a baseline for comparison (e.g. data drift
            profiles). Not read by this implementation.
        profile_list: Optional list identifying the categories of whylogs
            data profiles to be generated (unused).
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current UTC time is used if not
            supplied.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        A whylogs profile view object.
    """
    results = why.log(pandas=dataset)
    profile = results.profile()
    if dataset_timestamp is None:
        # Use a timezone-aware UTC value: a naive `utcnow()` result is
        # treated as *local* time by anything that later converts it with
        # `astimezone()` or `timestamp()`, shifting the recorded time on
        # machines that are not running in UTC.
        dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
    profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
    return profile.view()
upload_profile_view(self, profile_view, dataset_id=None)
Upload a whylogs data profile view to Whylabs, if configured to do so.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_view |
whylogs.core.DatasetProfileView |
Whylogs profile view to upload. |
required |
dataset_id |
Optional[str] |
Optional dataset identifier to use for the uploaded data profile. If omitted, a dataset identifier will be retrieved using other means, in order: * the default dataset identifier configured in the Data Validator secret * a dataset ID will be generated automatically based on the current pipeline/step information. |
None |
Exceptions:
Type | Description |
---|---|
ValueError |
If the dataset ID was not provided and could not be retrieved or inferred from other sources. |
Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py
def upload_profile_view(
    self,
    profile_view: DatasetProfileView,
    dataset_id: Optional[str] = None,
) -> None:
    """Upload a whylogs data profile view to Whylabs, if configured to do so.

    Nothing is uploaded when no authentication secret is available.

    Args:
        profile_view: Whylogs profile view to upload.
        dataset_id: Optional dataset identifier to use for the uploaded
            data profile. If omitted, a dataset identifier will be retrieved
            using other means, in order:
                * the default dataset identifier configured in the Data
                Validator secret
                * a dataset ID will be generated automatically based on the
                current pipeline/step information.

    Raises:
        ValueError: If the dataset ID was not provided and could not be
            retrieved or inferred from other sources.
    """
    secret = self.get_typed_authentication_secret(
        expected_schema_type=WhylabsSecretSchema
    )
    if not secret:
        return

    dataset_id = dataset_id or secret.whylabs_default_dataset_id

    if not dataset_id:
        # use the current pipeline name and the step name to generate a
        # unique dataset name
        try:
            # get pipeline name and step name
            step_context = get_step_context()
            pipeline_name = step_context.pipeline.name
            step_name = step_context.step_run.name
            dataset_id = f"{pipeline_name}_{step_name}"
        except RuntimeError as e:
            # Chain the original error so the traceback shows why the step
            # context was unavailable.
            raise ValueError(
                "A dataset ID was not specified and could not be "
                "generated from the current pipeline and step name."
            ) from e

    # Instantiate WhyLabs Writer
    writer = WhyLabsWriter(
        org_id=secret.whylabs_default_org_id,
        api_key=secret.whylabs_api_key,
        dataset_id=dataset_id,
    )

    # pass a profile view to the writer's write method
    writer.write(profile=profile_view)
    logger.info(
        f"Uploaded data profile for dataset {dataset_id} to Whylabs."
    )
flavors
special
WhyLabs whylogs integration flavors.
whylogs_data_validator_flavor
WhyLabs whylogs data validator flavor.
WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)
Config for the whylogs data validator.
Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorConfig(
    BaseDataValidatorConfig,
    AuthenticationConfigMixin,
    WhylogsDataValidatorSettings,
):
    """Config for the whylogs data validator.

    Declares no fields of its own; everything is inherited from the base data
    validator config, the authentication config mixin and the whylogs data
    validator settings.
    """
WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)
Whylogs data validator flavor.
Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor describing the whylogs data validator stack component."""

    @property
    def name(self) -> str:
        """Identifier of this flavor.

        Returns:
            The whylogs data validator flavor name constant.
        """
        flavor_name = WHYLOGS_DATA_VALIDATOR_FLAVOR
        return flavor_name

    @property
    def docs_url(self) -> Optional[str]:
        """Link to the documentation explaining this flavor.

        Returns:
            The default docs URL generated for this flavor.
        """
        url = self.generate_default_docs_url()
        return url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """Link to the SDK documentation explaining this flavor.

        Returns:
            The default SDK docs URL generated for this flavor.
        """
        url = self.generate_default_sdk_docs_url()
        return url

    @property
    def logo_url(self) -> str:
        """Link to the logo representing the flavor in the dashboard.

        Returns:
            The URL of the flavor logo image.
        """
        logo = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/data_validator/whylogs.png"
        return logo

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Configuration class associated with this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Imported on access, inside the property, rather than at module
        # level.
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator as implementation,
        )

        return implementation
config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]
property
readonly
Returns the `WhylogsDataValidatorConfig` config class.
Returns:
Type | Description |
---|---|
Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] |
The config class. |
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor docs url. |
implementation_class: Type[WhylogsDataValidator]
property
readonly
Implementation class for this flavor.
Returns:
Type | Description |
---|---|
Type[WhylogsDataValidator] |
The implementation class. |
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:
Type | Description |
---|---|
str |
The flavor logo. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor SDK docs url. |
WhylogsDataValidatorSettings (BaseSettings)
Settings for the Whylogs data validator.
Attributes:
Name | Type | Description |
---|---|---|
enable_whylabs |
bool |
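If set to `True` for a step, all the whylogs data profile views returned by the step will automatically be uploaded to the Whylabs platform if Whylabs credentials are configured. |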
dataset_id |
Optional[str] |
Dataset ID to use when uploading profiles to Whylabs. |
Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py
class WhylogsDataValidatorSettings(BaseSettings):
    """Settings for the Whylogs data validator.

    Attributes:
        enable_whylabs: If set to `True` for a step, all the whylogs data
            profile views returned by the step will automatically be uploaded
            to the Whylabs platform if Whylabs credentials are configured.
            Defaults to `False`.
        dataset_id: Dataset ID to use when uploading profiles to Whylabs.
            Defaults to `None`.
    """

    # Uploading to Whylabs is opt-in: off unless explicitly enabled.
    enable_whylabs: bool = False
    # No explicit dataset ID unless one is configured.
    dataset_id: Optional[str] = None
materializers
special
Initialization of the whylogs materializer.
whylogs_materializer
Implementation of the whylogs materializer.
WhylogsMaterializer (BaseMaterializer)
Materializer to read/write whylogs dataset profile views.
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
class WhylogsMaterializer(BaseMaterializer):
    """Materializer to read/write whylogs dataset profile views."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (DatasetProfileView,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = (
        ArtifactType.DATA_ANALYSIS
    )

    def load(self, data_type: Type[Any]) -> DatasetProfileView:
        """Reads and returns a whylogs dataset profile view.

        The profile file is first copied from the artifact store into a
        local temporary directory and read from there.

        Args:
            data_type: The type of the data to read.

        Returns:
            A loaded whylogs dataset profile view object.
        """
        filepath = os.path.join(self.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            profile_view = DatasetProfileView.read(temp_file)
        finally:
            # Always remove the temporary folder, even if the copy or the
            # read fails, so failed loads do not leave directories behind.
            fileio.rmtree(temp_dir)

        return profile_view

    def save(self, profile_view: DatasetProfileView) -> None:
        """Writes a whylogs dataset profile view.

        The profile is written to a local temporary file, copied into the
        artifact store and then, on a best-effort basis, uploaded to Whylabs.
        An upload failure is logged and does not fail the save.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        filepath = os.path.join(self.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            profile_view.write(temp_file)

            # Copy it into artifact store
            fileio.copy(temp_file, filepath)
        finally:
            # Always remove the temporary folder, even if the write or the
            # copy fails, so failed saves do not leave directories behind.
            fileio.rmtree(temp_dir)

        try:
            self._upload_to_whylabs(profile_view)
        except Exception as e:
            logger.error(
                "Failed to upload whylogs profile view to Whylabs: %s", e
            )

    def save_visualizations(
        self,
        profile_view: DatasetProfileView,
    ) -> Dict[str, VisualizationType]:
        """Saves visualizations for the given whylogs dataset profile view.

        Args:
            profile_view: The whylogs dataset profile view to visualize.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        # currently, whylogs doesn't support visualizing a single profile, so
        # we trick it by using the same profile twice, both as reference and
        # target, in a drift report
        visualization = NotebookProfileVisualizer()
        visualization.set_profiles(
            target_profile_view=profile_view,
            reference_profile_view=profile_view,
        )
        rendered_html = visualization.summary_drift_report()
        filepath = os.path.join(self.uri, HTML_FILENAME)
        # Normalize Windows-style separators to forward slashes.
        filepath = filepath.replace("\\", "/")
        with fileio.open(filepath, "w") as f:
            f.write(rendered_html.data)
        return {filepath: VisualizationType.HTML}

    def _upload_to_whylabs(self, profile_view: DatasetProfileView) -> None:
        """Uploads a whylogs dataset profile view to Whylabs.

        The upload is skipped silently when no whylogs data validator is
        active, when not running inside a pipeline step, or when Whylabs is
        not enabled in the settings resolved for the current step.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator,
        )
        from zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor import (
            WhylogsDataValidatorSettings,
        )

        try:
            data_validator = WhylogsDataValidator.get_active_data_validator()
        except TypeError:
            # no whylogs data validator is active
            return

        if not isinstance(data_validator, WhylogsDataValidator):
            # the active data validator is not a whylogs data validator
            return

        try:
            step_context = get_step_context()
        except RuntimeError:
            # we are not running as part of a pipeline
            return

        settings = cast(
            WhylogsDataValidatorSettings,
            data_validator.get_settings(step_context.step_run),
        )

        if not settings.enable_whylabs:
            # whylabs is not enabled in the data validator
            return
        data_validator.upload_profile_view(
            profile_view, dataset_id=settings.dataset_id
        )
load(self, data_type)
Reads and returns a whylogs dataset profile view.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
The type of the data to read. |
required |
Returns:
Type | Description |
---|---|
whylogs.core.DatasetProfileView |
A loaded whylogs dataset profile view object. |
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def load(self, data_type: Type[Any]) -> DatasetProfileView:
    """Reads and returns a whylogs dataset profile view.

    The profile file is first copied from the artifact store into a local
    temporary directory and read from there.

    Args:
        data_type: The type of the data to read.

    Returns:
        A loaded whylogs dataset profile view object.
    """
    filepath = os.path.join(self.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        profile_view = DatasetProfileView.read(temp_file)
    finally:
        # Always remove the temporary folder, even if the copy or the read
        # fails, so failed loads do not leave directories behind.
        fileio.rmtree(temp_dir)

    return profile_view
save(self, profile_view)
Writes a whylogs dataset profile view.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_view |
whylogs.core.DatasetProfileView |
A whylogs dataset profile view object. |
required |
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def save(self, profile_view: DatasetProfileView) -> None:
    """Writes a whylogs dataset profile view.

    The profile is written to a local temporary file, copied into the
    artifact store and then, on a best-effort basis, uploaded to Whylabs.
    An upload failure is logged and does not fail the save.

    Args:
        profile_view: A whylogs dataset profile view object.
    """
    filepath = os.path.join(self.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        profile_view.write(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
    finally:
        # Always remove the temporary folder, even if the write or the copy
        # fails, so failed saves do not leave directories behind.
        fileio.rmtree(temp_dir)

    try:
        self._upload_to_whylabs(profile_view)
    except Exception as e:
        logger.error(
            "Failed to upload whylogs profile view to Whylabs: %s", e
        )
save_visualizations(self, profile_view)
Saves visualizations for the given whylogs dataset profile view.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
profile_view |
whylogs.core.DatasetProfileView |
The whylogs dataset profile view to visualize. |
required |
Returns:
Type | Description |
---|---|
Dict[str, zenml.enums.VisualizationType] |
A dictionary of visualization URIs and their types. |
Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py
def save_visualizations(
    self,
    profile_view: DatasetProfileView,
) -> Dict[str, VisualizationType]:
    """Render the profile view as an HTML report and store it.

    Args:
        profile_view: The whylogs dataset profile view to visualize.

    Returns:
        A single-entry dictionary mapping the URI of the written HTML file
        to the HTML visualization type.
    """
    # whylogs has no single-profile visualization, so a drift report is
    # built with the very same profile acting as both target and reference.
    visualizer = NotebookProfileVisualizer()
    visualizer.set_profiles(
        target_profile_view=profile_view,
        reference_profile_view=profile_view,
    )
    report = visualizer.summary_drift_report()

    # Build the destination URI with forward slashes only.
    html_uri = os.path.join(self.uri, HTML_FILENAME).replace("\\", "/")
    with fileio.open(html_uri, "w") as html_file:
        html_file.write(report.data)

    return {html_uri: VisualizationType.HTML}
secret_schemas
special
Initialization for the Whylabs secret schema.
This schema can be used to configure a ZenML secret to authenticate ZenML to use the Whylabs platform to automatically log all whylogs data profiles generated by pipeline steps.
whylabs_secret_schema
Implementation of the Whylabs secret schema.
WhylabsSecretSchema (BaseSecretSchema)
Whylabs credentials.
Attributes:
Name | Type | Description |
---|---|---|
whylabs_default_org_id |
str |
the Whylabs organization ID. |
whylabs_api_key |
str |
Whylabs API key. |
whylabs_default_dataset_id |
Optional[str] |
default Whylabs dataset ID to use when logging data profiles. |
Source code in zenml/integrations/whylogs/secret_schemas/whylabs_secret_schema.py
class WhylabsSecretSchema(BaseSecretSchema):
    """Whylabs credentials.

    Attributes:
        whylabs_default_org_id: the Whylabs organization ID (required).
        whylabs_api_key: Whylabs API key (required).
        whylabs_default_dataset_id: default Whylabs dataset ID to use when
            logging data profiles. Optional; defaults to `None`.
    """

    whylabs_default_org_id: str
    whylabs_api_key: str
    # Optional fallback dataset ID; `None` when not configured.
    whylabs_default_dataset_id: Optional[str] = None
steps
special
Initialization of the whylogs steps.
whylogs_profiler
Implementation of the whylogs profiler step.
get_whylogs_profiler_step(dataset_timestamp=None, dataset_id=None, enable_whylabs=True)
Shortcut function to create a new instance of the WhylogsProfilerStep step.
The returned WhylogsProfilerStep can be used in a pipeline to generate a whylogs DatasetProfileView from a given pd.DataFrame and save it as an artifact.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset_timestamp |
Optional[datetime.datetime] |
The timestamp of the dataset. |
None |
dataset_id |
Optional[str] |
Optional dataset ID to use to upload the profile to Whylabs. |
None |
enable_whylabs |
bool |
Whether to upload the generated profile to Whylabs. |
True |
Returns:
Type | Description |
---|---|
BaseStep |
a WhylogsProfilerStep step instance |
Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py
def get_whylogs_profiler_step(
    dataset_timestamp: Optional[datetime.datetime] = None,
    dataset_id: Optional[str] = None,
    enable_whylabs: bool = True,
) -> BaseStep:
    """Build a pre-configured instance of the whylogs profiler step.

    The returned step can be used in a pipeline to generate a whylogs
    DatasetProfileView from a given pd.DataFrame and save it as an artifact.
    It is derived from `whylogs_profiler_step` with the timestamp passed as
    a step parameter and the Whylabs options passed as whylogs data validator
    settings.

    Args:
        dataset_timestamp: The timestamp of the dataset.
        dataset_id: Optional dataset ID to use to upload the profile to Whylabs.
        enable_whylabs: Whether to upload the generated profile to Whylabs.

    Returns:
        a WhylogsProfilerStep step instance
    """
    # Settings are registered under the key derived from the whylogs data
    # validator flavor.
    settings_key = settings_utils.get_flavor_setting_key(
        WhylogsDataValidatorFlavor()
    )
    validator_settings = WhylogsDataValidatorSettings(
        enable_whylabs=enable_whylabs, dataset_id=dataset_id
    )
    return whylogs_profiler_step.with_options(
        parameters={"dataset_timestamp": dataset_timestamp},
        settings={settings_key: validator_settings},
    )