Whylogs

`zenml.integrations.whylogs` `special`

Initialization of the whylogs integration.

`WhylogsIntegration (Integration)`

Definition of whylogs integration for ZenML.

Source code in zenml/integrations/whylogs/__init__.py

class WhylogsIntegration(Integration):
    """ZenML integration for [whylogs](https://github.com/whylabs/whylogs)."""

    NAME = WHYLOGS
    REQUIREMENTS = ["whylogs[viz]~=1.0.5", "whylogs[whylabs]~=1.0.5"]

    REQUIREMENTS_IGNORED_ON_UNINSTALL = ["pandas"]

    @classmethod
    def activate(cls) -> None:
        """Import the whylogs materializers and secret schemas modules."""
        # Imported purely for their import-time side effects.
        import zenml.integrations.whylogs.materializers  # noqa
        import zenml.integrations.whylogs.secret_schemas  # noqa

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the whylogs integration.

        Returns:
            A list holding the whylogs data validator flavor class.
        """
        # Deferred import: only load the flavors package when requested.
        from zenml.integrations.whylogs import flavors as whylogs_flavors

        return [whylogs_flavors.WhylogsDataValidatorFlavor]

    @classmethod
    def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
        """Collect the package requirements of this integration.

        Args:
            target_os: The target operating system to get the requirements for.

        Returns:
            This integration's own requirements followed by the requirements
            reported by the pandas integration.
        """
        from zenml.integrations.pandas import PandasIntegration

        pandas_requirements = PandasIntegration.get_requirements(
            target_os=target_os
        )
        return cls.REQUIREMENTS + pandas_requirements

`activate()` `classmethod`

Activates the integration.

Source code in zenml/integrations/whylogs/__init__.py

@classmethod
def activate(cls) -> None:
    """Import the whylogs materializers and secret schemas modules."""
    # Imported purely for their import-time side effects.
    import zenml.integrations.whylogs.materializers  # noqa
    import zenml.integrations.whylogs.secret_schemas  # noqa

`flavors()` `classmethod`

Declare the stack component flavors for the whylogs integration.

Returns:

Type	Description
`List[Type[zenml.stack.flavor.Flavor]]`	List of stack component flavors for this integration.

Source code in zenml/integrations/whylogs/__init__.py

@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the whylogs integration.

    Returns:
        A list holding the whylogs data validator flavor class.
    """
    # Deferred import: only load the flavors package when requested.
    from zenml.integrations.whylogs import flavors as whylogs_flavors

    return [whylogs_flavors.WhylogsDataValidatorFlavor]

`get_requirements(target_os=None)` `classmethod`

Method to get the requirements for the integration.

Parameters:

Name	Type	Description	Default
`target_os`	`Optional[str]`	The target operating system to get the requirements for.	`None`

Returns:

Type	Description
`List[str]`	A list of requirements.

Source code in zenml/integrations/whylogs/__init__.py

@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Collect the package requirements of this integration.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        This integration's own requirements followed by the requirements
        reported by the pandas integration.
    """
    from zenml.integrations.pandas import PandasIntegration

    pandas_requirements = PandasIntegration.get_requirements(
        target_os=target_os
    )
    return cls.REQUIREMENTS + pandas_requirements

`constants`

Whylogs integration constants.

`data_validators` `special`

Initialization of the whylogs data validator for ZenML.

`whylogs_data_validator`

Implementation of the whylogs data validator.

`WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)`

Whylogs data validator stack component.

Attributes:

Name	Type	Description
`authentication_secret`		Optional ZenML secret with Whylabs credentials. If configured, all the data profiles returned by all pipeline steps will automatically be uploaded to Whylabs in addition to being stored in the ZenML Artifact Store.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

class WhylogsDataValidator(BaseDataValidator, AuthenticationMixin):
    """Whylogs data validator stack component.

    Attributes:
        authentication_secret: Optional ZenML secret with Whylabs credentials.
            If configured, all the data profiles returned by all pipeline steps
            will automatically be uploaded to Whylabs in addition to being
            stored in the ZenML Artifact Store.
    """

    NAME: ClassVar[str] = "whylogs"
    FLAVOR: ClassVar[Type[BaseDataValidatorFlavor]] = (
        WhylogsDataValidatorFlavor
    )

    @property
    def config(self) -> WhylogsDataValidatorConfig:
        """Returns the `WhylogsDataValidatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(WhylogsDataValidatorConfig, self._config)

    @property
    def settings_class(self) -> Optional[Type["BaseSettings"]]:
        """Settings class for the Whylogs data validator.

        Returns:
            The settings class.
        """
        return WhylogsDataValidatorSettings

    def data_profiling(
        self,
        dataset: pd.DataFrame,
        comparison_dataset: Optional[pd.DataFrame] = None,
        profile_list: Optional[Sequence[str]] = None,
        dataset_timestamp: Optional[datetime.datetime] = None,
        **kwargs: Any,
    ) -> DatasetProfileView:
        """Analyze a dataset and generate a data profile with whylogs.

        Args:
            dataset: Target dataset to be profiled.
            comparison_dataset: Optional baseline dataset; accepted for
                interface compatibility but not used by this implementation.
            profile_list: Optional list identifying the categories of whylogs
                data profiles to be generated (unused).
            dataset_timestamp: timestamp to associate with the generated
                dataset profile (Optional). The current UTC time is used if
                not supplied.
            **kwargs: Extra keyword arguments (unused).

        Returns:
            A whylogs profile view object.
        """
        results = why.log(pandas=dataset)
        profile = results.profile()
        if dataset_timestamp is None:
            # Use a timezone-aware UTC timestamp: a naive `utcnow()` value
            # carries no tzinfo and is liable to be interpreted as local time
            # by consumers that normalize timestamps to UTC.
            dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
        profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
        return profile.view()

    def upload_profile_view(
        self,
        profile_view: DatasetProfileView,
        dataset_id: Optional[str] = None,
    ) -> None:
        """Upload a whylogs data profile view to Whylabs, if configured to do so.

        Nothing is uploaded when no authentication secret is available.

        Args:
            profile_view: Whylogs profile view to upload.
            dataset_id: Optional dataset identifier to use for the uploaded
                data profile. If omitted, a dataset identifier will be retrieved
                using other means, in order:
                    * the default dataset identifier configured in the Data
                    Validator secret
                    * a dataset ID will be generated automatically based on the
                    current pipeline/step information.

        Raises:
            ValueError: If the dataset ID was not provided and could not be
                retrieved or inferred from other sources.
        """
        secret = self.get_typed_authentication_secret(
            expected_schema_type=WhylabsSecretSchema
        )
        if not secret:
            return

        dataset_id = dataset_id or secret.whylabs_default_dataset_id

        if not dataset_id:
            # use the current pipeline name and the step name to generate a
            # unique dataset name
            try:
                # get pipeline name and step name
                step_context = get_step_context()
                pipeline_name = step_context.pipeline.name
                step_name = step_context.step_run.name
                dataset_id = f"{pipeline_name}_{step_name}"
            except RuntimeError as err:
                # Chain the original error so the root cause stays visible.
                raise ValueError(
                    "A dataset ID was not specified and could not be "
                    "generated from the current pipeline and step name."
                ) from err

        # Instantiate WhyLabs Writer
        writer = WhyLabsWriter(
            org_id=secret.whylabs_default_org_id,
            api_key=secret.whylabs_api_key,
            dataset_id=dataset_id,
        )

        # pass a profile view to the writer's write method
        writer.write(profile=profile_view)

        logger.info(
            f"Uploaded data profile for dataset {dataset_id} to Whylabs."
        )

`config: WhylogsDataValidatorConfig` `property` `readonly`

Returns the WhylogsDataValidatorConfig config.

Returns:

Type	Description
`WhylogsDataValidatorConfig`	The configuration.

`settings_class: Optional[Type[BaseSettings]]` `property` `readonly`

Settings class for the Whylogs data validator.

Returns:

Type	Description
`Optional[Type[BaseSettings]]`	The settings class.

`FLAVOR (BaseDataValidatorFlavor)`

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor definition for the whylogs data validator."""

    @property
    def name(self) -> str:
        """The flavor's name.

        Returns:
            The whylogs data validator flavor name constant.
        """
        return WHYLOGS_DATA_VALIDATOR_FLAVOR

    @property
    def docs_url(self) -> Optional[str]:
        """URL of the documentation page for this flavor.

        Returns:
            The default docs URL generated for the flavor.
        """
        return self.generate_default_docs_url()

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """URL of the SDK documentation page for this flavor.

        Returns:
            The default SDK docs URL generated for the flavor.
        """
        return self.generate_default_sdk_docs_url()

    @property
    def logo_url(self) -> str:
        """URL of the logo that represents the flavor in the dashboard.

        Returns:
            The flavor logo URL.
        """
        return (
            "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/"
            "data_validator/whylogs.png"
        )

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Configuration class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Deferred import: the implementation is only loaded on demand.
        from zenml.integrations.whylogs import data_validators

        return data_validators.WhylogsDataValidator

`config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]` `property` `readonly`

Returns WhylogsDataValidatorConfig config class.

Returns:

Type	Description
`Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]`	The config class.

`docs_url: Optional[str]` `property` `readonly`

A url to point at docs explaining this flavor.

Returns:

Type	Description
`Optional[str]`	A flavor docs url.

`implementation_class: Type[WhylogsDataValidator]` `property` `readonly`

Implementation class for this flavor.

Returns:

Type	Description
`Type[WhylogsDataValidator]`	The implementation class.

`logo_url: str` `property` `readonly`

A url to represent the flavor in the dashboard.

Returns:

Type	Description
`str`	The flavor logo.

`name: str` `property` `readonly`

Name of the flavor.

Returns:

Type	Description
`str`	The name of the flavor.

`sdk_docs_url: Optional[str]` `property` `readonly`

A url to point at SDK docs explaining this flavor.

Returns:

Type	Description
`Optional[str]`	A flavor SDK docs url.

`data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)`

Analyze a dataset and generate a data profile with whylogs.

Parameters:

Name	Type	Description	Default
`dataset`	`pandas.DataFrame`	Target dataset to be profiled.	required
`comparison_dataset`	`Optional[pandas.DataFrame]`	Optional dataset to be used for data profiles that require a baseline for comparison (e.g data drift profiles).	`None`
`profile_list`	`Optional[Sequence[str]]`	Optional list identifying the categories of whylogs data profiles to be generated (unused).	`None`
`dataset_timestamp`	`Optional[datetime.datetime]`	timestamp to associate with the generated dataset profile (Optional). The current time is used if not supplied.	`None`
`**kwargs`	`Any`	Extra keyword arguments (unused).	`{}`

Returns:

Type	Description
`whylogs.core.DatasetProfileView`	A whylogs profile view object.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

def data_profiling(
    self,
    dataset: pd.DataFrame,
    comparison_dataset: Optional[pd.DataFrame] = None,
    profile_list: Optional[Sequence[str]] = None,
    dataset_timestamp: Optional[datetime.datetime] = None,
    **kwargs: Any,
) -> DatasetProfileView:
    """Analyze a dataset and generate a data profile with whylogs.

    Args:
        dataset: Target dataset to be profiled.
        comparison_dataset: Optional baseline dataset; accepted for
            interface compatibility but not used by this implementation.
        profile_list: Optional list identifying the categories of whylogs
            data profiles to be generated (unused).
        dataset_timestamp: timestamp to associate with the generated
            dataset profile (Optional). The current UTC time is used if not
            supplied.
        **kwargs: Extra keyword arguments (unused).

    Returns:
        A whylogs profile view object.
    """
    results = why.log(pandas=dataset)
    profile = results.profile()
    if dataset_timestamp is None:
        # Use a timezone-aware UTC timestamp: a naive `utcnow()` value
        # carries no tzinfo and is liable to be interpreted as local time
        # by consumers that normalize timestamps to UTC.
        dataset_timestamp = datetime.datetime.now(datetime.timezone.utc)
    profile.set_dataset_timestamp(dataset_timestamp=dataset_timestamp)
    return profile.view()

`upload_profile_view(self, profile_view, dataset_id=None)`

Upload a whylogs data profile view to Whylabs, if configured to do so.

Parameters:

Name	Type	Description	Default
`profile_view`	`whylogs.core.DatasetProfileView`	Whylogs profile view to upload.	required
`dataset_id`	`Optional[str]`	Optional dataset identifier to use for the uploaded data profile. If omitted, a dataset identifier will be retrieved using other means, in order: * the default dataset identifier configured in the Data Validator secret * a dataset ID will be generated automatically based on the current pipeline/step information.	`None`

Exceptions:

Type	Description
`ValueError`	If the dataset ID was not provided and could not be retrieved or inferred from other sources.

Source code in zenml/integrations/whylogs/data_validators/whylogs_data_validator.py

def upload_profile_view(
    self,
    profile_view: DatasetProfileView,
    dataset_id: Optional[str] = None,
) -> None:
    """Upload a whylogs data profile view to Whylabs, if configured to do so.

    Nothing is uploaded when no authentication secret is available.

    Args:
        profile_view: Whylogs profile view to upload.
        dataset_id: Optional dataset identifier to use for the uploaded
            data profile. If omitted, a dataset identifier will be retrieved
            using other means, in order:
                * the default dataset identifier configured in the Data
                Validator secret
                * a dataset ID will be generated automatically based on the
                current pipeline/step information.

    Raises:
        ValueError: If the dataset ID was not provided and could not be
            retrieved or inferred from other sources.
    """
    secret = self.get_typed_authentication_secret(
        expected_schema_type=WhylabsSecretSchema
    )
    if not secret:
        return

    dataset_id = dataset_id or secret.whylabs_default_dataset_id

    if not dataset_id:
        # use the current pipeline name and the step name to generate a
        # unique dataset name
        try:
            # get pipeline name and step name
            step_context = get_step_context()
            pipeline_name = step_context.pipeline.name
            step_name = step_context.step_run.name
            dataset_id = f"{pipeline_name}_{step_name}"
        except RuntimeError as err:
            # Chain the original error so the root cause stays visible.
            raise ValueError(
                "A dataset ID was not specified and could not be "
                "generated from the current pipeline and step name."
            ) from err

    # Instantiate WhyLabs Writer
    writer = WhyLabsWriter(
        org_id=secret.whylabs_default_org_id,
        api_key=secret.whylabs_api_key,
        dataset_id=dataset_id,
    )

    # pass a profile view to the writer's write method
    writer.write(profile=profile_view)

    logger.info(
        f"Uploaded data profile for dataset {dataset_id} to Whylabs."
    )

`flavors` `special`

WhyLabs whylogs integration flavors.

`whylogs_data_validator_flavor`

WhyLabs whylogs data validator flavor.

`WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)`

Config for the whylogs data validator.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py

class WhylogsDataValidatorConfig(
    BaseDataValidatorConfig,
    AuthenticationConfigMixin,
    WhylogsDataValidatorSettings,
):
    """Config for the whylogs data validator.

    Declares no fields of its own; everything is inherited from the base data
    validator config, the authentication config mixin and the whylogs data
    validator settings.
    """

`WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)`

Whylogs data validator flavor.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py

class WhylogsDataValidatorFlavor(BaseDataValidatorFlavor):
    """Flavor definition for the whylogs data validator."""

    @property
    def name(self) -> str:
        """The flavor's name.

        Returns:
            The whylogs data validator flavor name constant.
        """
        return WHYLOGS_DATA_VALIDATOR_FLAVOR

    @property
    def docs_url(self) -> Optional[str]:
        """URL of the documentation page for this flavor.

        Returns:
            The default docs URL generated for the flavor.
        """
        return self.generate_default_docs_url()

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """URL of the SDK documentation page for this flavor.

        Returns:
            The default SDK docs URL generated for the flavor.
        """
        return self.generate_default_sdk_docs_url()

    @property
    def logo_url(self) -> str:
        """URL of the logo that represents the flavor in the dashboard.

        Returns:
            The flavor logo URL.
        """
        return (
            "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/"
            "data_validator/whylogs.png"
        )

    @property
    def config_class(self) -> Type[WhylogsDataValidatorConfig]:
        """Configuration class used by this flavor.

        Returns:
            The `WhylogsDataValidatorConfig` class.
        """
        return WhylogsDataValidatorConfig

    @property
    def implementation_class(self) -> Type["WhylogsDataValidator"]:
        """Stack component class implementing this flavor.

        Returns:
            The `WhylogsDataValidator` class.
        """
        # Deferred import: the implementation is only loaded on demand.
        from zenml.integrations.whylogs import data_validators

        return data_validators.WhylogsDataValidator

`config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]` `property` `readonly`

Returns WhylogsDataValidatorConfig config class.

Returns:

Type	Description
`Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]`	The config class.

`docs_url: Optional[str]` `property` `readonly`

A url to point at docs explaining this flavor.

Returns:

Type	Description
`Optional[str]`	A flavor docs url.

`implementation_class: Type[WhylogsDataValidator]` `property` `readonly`

Implementation class for this flavor.

Returns:

Type	Description
`Type[WhylogsDataValidator]`	The implementation class.

`logo_url: str` `property` `readonly`

A url to represent the flavor in the dashboard.

Returns:

Type	Description
`str`	The flavor logo.

`name: str` `property` `readonly`

Name of the flavor.

Returns:

Type	Description
`str`	The name of the flavor.

`sdk_docs_url: Optional[str]` `property` `readonly`

A url to point at SDK docs explaining this flavor.

Returns:

Type	Description
`Optional[str]`	A flavor SDK docs url.

`WhylogsDataValidatorSettings (BaseSettings)`

Settings for the Whylogs data validator.

Attributes:

Name	Type	Description
`enable_whylabs`	`bool`	If set to `True` for a step, all the whylogs data profile views returned by the step will automatically be uploaded to the Whylabs platform if Whylabs credentials are configured.
`dataset_id`	`Optional[str]`	Dataset ID to use when uploading profiles to Whylabs.

Source code in zenml/integrations/whylogs/flavors/whylogs_data_validator_flavor.py

class WhylogsDataValidatorSettings(BaseSettings):
    """Settings for the Whylogs data validator.

    Attributes:
        enable_whylabs: If set to `True` for a step, all the whylogs data
            profile views returned by the step will automatically be uploaded
            to the Whylabs platform if Whylabs credentials are configured.
            Defaults to `False`.
        dataset_id: Dataset ID to use when uploading profiles to Whylabs.
            Defaults to `None`.
    """

    # Uploading to Whylabs is opt-in.
    enable_whylabs: bool = False
    # No explicit dataset ID unless one is configured.
    dataset_id: Optional[str] = None

`materializers` `special`

Initialization of the whylogs materializer.

`whylogs_materializer`

Implementation of the whylogs materializer.

`WhylogsMaterializer (BaseMaterializer)`

Materializer to read/write whylogs dataset profile views.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

class WhylogsMaterializer(BaseMaterializer):
    """Materializer to read/write whylogs dataset profile views."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (DatasetProfileView,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = (
        ArtifactType.DATA_ANALYSIS
    )

    def load(self, data_type: Type[Any]) -> DatasetProfileView:
        """Reads and returns a whylogs dataset profile view.

        The profile file is copied from the artifact store into a temporary
        directory and read from there.

        Args:
            data_type: The type of the data to read.

        Returns:
            A loaded whylogs dataset profile view object.
        """
        filepath = os.path.join(self.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            profile_view = DatasetProfileView.read(temp_file)
        finally:
            # Always clean up, even if the copy or the read failed.
            fileio.rmtree(temp_dir)

        return profile_view

    def save(self, profile_view: DatasetProfileView) -> None:
        """Writes a whylogs dataset profile view.

        The profile is written to a temporary file, copied into the artifact
        store and then passed on for an optional Whylabs upload. Upload
        failures are logged and do not fail the save.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        filepath = os.path.join(self.uri, PROFILE_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

            profile_view.write(temp_file)

            # Copy it into artifact store
            fileio.copy(temp_file, filepath)
        finally:
            # Always clean up, even if the write or the copy failed.
            fileio.rmtree(temp_dir)

        try:
            self._upload_to_whylabs(profile_view)
        except Exception as e:
            # Best effort: the artifact is already stored at this point.
            logger.error(
                "Failed to upload whylogs profile view to Whylabs: %s", e
            )

    def save_visualizations(
        self,
        profile_view: DatasetProfileView,
    ) -> Dict[str, VisualizationType]:
        """Saves visualizations for the given whylogs dataset profile view.

        Args:
            profile_view: The whylogs dataset profile view to visualize.

        Returns:
            A dictionary of visualization URIs and their types.
        """
        # currently, whylogs doesn't support visualizing a single profile, so
        # we trick it by using the same profile twice, both as reference and
        # target, in a drift report
        visualization = NotebookProfileVisualizer()
        visualization.set_profiles(
            target_profile_view=profile_view,
            reference_profile_view=profile_view,
        )
        rendered_html = visualization.summary_drift_report()
        filepath = os.path.join(self.uri, HTML_FILENAME)
        # Normalize Windows path separators to forward slashes.
        filepath = filepath.replace("\\", "/")
        with fileio.open(filepath, "w") as f:
            f.write(rendered_html.data)
        return {filepath: VisualizationType.HTML}

    def _upload_to_whylabs(self, profile_view: DatasetProfileView) -> None:
        """Uploads a whylogs dataset profile view to Whylabs.

        Returns without uploading when no whylogs data validator is active,
        when not running inside a step, or when the validator settings for
        the current step do not enable Whylabs.

        Args:
            profile_view: A whylogs dataset profile view object.
        """
        from zenml.integrations.whylogs.data_validators import (
            WhylogsDataValidator,
        )
        from zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor import (
            WhylogsDataValidatorSettings,
        )

        try:
            data_validator = WhylogsDataValidator.get_active_data_validator()
        except TypeError:
            # no whylogs data validator is active
            return

        if not isinstance(data_validator, WhylogsDataValidator):
            # the active data validator is not a whylogs data validator
            return

        try:
            step_context = get_step_context()
        except RuntimeError:
            # we are not running as part of a pipeline
            return

        settings = cast(
            WhylogsDataValidatorSettings,
            data_validator.get_settings(step_context.step_run),
        )

        if not settings.enable_whylabs:
            # whylabs is not enabled in the data validator
            return
        data_validator.upload_profile_view(
            profile_view, dataset_id=settings.dataset_id
        )

`load(self, data_type)`

Reads and returns a whylogs dataset profile view.

Parameters:

Name	Type	Description	Default
`data_type`	`Type[Any]`	The type of the data to read.	required

Returns:

Type	Description
`whylogs.core.DatasetProfileView`	A loaded whylogs dataset profile view object.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

def load(self, data_type: Type[Any]) -> DatasetProfileView:
    """Reads and returns a whylogs dataset profile view.

    The profile file is copied from the artifact store into a temporary
    directory and read from there.

    Args:
        data_type: The type of the data to read.

    Returns:
        A loaded whylogs dataset profile view object.
    """
    filepath = os.path.join(self.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        profile_view = DatasetProfileView.read(temp_file)
    finally:
        # Always clean up, even if the copy or the read failed.
        fileio.rmtree(temp_dir)

    return profile_view

`save(self, profile_view)`

Writes a whylogs dataset profile view.

Parameters:

Name	Type	Description	Default
`profile_view`	`whylogs.core.DatasetProfileView`	A whylogs dataset profile view object.	required

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

def save(self, profile_view: DatasetProfileView) -> None:
    """Writes a whylogs dataset profile view.

    The profile is written to a temporary file, copied into the artifact
    store and then passed on for an optional Whylabs upload. Upload failures
    are logged and do not fail the save.

    Args:
        profile_view: A whylogs dataset profile view object.
    """
    filepath = os.path.join(self.uri, PROFILE_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME)

        profile_view.write(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
    finally:
        # Always clean up, even if the write or the copy failed.
        fileio.rmtree(temp_dir)

    try:
        self._upload_to_whylabs(profile_view)
    except Exception as e:
        # Best effort: the artifact is already stored at this point.
        logger.error(
            "Failed to upload whylogs profile view to Whylabs: %s", e
        )

`save_visualizations(self, profile_view)`

Saves visualizations for the given whylogs dataset profile view.

Parameters:

Name	Type	Description	Default
`profile_view`	`whylogs.core.DatasetProfileView`	The whylogs dataset profile view to visualize.	required

Returns:

Type	Description
`Dict[str, zenml.enums.VisualizationType]`	A dictionary of visualization URIs and their types.

Source code in zenml/integrations/whylogs/materializers/whylogs_materializer.py

def save_visualizations(
    self,
    profile_view: DatasetProfileView,
) -> Dict[str, VisualizationType]:
    """Render an HTML report for a profile view and store it.

    Args:
        profile_view: The whylogs dataset profile view to visualize.

    Returns:
        A single-entry mapping from the stored report's URI to the HTML
        visualization type.
    """
    # whylogs cannot visualize a lone profile, so the same view is supplied
    # as both target and reference of a drift report.
    visualizer = NotebookProfileVisualizer()
    visualizer.set_profiles(
        target_profile_view=profile_view,
        reference_profile_view=profile_view,
    )
    report = visualizer.summary_drift_report()

    # Build the destination URI, normalizing backslashes to forward slashes.
    html_uri = os.path.join(self.uri, HTML_FILENAME).replace("\\", "/")
    with fileio.open(html_uri, "w") as html_file:
        html_file.write(report.data)

    return {html_uri: VisualizationType.HTML}

`secret_schemas` `special`

Initialization for the Whylabs secret schema.

This schema can be used to configure a ZenML secret that authenticates ZenML with the Whylabs platform, so that all whylogs data profiles generated by pipeline steps are logged there automatically.

`whylabs_secret_schema`

Implementation for Whylabs secret schemas.

`WhylabsSecretSchema (BaseSecretSchema)`

Whylabs credentials.

Attributes:

Name	Type	Description
`whylabs_default_org_id`	`str`	the Whylabs organization ID.
`whylabs_api_key`	`str`	Whylabs API key.
`whylabs_default_dataset_id`	`Optional[str]`	default Whylabs dataset ID to use when logging data profiles.

Source code in zenml/integrations/whylogs/secret_schemas/whylabs_secret_schema.py

class WhylabsSecretSchema(BaseSecretSchema):
    """Whylabs credentials.

    Attributes:
        whylabs_default_org_id: the Whylabs organization ID (required).
        whylabs_api_key: Whylabs API key (required).
        whylabs_default_dataset_id: default Whylabs dataset ID to use when
            logging data profiles. Optional; defaults to `None`.
    """

    # Required fields: no default values are declared for these two.
    whylabs_default_org_id: str
    whylabs_api_key: str
    # Optional fallback dataset ID.
    whylabs_default_dataset_id: Optional[str] = None

`steps` `special`

Initialization of the whylogs steps.

`whylogs_profiler`

Implementation of the whylogs profiler step.

`get_whylogs_profiler_step(dataset_timestamp=None, dataset_id=None, enable_whylabs=True)`

Shortcut function to create a new instance of the WhylogsProfilerStep step.

The returned WhylogsProfilerStep can be used in a pipeline to generate a whylogs DatasetProfileView from a given pd.DataFrame and save it as an artifact.

Parameters:

Name	Type	Description	Default
`dataset_timestamp`	`Optional[datetime.datetime]`	The timestamp of the dataset.	`None`
`dataset_id`	`Optional[str]`	Optional dataset ID to use to upload the profile to Whylabs.	`None`
`enable_whylabs`	`bool`	Whether to upload the generated profile to Whylabs.	`True`

Returns:

Type	Description
`BaseStep`	a WhylogsProfilerStep step instance

Source code in zenml/integrations/whylogs/steps/whylogs_profiler.py

def get_whylogs_profiler_step(
    dataset_timestamp: Optional[datetime.datetime] = None,
    dataset_id: Optional[str] = None,
    enable_whylabs: bool = True,
) -> BaseStep:
    """Return `whylogs_profiler_step` configured with the given options.

    The returned step can be used in a pipeline to generate a whylogs
    DatasetProfileView from a given pd.DataFrame and save it as an artifact.

    Args:
        dataset_timestamp: The timestamp of the dataset.
        dataset_id: Optional dataset ID to use to upload the profile to Whylabs.
        enable_whylabs: Whether to upload the generated profile to Whylabs.

    Returns:
        The result of calling `with_options` on `whylogs_profiler_step`.
    """
    # Settings are registered under the key derived from the whylogs flavor.
    settings_key = settings_utils.get_flavor_setting_key(
        WhylogsDataValidatorFlavor()
    )
    validator_settings = WhylogsDataValidatorSettings(
        enable_whylabs=enable_whylabs,
        dataset_id=dataset_id,
    )
    step_parameters = {"dataset_timestamp": dataset_timestamp}
    return whylogs_profiler_step.with_options(
        parameters=step_parameters,
        settings={settings_key: validator_settings},
    )

Whylogs

zenml.integrations.whylogs special

WhylogsIntegration (Integration)

activate() classmethod

flavors() classmethod

get_requirements(target_os=None) classmethod

constants

data_validators special

whylogs_data_validator

WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)

config: WhylogsDataValidatorConfig property readonly

settings_class: Optional[Type[BaseSettings]] property readonly

FLAVOR (BaseDataValidatorFlavor)

data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)

upload_profile_view(self, profile_view, dataset_id=None)

flavors special

whylogs_data_validator_flavor

WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)

WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)

config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig] property readonly

docs_url: Optional[str] property readonly

implementation_class: Type[WhylogsDataValidator] property readonly

logo_url: str property readonly

name: str property readonly

sdk_docs_url: Optional[str] property readonly

WhylogsDataValidatorSettings (BaseSettings)

materializers special

whylogs_materializer

WhylogsMaterializer (BaseMaterializer)

load(self, data_type)

save(self, profile_view)

save_visualizations(self, profile_view)

secret_schemas special

whylabs_secret_schema

WhylabsSecretSchema (BaseSecretSchema)

steps special

whylogs_profiler

get_whylogs_profiler_step(dataset_timestamp=None, dataset_id=None, enable_whylabs=True)

`zenml.integrations.whylogs` `special`

`WhylogsIntegration (Integration)`

`activate()` `classmethod`

`flavors()` `classmethod`

`get_requirements(target_os=None)` `classmethod`

`constants`

`data_validators` `special`

`whylogs_data_validator`

`WhylogsDataValidator (BaseDataValidator, AuthenticationMixin)`

`config: WhylogsDataValidatorConfig` `property` `readonly`

`settings_class: Optional[Type[BaseSettings]]` `property` `readonly`

`FLAVOR (BaseDataValidatorFlavor)`

`data_profiling(self, dataset, comparison_dataset=None, profile_list=None, dataset_timestamp=None, **kwargs)`

`upload_profile_view(self, profile_view, dataset_id=None)`

`flavors` `special`

`whylogs_data_validator_flavor`

`WhylogsDataValidatorConfig (BaseDataValidatorConfig, AuthenticationConfigMixin, WhylogsDataValidatorSettings)`

`WhylogsDataValidatorFlavor (BaseDataValidatorFlavor)`

`config_class: Type[zenml.integrations.whylogs.flavors.whylogs_data_validator_flavor.WhylogsDataValidatorConfig]` `property` `readonly`

`docs_url: Optional[str]` `property` `readonly`

`implementation_class: Type[WhylogsDataValidator]` `property` `readonly`

`logo_url: str` `property` `readonly`

`name: str` `property` `readonly`

`sdk_docs_url: Optional[str]` `property` `readonly`

`WhylogsDataValidatorSettings (BaseSettings)`

`materializers` `special`

`whylogs_materializer`

`WhylogsMaterializer (BaseMaterializer)`

`load(self, data_type)`

`save(self, profile_view)`

`save_visualizations(self, profile_view)`

`secret_schemas` `special`

`whylabs_secret_schema`

`WhylabsSecretSchema (BaseSecretSchema)`

`steps` `special`

`whylogs_profiler`

`get_whylogs_profiler_step(dataset_timestamp=None, dataset_id=None, enable_whylabs=True)`