Facets

`zenml.integrations.facets`

Facets integration for ZenML.

`FACETS = 'facets'` `module-attribute`

`PANDAS = 'pandas'` `module-attribute`

`FacetsIntegration`

Bases: Integration

Definition of the Facets integration for ZenML.

`activate() -> None` `classmethod`

Activate the Facets integration.

Source code in src/zenml/integrations/facets/__init__.py

@classmethod
def activate(cls) -> None:
    """Activate the Facets integration.

    Imports the integration's ``materializers`` subpackage. The imported
    name is not used afterwards, so the import is performed purely for the
    side effects of loading that subpackage.
    """
    # Imported at call time rather than at module import time; presumably so
    # that the Facets dependencies are only required once the integration is
    # actually activated -- TODO confirm.
    from zenml.integrations.facets import materializers  # noqa

`get_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

Method to get the requirements for the integration.

Parameters:

Name	Type	Description	Default
`target_os`	`Optional[str]`	The target operating system to get the requirements for.	`None`

Returns:

Type	Description
`List[str]`	A list of requirements.

Source code in src/zenml/integrations/facets/__init__.py

@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Collect the requirements of the Facets integration.

    The result is this integration's own ``REQUIREMENTS`` followed by the
    requirements reported by the Pandas integration for the same target
    operating system.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        A list of requirements.
    """
    # Imported lazily, inside the method, exactly as the integration does
    # for its other cross-integration references.
    from zenml.integrations.pandas import PandasIntegration

    own_requirements = cls.REQUIREMENTS
    pandas_requirements = PandasIntegration.get_requirements(
        target_os=target_os
    )
    return own_requirements + pandas_requirements

`Integration`

Base class for integrations in ZenML.

`activate() -> None` `classmethod`

Abstract method to activate the integration.

Source code in src/zenml/integrations/integration.py

@classmethod
def activate(cls) -> None:
    """Abstract method to activate the integration.

    The base implementation has no body besides this docstring, so calling
    it does nothing and returns ``None``. Subclasses override it to perform
    their activation work.
    """

`check_installation() -> bool` `classmethod`

Method to check whether the required packages are installed.

Returns:

Type	Description
`bool`	True if all required packages are installed, False otherwise.

Source code in src/zenml/integrations/integration.py

@classmethod
def check_installation(cls) -> bool:
    """Method to check whether the required packages are installed.

    For every requirement returned by ``cls.get_requirements()`` (called
    without a ``target_os``), the distribution itself is looked up first and
    then each of its dependencies. When the requirement declares extras,
    only the dependencies returned for those extras are checked; otherwise
    the distribution's default dependencies are checked. The first problem
    encountered is logged at debug level and ends the check.

    Returns:
        True if all required packages are installed, False otherwise.
    """
    for r in cls.get_requirements():
        try:
            # First check if the base package is installed. A missing
            # package or a version mismatch is handled by the outer
            # ``except`` clauses at the bottom of this loop body.
            dist = pkg_resources.get_distribution(r)

            # Next, check if the dependencies (including extras) are
            # installed
            deps: List[Requirement] = []

            _, extras = parse_requirement(r)
            if extras:
                # Drop the first and last character of the extras string
                # and split the remainder on commas -- presumably the
                # string has the form "[a,b]"; confirm against
                # ``parse_requirement``.
                # NOTE(review): the pieces are not stripped, so a value
                # like "[a, b]" would yield " b" -- confirm whether
                # whitespace can occur here.
                extra_list = extras[1:-1].split(",")
                for extra in extra_list:
                    try:
                        requirements = dist.requires(extras=[extra])  # type: ignore[arg-type]
                    except pkg_resources.UnknownExtra as e:
                        # The installed distribution does not define this
                        # extra, so the requirement cannot be satisfied.
                        logger.debug(f"Unknown extra: {str(e)}")
                        return False
                    deps.extend(requirements)
            else:
                deps = dist.requires()

            for ri in deps:
                try:
                    # Remove the "extra == ..." part from the requirement string
                    # NOTE(review): the pattern only matches extras made
                    # of word characters (``\w+``) in double quotes; an
                    # extra name containing "-" or "." would be left in
                    # place -- confirm whether such names can occur.
                    cleaned_req = re.sub(
                        r"; extra == \"\w+\"", "", str(ri)
                    )
                    pkg_resources.get_distribution(cleaned_req)
                except pkg_resources.DistributionNotFound as e:
                    logger.debug(
                        f"Unable to find required dependency "
                        f"'{e.req}' for requirement '{r}' "
                        f"necessary for integration '{cls.NAME}'."
                    )
                    return False
                except pkg_resources.VersionConflict as e:
                    logger.debug(
                        f"Package version '{e.dist}' does not match "
                        f"version '{e.req}' required by '{r}' "
                        f"necessary for integration '{cls.NAME}'."
                    )
                    return False

        # These handlers cover the lookup of the top-level requirement
        # itself; dependency lookups are handled by the inner ``try``.
        except pkg_resources.DistributionNotFound as e:
            logger.debug(
                f"Unable to find required package '{e.req}' for "
                f"integration {cls.NAME}."
            )
            return False
        except pkg_resources.VersionConflict as e:
            logger.debug(
                f"Package version '{e.dist}' does not match version "
                f"'{e.req}' necessary for integration {cls.NAME}."
            )
            return False

    # Reached only when no requirement triggered an early ``return False``.
    logger.debug(
        f"Integration {cls.NAME} is installed correctly with "
        f"requirements {cls.get_requirements()}."
    )
    return True

`flavors() -> List[Type[Flavor]]` `classmethod`

Abstract method to declare new stack component flavors.

Returns:

Type	Description
`List[Type[Flavor]]`	A list of new stack component flavors.

Source code in src/zenml/integrations/integration.py

@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors provided by the integration.

    The base implementation declares none; a new empty list is created on
    every call.

    Returns:
        A list of new stack component flavors.
    """
    declared_flavors: List[Type[Flavor]] = []
    return declared_flavors

`get_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

Method to get the requirements for the integration.

Parameters:

Name	Type	Description	Default
`target_os`	`Optional[str]`	The target operating system to get the requirements for.	`None`

Returns:

Type	Description
`List[str]`	A list of requirements.

Source code in src/zenml/integrations/integration.py

@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Return the requirements declared by the integration.

    This base implementation does not use ``target_os``; it hands back the
    class-level ``REQUIREMENTS`` list itself, not a copy.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        A list of requirements.
    """
    requirements: List[str] = cls.REQUIREMENTS
    return requirements

`get_uninstall_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

Method to get the uninstall requirements for the integration.

Parameters:

Name	Type	Description	Default
`target_os`	`Optional[str]`	The target operating system to get the requirements for.	`None`

Returns:

Type	Description
`List[str]`	A list of requirements.

Source code in src/zenml/integrations/integration.py

@classmethod
def get_uninstall_requirements(
    cls, target_os: Optional[str] = None
) -> List[str]:
    """Return the requirements that should be removed on uninstall.

    Starts from ``cls.get_requirements(target_os=target_os)`` and leaves
    out every requirement whose string starts with one of the prefixes in
    ``cls.REQUIREMENTS_IGNORED_ON_UNINSTALL``. The original order is kept.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        A list of requirements.
    """
    return [
        requirement
        for requirement in cls.get_requirements(target_os=target_os)
        if not any(
            requirement.startswith(prefix)
            for prefix in cls.REQUIREMENTS_IGNORED_ON_UNINSTALL
        )
    ]

`plugin_flavors() -> List[Type[BasePluginFlavor]]` `classmethod`

Abstract method to declare new plugin flavors.

Returns:

Type	Description
`List[Type[BasePluginFlavor]]`	A list of new plugin flavors.

Source code in src/zenml/integrations/integration.py

@classmethod
def plugin_flavors(cls) -> List[Type["BasePluginFlavor"]]:
    """Declare the plugin flavors provided by the integration.

    The base implementation declares none; a new empty list is created on
    every call.

    Returns:
        A list of new plugin flavors.
    """
    declared_plugin_flavors: List[Type["BasePluginFlavor"]] = []
    return declared_plugin_flavors

`materializers`

Facets Materializers.

`FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)`

Bases: BaseMaterializer

Materializer to save Facets visualizations.

This materializer is used to visualize and compare dataset statistics using Facets. In contrast to other materializers, this materializer only saves the visualization and not the data itself.

Source code in src/zenml/materializers/base_materializer.py

def __init__(
    self,
    uri: str,
    artifact_store: Optional[BaseArtifactStore] = None,
):
    """Set up the materializer for the given artifact URI.

    Both arguments are stored on the instance unchanged.

    Args:
        uri: The URI where the artifact data will be stored.
        artifact_store: The artifact store used to store this artifact.
    """
    # Public location of the artifact.
    self.uri = uri
    # Kept private; may be None when no store was supplied.
    self._artifact_store = artifact_store

`save_visualizations(data: FacetsComparison) -> Dict[str, VisualizationType]`

Save a Facets visualization of the data.

Parameters:

Name	Type	Description	Default
`data`	`FacetsComparison`	The data to visualize.	required

Returns:

Type	Description
`Dict[str, VisualizationType]`	A dictionary of visualization URIs and their types.

Source code in src/zenml/integrations/facets/materializers/facets_materializer.py

def save_visualizations(
    self, data: FacetsComparison
) -> Dict[str, VisualizationType]:
    """Render the datasets as a Facets HTML page and store it.

    Statistics for ``data.datasets`` are generated, serialized and
    base64-encoded, then substituted for every occurrence of the literal
    ``protostr`` in the ``stats.html`` template that sits next to this
    module. The resulting page is written below ``self.uri``.

    Args:
        data: The data to visualize.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    # Build the base64 text form of the feature statistics.
    generator = GenericFeatureStatisticsGenerator()
    stats_proto = generator.ProtoFromDataFrames(data.datasets)
    encoded_stats = base64.b64encode(stats_proto.SerializeToString()).decode(
        "utf-8"
    )

    # Fill the HTML template shipped alongside this module.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    page = io_utils.read_file_contents_as_string(template_path).replace(
        "protostr", encoded_stats
    )

    # Backslashes are normalized to forward slashes; presumably so that
    # paths joined on Windows remain valid URIs -- TODO confirm.
    target_path = os.path.join(self.uri, VISUALIZATION_FILENAME).replace(
        "\\", "/"
    )
    with fileio.open(target_path, "w") as output_file:
        output_file.write(page)
    return {target_path: VisualizationType.HTML}

`facets_materializer`

Implementation of the FacetsMaterializer.

`FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)`

Bases: BaseMaterializer

Materializer to save Facets visualizations.

This materializer is used to visualize and compare dataset statistics using Facets. In contrast to other materializers, this materializer only saves the visualization and not the data itself.

Source code in src/zenml/materializers/base_materializer.py

def __init__(
    self,
    uri: str,
    artifact_store: Optional[BaseArtifactStore] = None,
):
    """Set up the materializer for the given artifact URI.

    Both arguments are stored on the instance unchanged.

    Args:
        uri: The URI where the artifact data will be stored.
        artifact_store: The artifact store used to store this artifact.
    """
    # Public location of the artifact.
    self.uri = uri
    # Kept private; may be None when no store was supplied.
    self._artifact_store = artifact_store

`save_visualizations(data: FacetsComparison) -> Dict[str, VisualizationType]`

Save a Facets visualization of the data.

Parameters:

Name	Type	Description	Default
`data`	`FacetsComparison`	The data to visualize.	required

Returns:

Type	Description
`Dict[str, VisualizationType]`	A dictionary of visualization URIs and their types.

Source code in src/zenml/integrations/facets/materializers/facets_materializer.py

def save_visualizations(
    self, data: FacetsComparison
) -> Dict[str, VisualizationType]:
    """Render the datasets as a Facets HTML page and store it.

    Statistics for ``data.datasets`` are generated, serialized and
    base64-encoded, then substituted for every occurrence of the literal
    ``protostr`` in the ``stats.html`` template that sits next to this
    module. The resulting page is written below ``self.uri``.

    Args:
        data: The data to visualize.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    # Build the base64 text form of the feature statistics.
    generator = GenericFeatureStatisticsGenerator()
    stats_proto = generator.ProtoFromDataFrames(data.datasets)
    encoded_stats = base64.b64encode(stats_proto.SerializeToString()).decode(
        "utf-8"
    )

    # Fill the HTML template shipped alongside this module.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    page = io_utils.read_file_contents_as_string(template_path).replace(
        "protostr", encoded_stats
    )

    # Backslashes are normalized to forward slashes; presumably so that
    # paths joined on Windows remain valid URIs -- TODO confirm.
    target_path = os.path.join(self.uri, VISUALIZATION_FILENAME).replace(
        "\\", "/"
    )
    with fileio.open(target_path, "w") as output_file:
        output_file.write(page)
    return {target_path: VisualizationType.HTML}

`models`

Models used by the Facets integration.

`FacetsComparison`

Bases: BaseModel

Facets comparison model.

Returning this from any step will automatically visualize the dataset statistics using Facets.

Attributes:

Name	Type	Description
`datasets`	`List[Dict[str, Union[str, DataFrame]]]`	List of datasets to compare. Should be in the format `[{"name": "dataset_name", "table": pd.DataFrame}, ...]`.

`steps`

Facets Standard Steps.

`FacetsComparison`

Bases: BaseModel

Facets comparison model.

Returning this from any step will automatically visualize the dataset statistics using Facets.

Attributes:

Name	Type	Description
`datasets`	`List[Dict[str, Union[str, DataFrame]]]`	List of datasets to compare. Should be in the format `[{"name": "dataset_name", "table": pd.DataFrame}, ...]`.

`facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison`

Visualize and compare dataset statistics with Facets.

Parameters:

Name	Type	Description	Default
`dataframes`	`Dict[str, DataFrame]`	Dict of dataframes whose statistics should be compared, mapping names to dataframes.	required

Returns:

Type	Description
`FacetsComparison`	`FacetsComparison` object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py

@step
def facets_dict_visualization_step(
    dataframes: Dict[str, pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of several named dataframes with Facets.

    Each dictionary key becomes the dataset name of its dataframe; the
    datasets keep the iteration order of the dictionary.

    Args:
        dataframes: Dict of dataframes whose statistics should be compared,
            mapping names to dataframes.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": label, "table": frame}
        for label, frame in dataframes.items()
    ]
    return FacetsComparison(datasets=datasets)

`facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison`

Visualize and compare dataset statistics with Facets.

Parameters:

Name	Type	Description	Default
`dataframes`	`List[DataFrame]`	List of dataframes whose statistics should be compared.	required

Returns:

Type	Description
`FacetsComparison`	`FacetsComparison` object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py

@step
def facets_list_visualization_step(
    dataframes: List[pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of a list of dataframes with Facets.

    The dataframes are named ``dataset_0``, ``dataset_1``, ... according
    to their position in the list.

    Args:
        dataframes: List of dataframes whose statistics should be compared.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": f"dataset_{position}", "table": frame}
        for position, frame in enumerate(dataframes)
    ]
    return FacetsComparison(datasets=datasets)

`facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison`

Visualize and compare dataset statistics with Facets.

Parameters:

Name	Type	Description	Default
`reference`	`DataFrame`	Reference dataset.	required
`comparison`	`DataFrame`	Dataset to compare to reference.	required

Returns:

Type	Description
`FacetsComparison`	`FacetsComparison` object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py

@step
def facets_visualization_step(
    reference: pd.DataFrame, comparison: pd.DataFrame
) -> FacetsComparison:
    """Compare a reference dataset against a second dataset with Facets.

    The two dataframes are passed on under the names ``reference`` and
    ``comparison``, in that order.

    Args:
        reference: Reference dataset.
        comparison: Dataset to compare to reference.

    Returns:
        `FacetsComparison` object.
    """
    named_tables = (("reference", reference), ("comparison", comparison))
    return FacetsComparison(
        datasets=[
            {"name": label, "table": frame} for label, frame in named_tables
        ]
    )

`facets_visualization_steps`

Facets Standard Steps.

`facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison`

Visualize and compare dataset statistics with Facets.

Parameters:

Name	Type	Description	Default
`dataframes`	`Dict[str, DataFrame]`	Dict of dataframes whose statistics should be compared, mapping names to dataframes.	required

Returns:

Type	Description
`FacetsComparison`	`FacetsComparison` object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py

@step
def facets_dict_visualization_step(
    dataframes: Dict[str, pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of several named dataframes with Facets.

    Each dictionary key becomes the dataset name of its dataframe; the
    datasets keep the iteration order of the dictionary.

    Args:
        dataframes: Dict of dataframes whose statistics should be compared,
            mapping names to dataframes.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": label, "table": frame}
        for label, frame in dataframes.items()
    ]
    return FacetsComparison(datasets=datasets)

`facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison`

Visualize and compare dataset statistics with Facets.

Parameters:

Name	Type	Description	Default
`dataframes`	`List[DataFrame]`	List of dataframes whose statistics should be compared.	required

Returns:

Type	Description
`FacetsComparison`	`FacetsComparison` object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py

@step
def facets_list_visualization_step(
    dataframes: List[pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of a list of dataframes with Facets.

    The dataframes are named ``dataset_0``, ``dataset_1``, ... according
    to their position in the list.

    Args:
        dataframes: List of dataframes whose statistics should be compared.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": f"dataset_{position}", "table": frame}
        for position, frame in enumerate(dataframes)
    ]
    return FacetsComparison(datasets=datasets)

`facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison`

Visualize and compare dataset statistics with Facets.

Parameters:

Name	Type	Description	Default
`reference`	`DataFrame`	Reference dataset.	required
`comparison`	`DataFrame`	Dataset to compare to reference.	required

Returns:

Type	Description
`FacetsComparison`	`FacetsComparison` object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py

@step
def facets_visualization_step(
    reference: pd.DataFrame, comparison: pd.DataFrame
) -> FacetsComparison:
    """Compare a reference dataset against a second dataset with Facets.

    The two dataframes are passed on under the names ``reference`` and
    ``comparison``, in that order.

    Args:
        reference: Reference dataset.
        comparison: Dataset to compare to reference.

    Returns:
        `FacetsComparison` object.
    """
    named_tables = (("reference", reference), ("comparison", comparison))
    return FacetsComparison(
        datasets=[
            {"name": label, "table": frame} for label, frame in named_tables
        ]
    )

Facets

`zenml.integrations.facets`

Attributes

`FACETS = 'facets'` `module-attribute`

`PANDAS = 'pandas'` `module-attribute`

Classes

`FacetsIntegration`

Functions

`activate() -> None` `classmethod`

`get_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

`Integration`

Functions

`activate() -> None` `classmethod`

`check_installation() -> bool` `classmethod`

`flavors() -> List[Type[Flavor]]` `classmethod`

`get_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

`get_uninstall_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

`plugin_flavors() -> List[Type[BasePluginFlavor]]` `classmethod`

Modules

`materializers`

Classes

`FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)`

Modules

`facets_materializer`

`models`

Classes

`FacetsComparison`

`steps`

Classes

`FacetsComparison`

Functions

`facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison`

`facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison`

`facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison`

Modules

`facets_visualization_steps`

Facets

zenml.integrations.facets

Attributes

FACETS = 'facets' module-attribute

PANDAS = 'pandas' module-attribute

Classes

FacetsIntegration

Functions

activate() -> None classmethod

get_requirements(target_os: Optional[str] = None) -> List[str] classmethod

Integration

Functions

activate() -> None classmethod

check_installation() -> bool classmethod

flavors() -> List[Type[Flavor]] classmethod

get_requirements(target_os: Optional[str] = None) -> List[str] classmethod

get_uninstall_requirements(target_os: Optional[str] = None) -> List[str] classmethod

plugin_flavors() -> List[Type[BasePluginFlavor]] classmethod

Modules

materializers

Classes

FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)

Modules

facets_materializer

models

Classes

FacetsComparison

steps

Classes

FacetsComparison

Functions

facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison

facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison

facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison

Modules

facets_visualization_steps

`zenml.integrations.facets`

`FACETS = 'facets'` `module-attribute`

`PANDAS = 'pandas'` `module-attribute`

`FacetsIntegration`

`activate() -> None` `classmethod`

`get_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

`Integration`

`activate() -> None` `classmethod`

`check_installation() -> bool` `classmethod`

`flavors() -> List[Type[Flavor]]` `classmethod`

`get_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

`get_uninstall_requirements(target_os: Optional[str] = None) -> List[str]` `classmethod`

`plugin_flavors() -> List[Type[BasePluginFlavor]]` `classmethod`

`materializers`

`FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)`

`facets_materializer`

`models`

`FacetsComparison`

`steps`

`FacetsComparison`

`facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison`

`facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison`

`facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison`

`facets_visualization_steps`