Skip to content

Facets

zenml.integrations.facets

Facets integration for ZenML.

Attributes

FACETS = 'facets' module-attribute

PANDAS = 'pandas' module-attribute

Classes

FacetsIntegration

Bases: Integration

Definition of Facets integration for ZenML.

Functions
activate() -> None classmethod

Activate the Facets integration.

Source code in src/zenml/integrations/facets/__init__.py
28
29
30
31
@classmethod
def activate(cls) -> None:
    """Activate the Facets integration.

    Activation consists solely of importing the integration's
    ``materializers`` module.
    """
    # The imported name is never used afterwards: the import is executed only
    # for its side effects (presumably registering the Facets materializer --
    # confirm), which is why the unused-import lint is silenced with `noqa`.
    from zenml.integrations.facets import materializers  # noqa
get_requirements(target_os: Optional[str] = None) -> List[str] classmethod

Method to get the requirements for the integration.

Parameters:

Name Type Description Default
target_os Optional[str]

The target operating system to get the requirements for.

None

Returns:

Type Description
List[str]

A list of requirements.

Source code in src/zenml/integrations/facets/__init__.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Collect the requirements of the Facets integration.

    The integration's own ``REQUIREMENTS`` are followed by the requirements
    reported by the Pandas integration for the same target OS.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        A list of requirements.
    """
    from zenml.integrations.pandas import PandasIntegration

    own_requirements = cls.REQUIREMENTS
    pandas_requirements = PandasIntegration.get_requirements(
        target_os=target_os
    )
    # `+` builds a new list, so neither source list is modified.
    return own_requirements + pandas_requirements

Integration

Base class for integrations in ZenML.

Functions
activate() -> None classmethod

Abstract method to activate the integration.

Source code in src/zenml/integrations/integration.py
170
171
172
@classmethod
def activate(cls) -> None:
    """Hook that activates the integration.

    The base implementation performs no work; integrations that need
    activation logic are expected to override it.
    """
    return None
check_installation() -> bool classmethod

Method to check whether the required packages are installed.

Returns:

Type Description
bool

True if all required packages are installed, False otherwise.

Source code in src/zenml/integrations/integration.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
@classmethod
def check_installation(cls) -> bool:
    """Method to check whether the required packages are installed.

    Every requirement returned by `cls.get_requirements()` is looked up via
    `pkg_resources`, followed by a lookup of each of that distribution's own
    dependencies (restricted to the requested extras when the requirement
    names any). The first missing package, version conflict or unknown extra
    is logged at debug level and ends the check.

    Returns:
        True if all required packages are installed, False otherwise.
    """
    for r in cls.get_requirements():
        try:
            # First check if the base package is installed. A missing
            # package or a version mismatch raises here and is handled by
            # the outer `except` clauses at the bottom of the loop body.
            dist = pkg_resources.get_distribution(r)

            # Next, check if the dependencies (including extras) are
            # installed
            deps: List[Requirement] = []

            # Only the extras part of the parsed requirement is needed.
            _, extras = parse_requirement(r)
            if extras:
                # The slice drops the first and last character before
                # splitting on commas -- presumably `extras` is a bracketed
                # string such as "[a,b]"; confirm against `parse_requirement`.
                # NOTE(review): items are not stripped, so whitespace after a
                # comma would end up in the extra name -- confirm this cannot
                # occur.
                extra_list = extras[1:-1].split(",")
                for extra in extra_list:
                    try:
                        requirements = dist.requires(extras=[extra])  # type: ignore[arg-type]
                    except pkg_resources.UnknownExtra as e:
                        # The installed distribution does not declare this
                        # extra: treat the integration as not installed.
                        logger.debug(f"Unknown extra: {str(e)}")
                        return False
                    deps.extend(requirements)
            else:
                deps = dist.requires()

            for ri in deps:
                try:
                    # Remove the "extra == ..." part from the requirement string
                    # NOTE(review): the pattern only matches extra names made
                    # of `\w` characters in double quotes; a name containing
                    # e.g. a hyphen would be left in place -- confirm.
                    cleaned_req = re.sub(
                        r"; extra == \"\w+\"", "", str(ri)
                    )
                    pkg_resources.get_distribution(cleaned_req)
                except pkg_resources.DistributionNotFound as e:
                    # A dependency of requirement `r` is not installed.
                    logger.debug(
                        f"Unable to find required dependency "
                        f"'{e.req}' for requirement '{r}' "
                        f"necessary for integration '{cls.NAME}'."
                    )
                    return False
                except pkg_resources.VersionConflict as e:
                    # A dependency is installed, but in a version that does
                    # not satisfy what `r` asks for.
                    logger.debug(
                        f"Package version '{e.dist}' does not match "
                        f"version '{e.req}' required by '{r}' "
                        f"necessary for integration '{cls.NAME}'."
                    )
                    return False

        except pkg_resources.DistributionNotFound as e:
            # The requirement itself is not installed.
            logger.debug(
                f"Unable to find required package '{e.req}' for "
                f"integration {cls.NAME}."
            )
            return False
        except pkg_resources.VersionConflict as e:
            # The requirement is installed in a non-matching version.
            logger.debug(
                f"Package version '{e.dist}' does not match version "
                f"'{e.req}' necessary for integration {cls.NAME}."
            )
            return False

    # Reached only when no requirement triggered an early `return False`.
    logger.debug(
        f"Integration {cls.NAME} is installed correctly with "
        f"requirements {cls.get_requirements()}."
    )
    return True
flavors() -> List[Type[Flavor]] classmethod

Abstract method to declare new stack component flavors.

Returns:

Type Description
List[Type[Flavor]]

A list of new stack component flavors.

Source code in src/zenml/integrations/integration.py
174
175
176
177
178
179
180
181
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors this integration contributes.

    The base implementation contributes none; integrations that ship
    flavors are expected to override it.

    Returns:
        A list of new stack component flavors.
    """
    declared_flavors: List[Type[Flavor]] = []
    return declared_flavors
get_requirements(target_os: Optional[str] = None) -> List[str] classmethod

Method to get the requirements for the integration.

Parameters:

Name Type Description Default
target_os Optional[str]

The target operating system to get the requirements for.

None

Returns:

Type Description
List[str]

A list of requirements.

Source code in src/zenml/integrations/integration.py
135
136
137
138
139
140
141
142
143
144
145
@classmethod
def get_requirements(cls, target_os: Optional[str] = None) -> List[str]:
    """Report the requirements of the integration.

    The base implementation ignores `target_os` and hands back the class's
    `REQUIREMENTS` attribute itself (not a copy).

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        A list of requirements.
    """
    requirements: List[str] = cls.REQUIREMENTS
    return requirements
get_uninstall_requirements(target_os: Optional[str] = None) -> List[str] classmethod

Method to get the uninstall requirements for the integration.

Parameters:

Name Type Description Default
target_os Optional[str]

The target operating system to get the requirements for.

None

Returns:

Type Description
List[str]

A list of requirements.

Source code in src/zenml/integrations/integration.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
@classmethod
def get_uninstall_requirements(
    cls, target_os: Optional[str] = None
) -> List[str]:
    """Report the requirements to remove when uninstalling the integration.

    These are the integration's requirements for `target_os`, minus every
    entry that starts with one of the prefixes listed in
    `REQUIREMENTS_IGNORED_ON_UNINSTALL`. The original order is kept.

    Args:
        target_os: The target operating system to get the requirements for.

    Returns:
        A list of requirements.
    """
    return [
        requirement
        for requirement in cls.get_requirements(target_os=target_os)
        if not any(
            requirement.startswith(ignored_prefix)
            for ignored_prefix in cls.REQUIREMENTS_IGNORED_ON_UNINSTALL
        )
    ]
plugin_flavors() -> List[Type[BasePluginFlavor]] classmethod

Abstract method to declare new plugin flavors.

Returns:

Type Description
List[Type[BasePluginFlavor]]

A list of new plugin flavors.

Source code in src/zenml/integrations/integration.py
183
184
185
186
187
188
189
190
@classmethod
def plugin_flavors(cls) -> List[Type["BasePluginFlavor"]]:
    """Declare the plugin flavors this integration contributes.

    The base implementation contributes none; integrations that ship
    plugin flavors are expected to override it.

    Returns:
        A list of new plugin flavors.
    """
    declared_plugin_flavors: List[Type["BasePluginFlavor"]] = []
    return declared_plugin_flavors

Modules

materializers

Facets Materializers.

Classes
FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)

Bases: BaseMaterializer

Materializer to save Facets visualizations.

This materializer is used to visualize and compare dataset statistics using Facets. In contrast to other materializers, this materializer only saves the visualization and not the data itself.

Source code in src/zenml/materializers/base_materializer.py
125
126
127
128
129
130
131
132
133
134
135
def __init__(
    self, uri: str, artifact_store: Optional[BaseArtifactStore] = None
):
    """Initializes a materializer with the given URI.

    Both arguments are only stored on the instance; nothing else happens
    at construction time.

    Args:
        uri: The URI where the artifact data will be stored.
        artifact_store: The artifact store used to store this artifact.
            Defaults to `None`.
    """
    self.uri = uri
    # Stored under a non-public name; may be `None` when no store was given.
    self._artifact_store = artifact_store
Functions
save_visualizations(data: FacetsComparison) -> Dict[str, VisualizationType]

Save a Facets visualization of the data.

Parameters:

Name Type Description Default
data FacetsComparison

The data to visualize.

required

Returns:

Type Description
Dict[str, VisualizationType]

A dictionary of visualization URIs and their types.

Source code in src/zenml/integrations/facets/materializers/facets_materializer.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def save_visualizations(
    self, data: FacetsComparison
) -> Dict[str, VisualizationType]:
    """Render the datasets as a Facets HTML page and store it.

    Feature statistics are generated from `data.datasets`, base64-encoded
    and substituted for the `protostr` placeholder in the `stats.html`
    template located next to this module. The resulting page is written
    below `self.uri`.

    Args:
        data: The data to visualize.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    stats_generator = GenericFeatureStatisticsGenerator()
    stats_proto = stats_generator.ProtoFromDataFrames(data.datasets)
    serialized_stats = stats_proto.SerializeToString()
    encoded_stats = base64.b64encode(serialized_stats).decode("utf-8")

    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    template_html = io_utils.read_file_contents_as_string(template_path)
    rendered_html = template_html.replace("protostr", encoded_stats)

    # Use forward slashes only, whatever separator `os.path.join` produced.
    joined_path = os.path.join(self.uri, VISUALIZATION_FILENAME)
    visualization_path = joined_path.replace("\\", "/")
    with fileio.open(visualization_path, "w") as html_file:
        html_file.write(rendered_html)

    return {visualization_path: VisualizationType.HTML}
Modules
facets_materializer

Implementation of the FacetsMaterializer.

Classes
FacetsMaterializer(uri: str, artifact_store: Optional[BaseArtifactStore] = None)

Bases: BaseMaterializer

Materializer to save Facets visualizations.

This materializer is used to visualize and compare dataset statistics using Facets. In contrast to other materializers, this materializer only saves the visualization and not the data itself.

Source code in src/zenml/materializers/base_materializer.py
125
126
127
128
129
130
131
132
133
134
135
def __init__(
    self, uri: str, artifact_store: Optional[BaseArtifactStore] = None
):
    """Initializes a materializer with the given URI.

    Both arguments are only stored on the instance; nothing else happens
    at construction time.

    Args:
        uri: The URI where the artifact data will be stored.
        artifact_store: The artifact store used to store this artifact.
            Defaults to `None`.
    """
    self.uri = uri
    # Stored under a non-public name; may be `None` when no store was given.
    self._artifact_store = artifact_store
Functions
save_visualizations(data: FacetsComparison) -> Dict[str, VisualizationType]

Save a Facets visualization of the data.

Parameters:

Name Type Description Default
data FacetsComparison

The data to visualize.

required

Returns:

Type Description
Dict[str, VisualizationType]

A dictionary of visualization URIs and their types.

Source code in src/zenml/integrations/facets/materializers/facets_materializer.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def save_visualizations(
    self, data: FacetsComparison
) -> Dict[str, VisualizationType]:
    """Render the datasets as a Facets HTML page and store it.

    Feature statistics are generated from `data.datasets`, base64-encoded
    and substituted for the `protostr` placeholder in the `stats.html`
    template located next to this module. The resulting page is written
    below `self.uri`.

    Args:
        data: The data to visualize.

    Returns:
        A dictionary of visualization URIs and their types.
    """
    stats_generator = GenericFeatureStatisticsGenerator()
    stats_proto = stats_generator.ProtoFromDataFrames(data.datasets)
    serialized_stats = stats_proto.SerializeToString()
    encoded_stats = base64.b64encode(serialized_stats).decode("utf-8")

    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    template_html = io_utils.read_file_contents_as_string(template_path)
    rendered_html = template_html.replace("protostr", encoded_stats)

    # Use forward slashes only, whatever separator `os.path.join` produced.
    joined_path = os.path.join(self.uri, VISUALIZATION_FILENAME)
    visualization_path = joined_path.replace("\\", "/")
    with fileio.open(visualization_path, "w") as html_file:
        html_file.write(rendered_html)

    return {visualization_path: VisualizationType.HTML}
Modules

models

Models used by the Facets integration.

Classes
FacetsComparison

Bases: BaseModel

Facets comparison model.

Returning this from any step will automatically visualize the dataset statistics using Facets.

Attributes:

Name Type Description
datasets List[Dict[str, Union[str, DataFrame]]]

List of datasets to compare. Should be in the format [{"name": "dataset_name", "table": pd.DataFrame}, ...].

steps

Facets Standard Steps.

Classes
FacetsComparison

Bases: BaseModel

Facets comparison model.

Returning this from any step will automatically visualize the dataset statistics using Facets.

Attributes:

Name Type Description
datasets List[Dict[str, Union[str, DataFrame]]]

List of datasets to compare. Should be in the format [{"name": "dataset_name", "table": pd.DataFrame}, ...].

Functions
facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison

Visualize and compare dataset statistics with Facets.

Parameters:

Name Type Description Default
dataframes Dict[str, DataFrame]

Dict of dataframes whose statistics should be compared, mapping names to dataframes.

required

Returns:

Type Description
FacetsComparison

FacetsComparison object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
@step
def facets_dict_visualization_step(
    dataframes: Dict[str, pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of named dataframes with Facets.

    Each dictionary key becomes the display name of its dataframe, in the
    dictionary's iteration order.

    Args:
        dataframes: Dict of dataframes whose statistics should be compared,
            mapping names to dataframes.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": dataset_name, "table": dataframe}
        for dataset_name, dataframe in dataframes.items()
    ]
    return FacetsComparison(datasets=datasets)
facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison

Visualize and compare dataset statistics with Facets.

Parameters:

Name Type Description Default
dataframes List[DataFrame]

List of dataframes whose statistics should be compared.

required

Returns:

Type Description
FacetsComparison

FacetsComparison object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
@step
def facets_list_visualization_step(
    dataframes: List[pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of a list of dataframes with Facets.

    Since the dataframes carry no names, each one is labelled
    `dataset_<index>` using its zero-based position in the list.

    Args:
        dataframes: List of dataframes whose statistics should be compared.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": f"dataset_{position}", "table": dataframe}
        for position, dataframe in enumerate(dataframes)
    ]
    return FacetsComparison(datasets=datasets)
facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison

Visualize and compare dataset statistics with Facets.

Parameters:

Name Type Description Default
reference DataFrame

Reference dataset.

required
comparison DataFrame

Dataset to compare to reference.

required

Returns:

Type Description
FacetsComparison

FacetsComparison object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@step
def facets_visualization_step(
    reference: pd.DataFrame, comparison: pd.DataFrame
) -> FacetsComparison:
    """Compare the statistics of two dataframes with Facets.

    The two datasets are labelled `reference` and `comparison`, in that
    order.

    Args:
        reference: Reference dataset.
        comparison: Dataset to compare to reference.

    Returns:
        `FacetsComparison` object.
    """
    reference_entry = {"name": "reference", "table": reference}
    comparison_entry = {"name": "comparison", "table": comparison}
    return FacetsComparison(datasets=[reference_entry, comparison_entry])
Modules
facets_visualization_steps

Facets Standard Steps.

Classes Functions
facets_dict_visualization_step(dataframes: Dict[str, pd.DataFrame]) -> FacetsComparison

Visualize and compare dataset statistics with Facets.

Parameters:

Name Type Description Default
dataframes Dict[str, DataFrame]

Dict of dataframes whose statistics should be compared, mapping names to dataframes.

required

Returns:

Type Description
FacetsComparison

FacetsComparison object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
@step
def facets_dict_visualization_step(
    dataframes: Dict[str, pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of named dataframes with Facets.

    Each dictionary key becomes the display name of its dataframe, in the
    dictionary's iteration order.

    Args:
        dataframes: Dict of dataframes whose statistics should be compared,
            mapping names to dataframes.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": dataset_name, "table": dataframe}
        for dataset_name, dataframe in dataframes.items()
    ]
    return FacetsComparison(datasets=datasets)
facets_list_visualization_step(dataframes: List[pd.DataFrame]) -> FacetsComparison

Visualize and compare dataset statistics with Facets.

Parameters:

Name Type Description Default
dataframes List[DataFrame]

List of dataframes whose statistics should be compared.

required

Returns:

Type Description
FacetsComparison

FacetsComparison object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
@step
def facets_list_visualization_step(
    dataframes: List[pd.DataFrame],
) -> FacetsComparison:
    """Compare the statistics of a list of dataframes with Facets.

    Since the dataframes carry no names, each one is labelled
    `dataset_<index>` using its zero-based position in the list.

    Args:
        dataframes: List of dataframes whose statistics should be compared.

    Returns:
        `FacetsComparison` object.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": f"dataset_{position}", "table": dataframe}
        for position, dataframe in enumerate(dataframes)
    ]
    return FacetsComparison(datasets=datasets)
facets_visualization_step(reference: pd.DataFrame, comparison: pd.DataFrame) -> FacetsComparison

Visualize and compare dataset statistics with Facets.

Parameters:

Name Type Description Default
reference DataFrame

Reference dataset.

required
comparison DataFrame

Dataset to compare to reference.

required

Returns:

Type Description
FacetsComparison

FacetsComparison object.

Source code in src/zenml/integrations/facets/steps/facets_visualization_steps.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@step
def facets_visualization_step(
    reference: pd.DataFrame, comparison: pd.DataFrame
) -> FacetsComparison:
    """Compare the statistics of two dataframes with Facets.

    The two datasets are labelled `reference` and `comparison`, in that
    order.

    Args:
        reference: Reference dataset.
        comparison: Dataset to compare to reference.

    Returns:
        `FacetsComparison` object.
    """
    reference_entry = {"name": "reference", "table": reference}
    comparison_entry = {"name": "comparison", "table": comparison}
    return FacetsComparison(datasets=[reference_entry, comparison_entry])