Facets
zenml.integrations.facets
special
Facets integration for ZenML.
FacetsIntegration (Integration)
Definition of Facets integration for ZenML.
Source code in zenml/integrations/facets/__init__.py
class FacetsIntegration(Integration):
    """Facets integration for ZenML.

    Declares the integration's name and the packages it requires.
    """

    NAME = FACETS
    REQUIREMENTS = ["facets-overview>=1.0.0", "pandas"]

    @staticmethod
    def activate() -> None:
        """Activate the integration by importing its materializers."""
        # The imported name is unused locally (hence the `noqa`);
        # presumably the import registers the materializers - TODO confirm.
        from zenml.integrations.facets import materializers  # noqa
activate()
staticmethod
Activate the Facets integration.
Source code in zenml/integrations/facets/__init__.py
@staticmethod
def activate() -> None:
    """Activate the integration by importing its materializers."""
    # The imported name is unused locally (hence the `noqa`);
    # presumably the import registers the materializers - TODO confirm.
    from zenml.integrations.facets import materializers  # noqa
materializers
special
Facets Materializers.
facets_materializer
Implementation of the FacetsMaterializer.
FacetsMaterializer (BaseMaterializer)
Materializer to save Facets visualizations.
This materializer is used to visualize and compare dataset statistics using Facets. In contrast to other materializers, this materializer only saves the visualization and not the data itself.
Source code in zenml/integrations/facets/materializers/facets_materializer.py
class FacetsMaterializer(BaseMaterializer):
    """Materializer that stores Facets visualizations.

    It is used to visualize and compare dataset statistics with Facets.
    Unlike other materializers, it only saves the visualization and not
    the data itself.
    """

    ASSOCIATED_TYPES = (FacetsComparison,)
    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA_ANALYSIS

    def save_visualizations(
        self, data: FacetsComparison
    ) -> Dict[str, VisualizationType]:
        """Render the datasets' statistics into an HTML file and store it.

        Args:
            data: The comparison whose `datasets` are summarized.

        Returns:
            A single-entry dictionary mapping the URI of the written HTML
            file to `VisualizationType.HTML`.
        """
        # Serialize the statistics proto and base64-encode it so that it
        # can be embedded as text in the HTML template.
        stats_proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
            data.datasets
        )
        serialized = stats_proto.SerializeToString()
        encoded_stats = base64.b64encode(serialized).decode("utf-8")

        # The template is read from `stats.html` in this module's directory.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        template_path = os.path.join(module_dir, "stats.html")
        template_html = io_utils.read_file_contents_as_string(template_path)

        # Every occurrence of the "protostr" placeholder is substituted.
        rendered_html = template_html.replace("protostr", encoded_stats)

        target_path = os.path.join(self.uri, VISUALIZATION_FILENAME)
        with fileio.open(target_path, "w") as html_file:
            html_file.write(rendered_html)
        return {target_path: VisualizationType.HTML}
save_visualizations(self, data)
Save a Facets visualization of the data.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
FacetsComparison |
The data to visualize. |
required |
Returns:
Type | Description |
---|---|
Dict[str, zenml.enums.VisualizationType] |
A dictionary of visualization URIs and their types. |
Source code in zenml/integrations/facets/materializers/facets_materializer.py
def save_visualizations(
    self, data: FacetsComparison
) -> Dict[str, VisualizationType]:
    """Render the datasets' statistics into an HTML file and store it.

    Args:
        data: The comparison whose `datasets` are summarized.

    Returns:
        A single-entry dictionary mapping the URI of the written HTML
        file to `VisualizationType.HTML`.
    """
    # Serialize the statistics proto and base64-encode it so that it can
    # be embedded as text in the HTML template.
    stats_proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
        data.datasets
    )
    serialized = stats_proto.SerializeToString()
    encoded_stats = base64.b64encode(serialized).decode("utf-8")

    # The template is read from `stats.html` in this module's directory.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    template_html = io_utils.read_file_contents_as_string(template_path)

    # Every occurrence of the "protostr" placeholder is substituted.
    rendered_html = template_html.replace("protostr", encoded_stats)

    target_path = os.path.join(self.uri, VISUALIZATION_FILENAME)
    with fileio.open(target_path, "w") as html_file:
        html_file.write(rendered_html)
    return {target_path: VisualizationType.HTML}
models
Models used by the Facets integration.
FacetsComparison (BaseModel)
pydantic-model
Facets comparison model.
Returning this from any step will automatically visualize the datasets' statistics using Facets.
Attributes:
Name | Type | Description |
---|---|---|
datasets |
List[Dict[str, Union[str, pandas.core.frame.DataFrame]]] |
List of datasets to compare. Should be in the format `[{"name": "dataset_name", "table": pd.DataFrame}, ...]`. |
Source code in zenml/integrations/facets/models.py
class FacetsComparison(BaseModel):
    """Container for datasets whose statistics are compared with Facets.

    Returning this from any step will automatically visualize the
    datasets' statistics using Facets.

    Attributes:
        datasets: The datasets to compare, given as
            `[{"name": "dataset_name", "table": pd.DataFrame}, ...]`.
    """

    datasets: List[Dict[str, Union[str, pd.DataFrame]]]

    class Config:
        """Pydantic config."""

        # Presumably needed so pydantic accepts the `pd.DataFrame` values
        # held in `datasets` - confirm against the pydantic docs.
        arbitrary_types_allowed = True
Config
Pydantic config.
Source code in zenml/integrations/facets/models.py
class Config:
    """Pydantic config."""

    # Presumably needed so pydantic accepts the `pd.DataFrame` values
    # held in the model's `datasets` field - confirm against pydantic docs.
    arbitrary_types_allowed = True
steps
special
Facets Standard Steps.
facets_visualization_steps
Facets Standard Steps.
facets_dict_visualization_step (BaseStep)
Visualize and compare dataset statistics with Facets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataframes |
Dict[str, pandas.core.frame.DataFrame] |
Dict of dataframes whose statistics should be compared, mapping names to dataframes. |
required |
Returns:
Type | Description |
---|---|
FacetsComparison |
`FacetsComparison` object. |
entrypoint(dataframes)
staticmethod
Visualize and compare dataset statistics with Facets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataframes |
Dict[str, pandas.core.frame.DataFrame] |
Dict of dataframes whose statistics should be compared, mapping names to dataframes. |
required |
Returns:
Type | Description |
---|---|
FacetsComparison |
`FacetsComparison` object. |
Source code in zenml/integrations/facets/steps/facets_visualization_steps.py
@step
def facets_dict_visualization_step(
    dataframes: Dict[str, pd.DataFrame]
) -> FacetsComparison:
    """Bundle named dataframes into a Facets comparison.

    Args:
        dataframes: Mapping from dataset name to the dataframe whose
            statistics should be compared.

    Returns:
        A `FacetsComparison` with one `{"name": ..., "table": ...}` entry
        per item of `dataframes`, in the mapping's iteration order.
    """
    named_tables: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": label, "table": frame} for label, frame in dataframes.items()
    ]
    return FacetsComparison(datasets=named_tables)
facets_list_visualization_step (BaseStep)
Visualize and compare dataset statistics with Facets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataframes |
List[pandas.core.frame.DataFrame] |
List of dataframes whose statistics should be compared. |
required |
Returns:
Type | Description |
---|---|
FacetsComparison |
`FacetsComparison` object. |
entrypoint(dataframes)
staticmethod
Visualize and compare dataset statistics with Facets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataframes |
List[pandas.core.frame.DataFrame] |
List of dataframes whose statistics should be compared. |
required |
Returns:
Type | Description |
---|---|
FacetsComparison |
`FacetsComparison` object. |
Source code in zenml/integrations/facets/steps/facets_visualization_steps.py
@step
def facets_list_visualization_step(
    dataframes: List[pd.DataFrame],
) -> FacetsComparison:
    """Bundle a list of dataframes into a Facets comparison.

    Each dataframe is named `dataset_<i>`, where `<i>` is its zero-based
    position in the list.

    Args:
        dataframes: The dataframes whose statistics should be compared.

    Returns:
        A `FacetsComparison` with one `{"name": ..., "table": ...}` entry
        per dataframe, in list order.
    """
    named_tables: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": f"dataset_{index}", "table": frame}
        for index, frame in enumerate(dataframes)
    ]
    return FacetsComparison(datasets=named_tables)
facets_visualization_step (BaseStep)
Visualize and compare dataset statistics with Facets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reference |
DataFrame |
Reference dataset. |
required |
comparison |
DataFrame |
Dataset to compare to reference. |
required |
Returns:
Type | Description |
---|---|
FacetsComparison |
`FacetsComparison` object. |
entrypoint(reference, comparison)
staticmethod
Visualize and compare dataset statistics with Facets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reference |
DataFrame |
Reference dataset. |
required |
comparison |
DataFrame |
Dataset to compare to reference. |
required |
Returns:
Type | Description |
---|---|
FacetsComparison |
`FacetsComparison` object. |
Source code in zenml/integrations/facets/steps/facets_visualization_steps.py
@step
def facets_visualization_step(
    reference: pd.DataFrame, comparison: pd.DataFrame
) -> FacetsComparison:
    """Bundle a reference and a comparison dataset for Facets.

    Args:
        reference: Reference dataset.
        comparison: Dataset to compare to the reference.

    Returns:
        A `FacetsComparison` with two entries, named "reference" and
        "comparison", in that order.
    """
    datasets: List[Dict[str, Union[str, pd.DataFrame]]] = [
        {"name": "reference", "table": reference},
        {"name": "comparison", "table": comparison},
    ]
    return FacetsComparison(datasets=datasets)