Facets
zenml.integrations.facets
special
Facets integration for ZenML.
The Facets integration provides a simple way to visualize post-execution objects
like `PipelineView`, `PipelineRunView`, and `StepView`. These objects can be
extended using the `BaseVisualizer` class. This integration requires
`facets-overview` to be installed in your Python environment.
FacetsIntegration (Integration)
Definition of the Facets integration for ZenML.
Source code in zenml/integrations/facets/__init__.py
class FacetsIntegration(Integration):
    """Integration definition that registers Facets with ZenML.

    See https://pair-code.github.io/facets/ for the Facets project itself.

    The class only declares metadata:

    * ``NAME`` is the integration identifier (the ``FACETS`` constant).
    * ``REQUIREMENTS`` lists the pip requirement strings of this integration.
    """

    # Identifier of this integration.
    NAME = FACETS

    # Pip requirement specifiers declared by this integration.
    REQUIREMENTS = [
        "facets-overview>=1.0.0",
        "IPython",
    ]
visualizers
special
Initialization of the Facet Visualizer.
facet_statistics_visualizer
Implementation of the Facet Statistics Visualizer.
FacetStatisticsVisualizer (BaseVisualizer)
Visualize and compare dataset statistics with Facets.
Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
class FacetStatisticsVisualizer(BaseVisualizer):
    """Visualize and compare dataset statistics with Facets.

    The visualizer collects pandas DataFrames (directly, or by reading them
    from artifacts), turns them into a Facets statistics proto, embeds that
    proto into the ``stats.html`` template that sits next to this module and
    finally shows the resulting page either inline in a notebook or in the
    web browser.
    """

    # NOTE: this is the concrete implementation, so it must not be decorated
    # with ``@abstractmethod``; doing so would make the class impossible to
    # instantiate as soon as an ABC metaclass is involved.
    def visualize(
        self,
        object: Union[StepView, Dict[str, Union[ArtifactView, pd.DataFrame]]],
        magic: bool = False,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Method to visualize components.

        Args:
            object: Either a StepView fetched from run.get_step() whose outputs
                are all datasets that should be visualized, or a dict that maps
                dataset names to datasets.
            magic: Whether to render in a Jupyter notebook or not.
            *args: Additional arguments (unused).
            **kwargs: Additional keyword arguments (unused).
        """
        data_dict = object.outputs if isinstance(object, StepView) else object
        datasets = []
        for dataset_name, data in data_dict.items():
            # Artifacts have to be materialized first; anything else is
            # taken as-is.
            df = data.read() if isinstance(data, ArtifactView) else data
            # isinstance (rather than an exact type comparison) so that
            # DataFrame subclasses are visualized as well.
            if not isinstance(df, pd.DataFrame):
                logger.warning(
                    "`%s` is not a pd.DataFrame. You can only visualize "
                    "statistics of steps that output pandas DataFrames. "
                    "Skipping this output..",
                    dataset_name,
                )
                continue
            datasets.append({"name": dataset_name, "table": df})
        html_ = self.generate_html(datasets)
        self.generate_facet(html_, magic)

    def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
        """Generates html for facet.

        Args:
            datasets: List of dicts of DataFrames to be visualized as stats.
                `visualize` builds each entry with a "name" and a "table"
                key.

        Returns:
            HTML template with proto string embedded.
        """
        proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(
            datasets
        )
        # The serialized proto is embedded into the page as base64 text.
        protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
        # The template is looked up in the directory of this module.
        template = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "stats.html",
        )
        html_template = io_utils.read_file_contents_as_string(template)
        # Every occurrence of the literal "protostr" in the template is
        # substituted with the encoded statistics.
        html_ = html_template.replace("protostr", protostr)
        return html_

    def generate_facet(self, html_: str, magic: bool = False) -> None:
        """Generate a Facet Overview.

        With ``magic=True`` the HTML is displayed inline; otherwise it is
        written to a temporary ``.html`` file (created with ``delete=False``,
        so it is not removed automatically) which is then opened through the
        ``webbrowser`` module.

        Args:
            html_: HTML represented as a string.
            magic: Whether to magically materialize facet in a notebook.

        Raises:
            EnvironmentError: If magic is True and not in a notebook.
        """
        # Function-scope import: keeps this block self-contained.
        from pathlib import Path

        if magic:
            if not (
                Environment.in_notebook() or Environment.in_google_colab()
            ):
                raise EnvironmentError(
                    "The magic functions are only usable in a Jupyter notebook."
                )
            display(HTML(html_))
        else:
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".html"
            ) as f:
                io_utils.write_file_contents_as_string(f.name, html_)
                # Build a well-formed file URI. Plain string concatenation
                # yields "file:////tmp/..." on POSIX and unescaped
                # backslashes on Windows.
                url = Path(f.name).resolve().as_uri()
                logger.info("Opening %s in a new browser..", f.name)
                webbrowser.open(url, new=2)
generate_facet(self, html_, magic=False)
Generate a Facet Overview.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
html_ |
str |
HTML represented as a string. |
required |
magic |
bool |
Whether to magically materialize facet in a notebook. |
False |
Exceptions:
Type | Description |
---|---|
EnvironmentError |
If magic is True and not in a notebook. |
Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
def generate_facet(self, html_: str, magic: bool = False) -> None:
    """Generate a Facet Overview.

    With ``magic=True`` the HTML is displayed inline; otherwise it is
    written to a temporary ``.html`` file (created with ``delete=False``,
    so it is not removed automatically) which is then opened through the
    ``webbrowser`` module.

    Args:
        html_: HTML represented as a string.
        magic: Whether to magically materialize facet in a notebook.

    Raises:
        EnvironmentError: If magic is True and not in a notebook.
    """
    # Function-scope import: keeps this block self-contained.
    from pathlib import Path

    if magic:
        if not (
            Environment.in_notebook() or Environment.in_google_colab()
        ):
            raise EnvironmentError(
                "The magic functions are only usable in a Jupyter notebook."
            )
        display(HTML(html_))
    else:
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".html"
        ) as f:
            io_utils.write_file_contents_as_string(f.name, html_)
            # Build a well-formed file URI. Plain string concatenation
            # yields "file:////tmp/..." on POSIX and unescaped backslashes
            # on Windows.
            url = Path(f.name).resolve().as_uri()
            logger.info("Opening %s in a new browser..", f.name)
            webbrowser.open(url, new=2)
generate_html(self, datasets)
Generates html for facet.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
datasets |
List[Dict[str, pandas.core.frame.DataFrame]] |
List of dicts of DataFrames to be visualized as stats. |
required |
Returns:
Type | Description |
---|---|
str |
HTML template with proto string embedded. |
Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
def generate_html(self, datasets: List[Dict[Text, pd.DataFrame]]) -> str:
    """Render the Facets Overview page for the given datasets.

    Args:
        datasets: The dataset descriptions handed unchanged to
            ``ProtoFromDataFrames``. `visualize` builds each entry with a
            "name" and a "table" key.

    Returns:
        The contents of the ``stats.html`` template with every occurrence
        of the literal ``protostr`` replaced by the base64-encoded,
        serialized statistics proto.
    """
    # Compute the statistics and serialize them to base64 text.
    stats_generator = GenericFeatureStatisticsGenerator()
    serialized_stats = stats_generator.ProtoFromDataFrames(
        datasets
    ).SerializeToString()
    encoded_stats = base64.b64encode(serialized_stats).decode("utf-8")

    # The template is looked up in the directory of this module.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    template_path = os.path.join(module_dir, "stats.html")
    page = io_utils.read_file_contents_as_string(template_path)

    return page.replace("protostr", encoded_stats)
visualize(self, object, magic=False, *args, **kwargs)
Method to visualize components.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
object |
Union[zenml.post_execution.step.StepView, Dict[str, Union[zenml.post_execution.artifact.ArtifactView, pandas.core.frame.DataFrame]]] |
Either a StepView fetched from run.get_step() whose outputs are all datasets that should be visualized, or a dict that maps dataset names to datasets. |
required |
magic |
bool |
Whether to render in a Jupyter notebook or not. |
False |
*args |
Any |
Additional arguments. |
() |
**kwargs |
Any |
Additional keyword arguments. |
{} |
Source code in zenml/integrations/facets/visualizers/facet_statistics_visualizer.py
# NOTE: this is the concrete implementation, so it must not be decorated
# with ``@abstractmethod``; doing so would make the class impossible to
# instantiate as soon as an ABC metaclass is involved.
def visualize(
    self,
    object: Union[StepView, Dict[str, Union[ArtifactView, pd.DataFrame]]],
    magic: bool = False,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Method to visualize components.

    Args:
        object: Either a StepView fetched from run.get_step() whose outputs
            are all datasets that should be visualized, or a dict that maps
            dataset names to datasets.
        magic: Whether to render in a Jupyter notebook or not.
        *args: Additional arguments (unused).
        **kwargs: Additional keyword arguments (unused).
    """
    data_dict = object.outputs if isinstance(object, StepView) else object
    datasets = []
    for dataset_name, data in data_dict.items():
        # Artifacts have to be materialized first; anything else is taken
        # as-is.
        df = data.read() if isinstance(data, ArtifactView) else data
        # isinstance (rather than an exact type comparison) so that
        # DataFrame subclasses are visualized as well.
        if not isinstance(df, pd.DataFrame):
            logger.warning(
                "`%s` is not a pd.DataFrame. You can only visualize "
                "statistics of steps that output pandas DataFrames. "
                "Skipping this output..",
                dataset_name,
            )
            continue
        datasets.append({"name": dataset_name, "table": df})
    html_ = self.generate_html(datasets)
    self.generate_facet(html_, magic)