Skip to content

Pigeon

zenml.integrations.pigeon special

Initialization of the Pigeon integration.

PigeonIntegration (Integration)

Definition of Pigeon integration for ZenML.

Source code in zenml/integrations/pigeon/__init__.py
class PigeonIntegration(Integration):
    """Definition of Pigeon integration for ZenML."""

    NAME = PIGEON
    REQUIREMENTS = ["ipywidgets>=8.0.0"]

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Pigeon integration.

        Returns:
            List of stack component flavors for this integration.
        """
        from zenml.integrations.pigeon.flavors import (
            PigeonAnnotatorFlavor,
        )

        return [PigeonAnnotatorFlavor]

flavors() classmethod

Declare the stack component flavors for the Pigeon integration.

Returns:

Type Description
List[Type[zenml.stack.flavor.Flavor]]

List of stack component flavors for this integration.

Source code in zenml/integrations/pigeon/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Pigeon integration.

    Returns:
        List of stack component flavors for this integration.
    """
    from zenml.integrations.pigeon.flavors import (
        PigeonAnnotatorFlavor,
    )

    return [PigeonAnnotatorFlavor]

annotators special

Initialization of the Pigeon annotators submodule.

pigeon_annotator

Pigeon annotator.

Credit for the implementation of this code to @agermanidis in the Pigeon package and library. This code has been slightly modified to fit the ZenML framework. We use the modified code directly here because the original package (and code) is no longer compatible with more recent versions of ipywidgets.

https://github.com/agermanidis/pigeon

PigeonAnnotator (BaseAnnotator)

Annotator for using Pigeon in Jupyter notebooks.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
class PigeonAnnotator(BaseAnnotator):
    """Annotator for using Pigeon in Jupyter notebooks."""

    @property
    def config(self) -> PigeonAnnotatorConfig:
        """Get the Pigeon annotator config.

        Returns:
            The Pigeon annotator config.
        """
        return cast(PigeonAnnotatorConfig, self._config)

    def get_url(self) -> str:
        """Get the URL of the Pigeon annotator.

        Raises:
            NotImplementedError: Pigeon annotator does not have a URL.
        """
        raise NotImplementedError("Pigeon annotator does not have a URL.")

    def get_url_for_dataset(self, dataset_name: str) -> str:
        """Get the URL of the Pigeon annotator for a specific dataset.

        Args:
            dataset_name: Name of the dataset (annotation file).

        Raises:
            NotImplementedError: Pigeon annotator does not have a URL.
        """
        raise NotImplementedError("Pigeon annotator does not have a URL.")

    def get_datasets(self) -> List[str]:
        """Get a list of datasets (annotation files) in the output directory.

        Returns:
            A list of dataset names (annotation file names) (or empty list when no datasets are present).
        """
        output_dir = self.config.output_dir
        try:
            return [f for f in os.listdir(output_dir) if f.endswith(".txt")]
        except FileNotFoundError:
            return []

    def get_dataset_names(self) -> List[str]:
        """List dataset names (annotation file names) in the output directory.

        Returns:
            A list of dataset names (annotation file names).
        """
        return self.get_datasets()

    def get_dataset_stats(self, dataset_name: str) -> Tuple[int, int]:
        """List labeled and unlabeled examples in a dataset (annotation file).

        Args:
            dataset_name: Name of the dataset (annotation file).

        Returns:
            A tuple containing (num_labeled_examples, num_unlabeled_examples).
        """
        dataset_path = os.path.join(self.config.output_dir, dataset_name)
        num_labeled_examples = 0
        # Placeholder as logic to determine this is not implemented
        num_unlabeled_examples = 0

        try:
            with open(dataset_path, "r") as file:
                num_labeled_examples = sum(1 for _ in file)
        except FileNotFoundError:
            logger.error(f"File not found: {dataset_path}")

        return num_labeled_examples, num_unlabeled_examples

    def _annotate(
        self,
        data: List[Any],
        options: List[str],
        display_fn: Optional[Any] = None,
    ) -> List[Tuple[Any, Any]]:
        """Internal method to build an interactive widget for annotating.

        Args:
            data: List of examples to annotate.
            options: List of labels to choose from.
            display_fn: Optional function to display examples.

        Returns:
            A list of tuples containing (example, label) for each annotated example.
        """
        examples = list(data)
        annotations = []
        current_index = 0
        out = widgets.Output()

        def show_next() -> None:
            nonlocal current_index
            if current_index >= len(examples):
                with out:
                    clear_output(wait=True)
                    logger.info("Annotation done.")
                return
            with out:
                clear_output(wait=True)
                if display_fn:
                    display_fn(examples[current_index])
                else:
                    display(examples[current_index])

        def add_annotation(btn: widgets.Button) -> None:
            """Add an annotation to the list of annotations.

            Args:
                btn: The button that triggered the event.
            """
            nonlocal current_index
            annotation = btn.description
            annotations.append((examples[current_index], annotation))
            current_index += 1
            show_next()

        def submit_annotations(btn: widgets.Button) -> None:
            """Submit all annotations and save them to a file.

            Args:
                btn: The button that triggered the event.
            """
            self._save_annotations(annotations)
            with out:
                clear_output(wait=True)
                logger.info("Annotations saved.")

        count_label = widgets.Label()
        display(count_label)

        buttons = []
        for label in options:
            btn = widgets.Button(description=label)
            btn.on_click(add_annotation)
            buttons.append(btn)

        submit_btn = widgets.Button(
            description="Save labels", button_style="success"
        )
        submit_btn.on_click(submit_annotations)
        buttons.append(submit_btn)

        navigation_box = widgets.HBox(buttons)
        display(navigation_box)
        display(out)
        show_next()

        return annotations

    def launch(self, **kwargs: Any) -> None:
        """Launch the Pigeon annotator in the Jupyter notebook.

        Args:
            **kwargs: Additional keyword arguments to pass to the annotation client.

        Raises:
            NotImplementedError: Pigeon annotator does not support launching with a URL.
        """
        raise NotImplementedError(
            "Pigeon annotator does not support launching with a URL."
        )

    def annotate(
        self,
        data: List[Any],
        options: List[str],
        display_fn: Optional[Any] = None,
    ) -> List[Tuple[Any, Any]]:
        """Annotate with the Pigeon annotator in the Jupyter notebook.

        Args:
            data: List of examples to annotate.
            options: List of labels to choose from.
            display_fn: Optional function to display examples.

        Returns:
            A list of tuples containing (example, label) for each annotated example.
        """
        annotations = self._annotate(data, options, display_fn)
        return annotations

    def _save_annotations(self, annotations: List[Tuple[Any, Any]]) -> None:
        """Save annotations to a file with a unique date-time suffix.

        Args:
            annotations: List of tuples containing (example, label) for each annotated example.
        """
        output_dir = self.config.output_dir
        os.makedirs(output_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = os.path.join(output_dir, f"annotations_{timestamp}.json")
        with open(output_file, "w") as f:
            json.dump(annotations, f)

    def add_dataset(self, **kwargs: Any) -> Any:
        """Add a dataset (annotation file) to the Pigeon annotator.

        Args:
            **kwargs: keyword arguments.

        Raises:
            NotImplementedError: Pigeon annotator does not support adding datasets.
        """
        raise NotImplementedError(
            "Pigeon annotator does not support adding datasets."
        )

    def delete_dataset(self, **kwargs: Any) -> None:
        """Delete a dataset (annotation file).

        Takes the `dataset_name` argument from the kwargs.

        Args:
            **kwargs: Keyword arguments containing the `dataset_name` to delete.

        Raises:
            ValueError: Dataset name is required to delete a dataset.
        """
        dataset_name = kwargs.get("dataset_name")
        if not dataset_name:
            raise ValueError(
                "Dataset name (`dataset_name`) is required to delete a dataset."
            )
        dataset_path = os.path.join(self.config.output_dir, dataset_name)
        os.remove(dataset_path)

    def get_dataset(self, **kwargs: Any) -> List[Tuple[Any, Any]]:
        """Get the annotated examples from a dataset (annotation file).

        Takes the `dataset_name` argument from the kwargs.

        Args:
            **kwargs: Keyword arguments containing the `dataset_name` to retrieve.

        Returns:
            A list of tuples containing (example, label) for each annotated
            example.

        Raises:
            ValueError: Dataset name is required to retrieve a dataset.
        """
        dataset_name = kwargs.get("dataset_name")
        if not dataset_name:
            raise ValueError(
                "Dataset name (`dataset_name`) is required to retrieve a dataset."
            )
        dataset_path = os.path.join(self.config.output_dir, dataset_name)
        with open(dataset_path, "r") as f:
            annotations = json.load(f)
        return cast(List[Tuple[Any, Any]], annotations)

    def get_labeled_data(self, **kwargs: Any) -> List[Tuple[Any, Any]]:
        """Get the labeled examples from a dataset (annotation file).

        Takes the `dataset_name` argument from the kwargs.

        Args:
            **kwargs: Keyword arguments containing the `dataset_name` to retrieve.

        Returns:
            A list of tuples containing (example, label) for each labeled
            example.

        Raises:
            ValueError: Dataset name is required to retrieve labeled data.
        """
        if dataset_name := kwargs.get("dataset_name"):
            return self.get_dataset(dataset_name=dataset_name)
        else:
            raise ValueError(
                "Dataset name (`dataset_name`) is required to retrieve labeled data."
            )

    def get_unlabeled_data(self, **kwargs: Any) -> Any:
        """Get the unlabeled examples from a dataset (annotation file).

        Args:
            **kwargs: keyword arguments.

        Raises:
            NotImplementedError: Pigeon annotator does not support retrieving unlabeled data.
        """
        raise NotImplementedError(
            "Pigeon annotator does not support retrieving unlabeled data."
        )
config: PigeonAnnotatorConfig property readonly

Get the Pigeon annotator config.

Returns:

Type Description
PigeonAnnotatorConfig

The Pigeon annotator config.

add_dataset(self, **kwargs)

Add a dataset (annotation file) to the Pigeon annotator.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Exceptions:

Type Description
NotImplementedError

Pigeon annotator does not support adding datasets.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def add_dataset(self, **kwargs: Any) -> Any:
    """Add a dataset (annotation file) to the Pigeon annotator.

    Args:
        **kwargs: keyword arguments.

    Raises:
        NotImplementedError: Pigeon annotator does not support adding datasets.
    """
    raise NotImplementedError(
        "Pigeon annotator does not support adding datasets."
    )
annotate(self, data, options, display_fn=None)

Annotate with the Pigeon annotator in the Jupyter notebook.

Parameters:

Name Type Description Default
data List[Any]

List of examples to annotate.

required
options List[str]

List of labels to choose from.

required
display_fn Optional[Any]

Optional function to display examples.

None

Returns:

Type Description
List[Tuple[Any, Any]]

A list of tuples containing (example, label) for each annotated example.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def annotate(
    self,
    data: List[Any],
    options: List[str],
    display_fn: Optional[Any] = None,
) -> List[Tuple[Any, Any]]:
    """Annotate with the Pigeon annotator in the Jupyter notebook.

    Args:
        data: List of examples to annotate.
        options: List of labels to choose from.
        display_fn: Optional function to display examples.

    Returns:
        A list of tuples containing (example, label) for each annotated example.
    """
    annotations = self._annotate(data, options, display_fn)
    return annotations
delete_dataset(self, **kwargs)

Delete a dataset (annotation file).

Takes the dataset_name argument from the kwargs.

Parameters:

Name Type Description Default
**kwargs Any

Keyword arguments containing the dataset_name to delete.

{}

Exceptions:

Type Description
ValueError

Dataset name is required to delete a dataset.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def delete_dataset(self, **kwargs: Any) -> None:
    """Delete a dataset (annotation file).

    Takes the `dataset_name` argument from the kwargs.

    Args:
        **kwargs: Keyword arguments containing the `dataset_name` to delete.

    Raises:
        ValueError: Dataset name is required to delete a dataset.
    """
    dataset_name = kwargs.get("dataset_name")
    if not dataset_name:
        raise ValueError(
            "Dataset name (`dataset_name`) is required to delete a dataset."
        )
    dataset_path = os.path.join(self.config.output_dir, dataset_name)
    os.remove(dataset_path)
get_dataset(self, **kwargs)

Get the annotated examples from a dataset (annotation file).

Takes the dataset_name argument from the kwargs.

Parameters:

Name Type Description Default
**kwargs Any

Keyword arguments containing the dataset_name to retrieve.

{}

Returns:

Type Description
List[Tuple[Any, Any]]

A list of tuples containing (example, label) for each annotated example.

Exceptions:

Type Description
ValueError

Dataset name is required to retrieve a dataset.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_dataset(self, **kwargs: Any) -> List[Tuple[Any, Any]]:
    """Get the annotated examples from a dataset (annotation file).

    Takes the `dataset_name` argument from the kwargs.

    Args:
        **kwargs: Keyword arguments containing the `dataset_name` to retrieve.

    Returns:
        A list of tuples containing (example, label) for each annotated
        example.

    Raises:
        ValueError: Dataset name is required to retrieve a dataset.
    """
    dataset_name = kwargs.get("dataset_name")
    if not dataset_name:
        raise ValueError(
            "Dataset name (`dataset_name`) is required to retrieve a dataset."
        )
    dataset_path = os.path.join(self.config.output_dir, dataset_name)
    with open(dataset_path, "r") as f:
        annotations = json.load(f)
    return cast(List[Tuple[Any, Any]], annotations)
get_dataset_names(self)

List dataset names (annotation file names) in the output directory.

Returns:

Type Description
List[str]

A list of dataset names (annotation file names).

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_dataset_names(self) -> List[str]:
    """List dataset names (annotation file names) in the output directory.

    Returns:
        A list of dataset names (annotation file names).
    """
    return self.get_datasets()
get_dataset_stats(self, dataset_name)

List labeled and unlabeled examples in a dataset (annotation file).

Parameters:

Name Type Description Default
dataset_name str

Name of the dataset (annotation file).

required

Returns:

Type Description
Tuple[int, int]

A tuple containing (num_labeled_examples, num_unlabeled_examples).

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_dataset_stats(self, dataset_name: str) -> Tuple[int, int]:
    """List labeled and unlabeled examples in a dataset (annotation file).

    Args:
        dataset_name: Name of the dataset (annotation file).

    Returns:
        A tuple containing (num_labeled_examples, num_unlabeled_examples).
    """
    dataset_path = os.path.join(self.config.output_dir, dataset_name)
    num_labeled_examples = 0
    # Placeholder as logic to determine this is not implemented
    num_unlabeled_examples = 0

    try:
        with open(dataset_path, "r") as file:
            num_labeled_examples = sum(1 for _ in file)
    except FileNotFoundError:
        logger.error(f"File not found: {dataset_path}")

    return num_labeled_examples, num_unlabeled_examples
get_datasets(self)

Get a list of datasets (annotation files) in the output directory.

Returns:

Type Description
List[str]

A list of dataset names (annotation file names) (or empty list when no datasets are present).

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_datasets(self) -> List[str]:
    """Get a list of datasets (annotation files) in the output directory.

    Returns:
        A list of dataset names (annotation file names) (or empty list when no datasets are present).
    """
    output_dir = self.config.output_dir
    try:
        return [f for f in os.listdir(output_dir) if f.endswith(".txt")]
    except FileNotFoundError:
        return []
get_labeled_data(self, **kwargs)

Get the labeled examples from a dataset (annotation file).

Takes the dataset_name argument from the kwargs.

Parameters:

Name Type Description Default
**kwargs Any

Keyword arguments containing the dataset_name to retrieve.

{}

Returns:

Type Description
List[Tuple[Any, Any]]

A list of tuples containing (example, label) for each labeled example.

Exceptions:

Type Description
ValueError

Dataset name is required to retrieve labeled data.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_labeled_data(self, **kwargs: Any) -> List[Tuple[Any, Any]]:
    """Get the labeled examples from a dataset (annotation file).

    Takes the `dataset_name` argument from the kwargs.

    Args:
        **kwargs: Keyword arguments containing the `dataset_name` to retrieve.

    Returns:
        A list of tuples containing (example, label) for each labeled
        example.

    Raises:
        ValueError: Dataset name is required to retrieve labeled data.
    """
    if dataset_name := kwargs.get("dataset_name"):
        return self.get_dataset(dataset_name=dataset_name)
    else:
        raise ValueError(
            "Dataset name (`dataset_name`) is required to retrieve labeled data."
        )
get_unlabeled_data(self, **kwargs)

Get the unlabeled examples from a dataset (annotation file).

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Exceptions:

Type Description
NotImplementedError

Pigeon annotator does not support retrieving unlabeled data.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_unlabeled_data(self, **kwargs: Any) -> Any:
    """Get the unlabeled examples from a dataset (annotation file).

    Args:
        **kwargs: keyword arguments.

    Raises:
        NotImplementedError: Pigeon annotator does not support retrieving unlabeled data.
    """
    raise NotImplementedError(
        "Pigeon annotator does not support retrieving unlabeled data."
    )
get_url(self)

Get the URL of the Pigeon annotator.

Exceptions:

Type Description
NotImplementedError

Pigeon annotator does not have a URL.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_url(self) -> str:
    """Get the URL of the Pigeon annotator.

    Raises:
        NotImplementedError: Pigeon annotator does not have a URL.
    """
    raise NotImplementedError("Pigeon annotator does not have a URL.")
get_url_for_dataset(self, dataset_name)

Get the URL of the Pigeon annotator for a specific dataset.

Parameters:

Name Type Description Default
dataset_name str

Name of the dataset (annotation file).

required

Exceptions:

Type Description
NotImplementedError

Pigeon annotator does not have a URL.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def get_url_for_dataset(self, dataset_name: str) -> str:
    """Get the URL of the Pigeon annotator for a specific dataset.

    Args:
        dataset_name: Name of the dataset (annotation file).

    Raises:
        NotImplementedError: Pigeon annotator does not have a URL.
    """
    raise NotImplementedError("Pigeon annotator does not have a URL.")
launch(self, **kwargs)

Launch the Pigeon annotator in the Jupyter notebook.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the annotation client.

{}

Exceptions:

Type Description
NotImplementedError

Pigeon annotator does not support launching with a URL.

Source code in zenml/integrations/pigeon/annotators/pigeon_annotator.py
def launch(self, **kwargs: Any) -> None:
    """Launch the Pigeon annotator in the Jupyter notebook.

    Args:
        **kwargs: Additional keyword arguments to pass to the annotation client.

    Raises:
        NotImplementedError: Pigeon annotator does not support launching with a URL.
    """
    raise NotImplementedError(
        "Pigeon annotator does not support launching with a URL."
    )

flavors special

Pigeon integration flavors.

pigeon_annotator_flavor

Pigeon annotator flavor.

PigeonAnnotatorConfig (BaseAnnotatorConfig, PigeonAnnotatorSettings, AuthenticationConfigMixin) pydantic-model

Config for the Pigeon annotator.

Attributes:

Name Type Description
output_dir str

The directory to store the annotations.

notebook_only ClassVar[bool]

Whether the annotator only works within a notebook.

Source code in zenml/integrations/pigeon/flavors/pigeon_annotator_flavor.py
class PigeonAnnotatorConfig(  # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
    BaseAnnotatorConfig, PigeonAnnotatorSettings, AuthenticationConfigMixin
):
    """Config for the Pigeon annotator.

    Attributes:
        output_dir: The directory to store the annotations.
        notebook_only: Whether the annotator only works within a notebook.
    """

    output_dir: str = "annotations"
PigeonAnnotatorFlavor (BaseAnnotatorFlavor)

Pigeon annotator flavor.

Source code in zenml/integrations/pigeon/flavors/pigeon_annotator_flavor.py
class PigeonAnnotatorFlavor(BaseAnnotatorFlavor):
    """Pigeon annotator flavor."""

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        return PIGEON_ANNOTATOR_FLAVOR

    @property
    def docs_url(self) -> Optional[str]:
        """A url to point at docs explaining this flavor.

        Returns:
            A flavor docs url.
        """
        return self.generate_default_docs_url()

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """A url to point at SDK docs explaining this flavor.

        Returns:
            A flavor SDK docs url.
        """
        return self.generate_default_sdk_docs_url()

    @property
    def logo_url(self) -> str:
        """A url to represent the flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/annotator/pigeon.png"

    @property
    def config_class(self) -> Type[PigeonAnnotatorConfig]:
        """Returns `PigeonAnnotatorConfig` config class.

        Returns:
            The config class.
        """
        return PigeonAnnotatorConfig

    @property
    def implementation_class(self) -> Type["PigeonAnnotator"]:
        """Implementation class for this flavor.

        Returns:
            The implementation class.
        """
        from zenml.integrations.pigeon.annotators import PigeonAnnotator

        return PigeonAnnotator
config_class: Type[zenml.integrations.pigeon.flavors.pigeon_annotator_flavor.PigeonAnnotatorConfig] property readonly

Returns PigeonAnnotatorConfig config class.

Returns:

Type Description
Type[zenml.integrations.pigeon.flavors.pigeon_annotator_flavor.PigeonAnnotatorConfig]

The config class.

docs_url: Optional[str] property readonly

A url to point at docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor docs url.

implementation_class: Type[PigeonAnnotator] property readonly

Implementation class for this flavor.

Returns:

Type Description
Type[PigeonAnnotator]

The implementation class.

logo_url: str property readonly

A url to represent the flavor in the dashboard.

Returns:

Type Description
str

The flavor logo.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

sdk_docs_url: Optional[str] property readonly

A url to point at SDK docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor SDK docs url.

PigeonAnnotatorSettings (BaseSettings) pydantic-model

Settings for the Pigeon annotator.

Source code in zenml/integrations/pigeon/flavors/pigeon_annotator_flavor.py
class PigeonAnnotatorSettings(BaseSettings):
    """Settings for the Pigeon annotator."""