Skip to content

Argilla

zenml.integrations.argilla special

Initialization of the Argilla integration.

ArgillaIntegration (Integration)

Definition of Argilla integration for ZenML.

Source code in zenml/integrations/argilla/__init__.py
class ArgillaIntegration(Integration):
    """ZenML integration descriptor for the Argilla annotation tool.

    Declares the integration name, the pip requirement it needs and the
    stack component flavors it contributes.
    """

    NAME = ARGILLA
    REQUIREMENTS = ["argilla>=2.0.0"]

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """List the stack component flavors shipped with this integration.

        Returns:
            A single-element list holding the Argilla annotator flavor class.
        """
        # Imported inside the method, exactly as in the original, so the
        # flavors module is only loaded when flavors are requested.
        from zenml.integrations.argilla.flavors import ArgillaAnnotatorFlavor

        integration_flavors: List[Type[Flavor]] = [ArgillaAnnotatorFlavor]
        return integration_flavors

flavors() classmethod

Declare the stack component flavors for the Argilla integration.

Returns:

Type Description
List[Type[zenml.stack.flavor.Flavor]]

List of stack component flavors for this integration.

Source code in zenml/integrations/argilla/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """List the stack component flavors shipped with the Argilla integration.

    Returns:
        A single-element list holding the Argilla annotator flavor class.
    """
    # Imported inside the method, exactly as in the original, so the flavors
    # module is only loaded when flavors are requested.
    from zenml.integrations.argilla.flavors import ArgillaAnnotatorFlavor

    integration_flavors: List[Type[Flavor]] = [ArgillaAnnotatorFlavor]
    return integration_flavors

annotators special

Initialization of the Argilla annotators submodule.

argilla_annotator

Implementation of the Argilla annotation integration.

ArgillaAnnotator (BaseAnnotator, AuthenticationMixin)

Class to interact with the Argilla annotation interface.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
class ArgillaAnnotator(BaseAnnotator, AuthenticationMixin):
    """Class to interact with the Argilla annotation interface.

    Every public method builds an Argilla client through `_get_client()` and
    then delegates to the Argilla SDK (`rg`).
    """

    @property
    def config(self) -> ArgillaAnnotatorConfig:
        """Returns the `ArgillaAnnotatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(ArgillaAnnotatorConfig, self._config)

    @property
    def settings_class(self) -> Type[ArgillaAnnotatorSettings]:
        """Settings class for the Argilla annotator.

        Returns:
            The settings class.
        """
        return ArgillaAnnotatorSettings

    def get_url(self) -> str:
        """Gets the top-level URL of the annotation interface.

        Returns:
            `<instance_url>:<port>` when a port is configured, otherwise the
            bare instance URL.
        """
        return (
            f"{self.config.instance_url}:{self.config.port}"
            if self.config.port
            else self.config.instance_url
        )

    def _get_client(self) -> ArgillaClient:
        """Gets the Argilla client.

        The API key configured in the settings takes priority over the one
        stored in the authentication secret. When neither is available no
        `api_key` argument is passed to the client at all.

        Returns:
            Argilla client.
        """
        config = self.config
        init_kwargs: Dict[str, Any] = {"api_url": self.get_url()}

        # Start from "no key" so the truthiness check below never reads an
        # unbound local when neither a secret nor a configured key exists.
        api_key: Optional[str] = None

        # Set the API key from the secret or using settings
        authentication_secret = self.get_authentication_secret()
        if config.api_key and authentication_secret:
            api_key = config.api_key
            logger.debug(
                "Both API key and authentication secret are provided. Using API key from settings as priority."
            )
        elif authentication_secret:
            api_key = authentication_secret.secret_values.get("api_key", "")
            logger.debug("Using API key from secret.")
        elif config.api_key is not None:
            api_key = config.api_key
            logger.debug("Using API key from settings.")

        if api_key:
            init_kwargs["api_key"] = api_key

        # `headers` and `httpx_extra_kwargs` are stored as JSON strings.
        if config.headers is not None:
            init_kwargs["headers"] = json.loads(config.headers)
        if config.httpx_extra_kwargs is not None:
            init_kwargs["httpx_extra_kwargs"] = json.loads(
                config.httpx_extra_kwargs
            )

        # Build the client once and reuse it for the connectivity probe.
        client = rg.Argilla(**init_kwargs)
        try:
            # Accessing `.me` is used purely to verify the instance; a failure
            # is logged but, as before, the client is still returned.
            _ = client.me
        except ArgillaAPIError as e:
            logger.error(f"Failed to verify the Argilla instance: {str(e)}")
        return client

    def get_url_for_dataset(self, dataset_name: str, **kwargs: Any) -> str:
        """Gets the URL of the annotation interface for the given dataset.

        Args:
            dataset_name: The name of the dataset.
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The URL of the dataset annotation interface.
        """
        workspace = kwargs.get("workspace")

        dataset_id = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        ).id
        return f"{self.get_url()}/dataset/{dataset_id}/annotation-mode"

    def get_datasets(self, **kwargs: Any) -> List[Any]:
        """Gets the datasets currently available for annotation.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.
                    If set, only the datasets in the workspace will be returned.

        Returns:
            A list of datasets.
        """
        workspace = kwargs.get("workspace")

        if workspace is None:
            datasets = list(self._get_client().datasets)
        else:
            datasets = list(self._get_client().workspaces(workspace).datasets)

        return datasets

    def get_dataset_names(self, **kwargs: Any) -> List[str]:
        """Gets the names of the datasets.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.
                    If set, only the dataset names in the workspace will be returned.

        Returns:
            A list of dataset names.
        """
        workspace = kwargs.get("workspace")

        datasets = (
            self.get_datasets()
            if workspace is None
            else self.get_datasets(workspace=workspace)
        )
        return [dataset.name for dataset in datasets]

    def _get_data_by_status(
        self, dataset_name: str, status: str, workspace: Optional[str]
    ) -> Any:
        """Gets the records of a dataset that have the specified status.

        Args:
            dataset_name: The name of the dataset.
            status: The response status to filter by ('completed' for labeled,
                'pending' for unlabeled).
            workspace: The name of the workspace. By default, the first available.

        Returns:
            The list of records with the specified status.
        """
        dataset = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )

        query = rg.Query(filter=rg.Filter([("status", "==", status)]))

        return dataset.records(
            query=query,
            with_suggestions=True,
            with_vectors=True,
            with_responses=True,
        ).to_list()

    def get_dataset_stats(
        self, dataset_name: str, **kwargs: Any
    ) -> Tuple[int, int]:
        """Gets the statistics of the given dataset.

        Args:
            dataset_name: The name of the dataset.
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            A tuple containing (labeled_task_count, unlabeled_task_count) for
                the dataset.
        """
        workspace = kwargs.get("workspace")

        labeled_task_count = len(
            self._get_data_by_status(
                dataset_name=dataset_name,
                status="completed",
                workspace=workspace,
            )
        )
        unlabeled_task_count = len(
            self._get_data_by_status(
                dataset_name=dataset_name,
                status="pending",
                workspace=workspace,
            )
        )

        return (labeled_task_count, unlabeled_task_count)

    def launch(self, **kwargs: Any) -> None:
        """Launches the annotation interface.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -api_url: URL to open instead of the configured one.
        """
        url = kwargs.get("api_url") or self.get_url()

        if self._get_client():
            webbrowser.open(url, new=1, autoraise=True)
        else:
            # The two literals are concatenated, so the first one must end
            # with a space to keep the words apart.
            logger.warning(
                "Could not launch annotation interface "
                "because the connection could not be established."
            )

    def add_dataset(self, **kwargs: Any) -> Any:
        """Create a dataset for annotation.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -settings: The settings for the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            An Argilla dataset object.

        Raises:
            ValueError: if `dataset_name` or `settings` aren't provided.
            RuntimeError: if the workspace creation fails.
            RuntimeError: if the dataset creation fails.
        """
        dataset_name = kwargs.get("dataset_name")
        settings = kwargs.get("settings")
        workspace = kwargs.get("workspace")

        if dataset_name is None or settings is None:
            raise ValueError(
                "`dataset_name` and `settings` keyword arguments are required."
            )

        # No workspace requested and none exists yet: create a default one
        # named `argilla`.
        if workspace is None and not self._get_client().workspaces:
            workspace_to_create = rg.Workspace(name="argilla")
            try:
                workspace = workspace_to_create.create()
            except Exception as e:
                raise RuntimeError(
                    "Failed to create the `argilla` workspace."
                ) from e

        try:
            dataset = rg.Dataset(
                name=dataset_name, workspace=workspace, settings=settings
            )
            logger.info(f"Creating the dataset '{dataset_name}' in Argilla...")
            dataset.create()
            logger.info(f"Dataset '{dataset_name}' successfully created.")
            return self.get_dataset(
                dataset_name=dataset_name, workspace=workspace
            )
        except Exception as e:
            logger.error(
                f"Failed to create dataset '{dataset_name}' in Argilla: {str(e)}"
            )
            raise RuntimeError(
                f"Failed to create the dataset '{dataset_name}' in Argilla: {str(e)}"
            ) from e

    def add_records(
        self,
        dataset_name: str,
        records: Union[Any, List[Dict[str, Any]]],
        workspace: Optional[str] = None,
        mapping: Optional[Dict[str, str]] = None,
    ) -> Any:
        """Add records to an Argilla dataset for annotation.

        Args:
            dataset_name: The name of the dataset.
            records: The records to add to the dataset.
            workspace: The name of the workspace. By default, the first available.
            mapping: The mapping of the records to the dataset fields. By default, None.

        Raises:
            RuntimeError: If the records cannot be loaded to Argilla.
        """
        dataset = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )

        try:
            logger.info(
                f"Loading the records to '{dataset_name}' in Argilla..."
            )
            dataset.records.log(records=records, mapping=mapping)
            logger.info(
                f"Records loaded successfully to Argilla for '{dataset_name}'."
            )
        except Exception as e:
            logger.error(
                f"Failed to load the records to Argilla for '{dataset_name}': {str(e)}"
            )
            raise RuntimeError(
                f"Failed to load the records to Argilla: {str(e)}"
            ) from e

    def get_dataset(self, **kwargs: Any) -> Any:
        """Gets the dataset with the given name.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The Argilla Dataset for the given name and workspace, if specified.

        Raises:
            ValueError: If the dataset name is not provided or if the dataset
                does not exist.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        try:
            dataset = self._get_client().datasets(
                name=dataset_name, workspace=workspace
            )
        except ValueError as e:
            logger.error(f"Dataset '{dataset_name}' not found.")
            raise ValueError(f"Dataset '{dataset_name}' not found.") from e

        # A `None` lookup result means the dataset is missing. Raise the
        # documented error instead of handing `None` back to callers that
        # immediately dereference the result.
        if dataset is None:
            logger.error(f"Dataset '{dataset_name}' not found.")
            raise ValueError(f"Dataset '{dataset_name}' not found.")

        return dataset

    def delete_dataset(self, **kwargs: Any) -> None:
        """Deletes a dataset from the annotation interface.

        A dataset that cannot be found is not an error: a warning is logged
        and the deletion is skipped.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Raises:
            ValueError: If the dataset name is not provided.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        try:
            dataset = self.get_dataset(
                dataset_name=dataset_name, workspace=workspace
            )
            dataset.delete()
            logger.info(f"Dataset '{dataset_name}' deleted successfully.")
        except ValueError:
            logger.warning(
                f"Dataset '{dataset_name}' not found. Skipping deletion."
            )

    def get_labeled_data(self, **kwargs: Any) -> Any:
        """Gets the records of a dataset that have been labeled.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The list of annotated records.

        Raises:
            ValueError: If the dataset name is not provided.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        return self._get_data_by_status(
            dataset_name, workspace=workspace, status="completed"
        )

    def get_unlabeled_data(self, **kwargs: str) -> Any:
        """Gets the records of a dataset that are still pending annotation.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The list of pending records for annotation.

        Raises:
            ValueError: If the dataset name is not provided.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        return self._get_data_by_status(
            dataset_name, workspace=workspace, status="pending"
        )
config: ArgillaAnnotatorConfig property readonly

Returns the ArgillaAnnotatorConfig config.

Returns:

Type Description
ArgillaAnnotatorConfig

The configuration.

settings_class: Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorSettings] property readonly

Settings class for the Argilla annotator.

Returns:

Type Description
Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorSettings]

The settings class.

add_dataset(self, **kwargs)

Create a dataset for annotation.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -settings: The settings for the dataset. -workspace: The name of the workspace. By default, the first available.

{}

Returns:

Type Description
Any

An Argilla dataset object.

Exceptions:

Type Description
ValueError

if dataset_name or settings aren't provided.

RuntimeError

if the workspace creation fails.

RuntimeError

if the dataset creation fails.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def add_dataset(self, **kwargs: Any) -> Any:
    """Create a new Argilla dataset that can then be annotated.

    Args:
        **kwargs: Keyword arguments controlling the creation.
            -dataset_name: The name of the dataset.
            -settings: The settings for the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The dataset as returned by `get_dataset` after creation.

    Raises:
        ValueError: if `dataset_name` or `settings` aren't provided.
        RuntimeError: if the workspace creation fails.
        RuntimeError: if the dataset creation fails.
    """
    settings = kwargs.get("settings")
    dataset_name = kwargs.get("dataset_name")
    if dataset_name is None or settings is None:
        raise ValueError(
            "`dataset_name` and `settings` keyword arguments are required."
        )

    workspace = kwargs.get("workspace")
    if workspace is None:
        # Only when the caller named no workspace do we look at the server;
        # if it has none at all, a default `argilla` workspace is created.
        if not self._get_client().workspaces:
            default_workspace = rg.Workspace(name="argilla")
            try:
                workspace = default_workspace.create()
            except Exception as e:
                raise RuntimeError(
                    "Failed to create the `argilla` workspace."
                ) from e

    try:
        new_dataset = rg.Dataset(
            name=dataset_name, workspace=workspace, settings=settings
        )
        logger.info(f"Creating the dataset '{dataset_name}' in Argilla...")
        new_dataset.create()
        logger.info(f"Dataset '{dataset_name}' successfully created.")
        created = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )
        return created
    except Exception as e:
        logger.error(
            f"Failed to create dataset '{dataset_name}' in Argilla: {str(e)}"
        )
        raise RuntimeError(
            f"Failed to create the dataset '{dataset_name}' in Argilla: {str(e)}"
        ) from e
add_records(self, dataset_name, records, workspace=None, mapping=None)

Add records to an Argilla dataset for annotation.

Parameters:

Name Type Description Default
dataset_name str

The name of the dataset.

required
records Union[Any, List[Dict[str, Any]]]

The records to add to the dataset.

required
workspace Optional[str]

The name of the workspace. By default, the first available.

None
mapping Optional[Dict[str, str]]

The mapping of the records to the dataset fields. By default, None.

None

Exceptions:

Type Description
RuntimeError

If the records cannot be loaded to Argilla.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def add_records(
    self,
    dataset_name: str,
    records: Union[Any, List[Dict[str, Any]]],
    workspace: Optional[str] = None,
    mapping: Optional[Dict[str, str]] = None,
) -> Any:
    """Log records into an existing Argilla dataset so they can be annotated.

    Args:
        dataset_name: The name of the dataset.
        records: The records to add to the dataset.
        workspace: The name of the workspace. By default, the first available.
        mapping: The mapping of the records to the dataset fields. By default, None.

    Raises:
        RuntimeError: If the records cannot be loaded to Argilla.
    """
    # The dataset lookup happens outside the try block, so a lookup failure
    # surfaces unchanged rather than as a RuntimeError.
    target = self.get_dataset(dataset_name=dataset_name, workspace=workspace)

    try:
        logger.info(f"Loading the records to '{dataset_name}' in Argilla...")
        target.records.log(records=records, mapping=mapping)
        logger.info(
            f"Records loaded successfully to Argilla for '{dataset_name}'."
        )
    except Exception as e:
        logger.error(
            f"Failed to load the records to Argilla for '{dataset_name}': {str(e)}"
        )
        raise RuntimeError(
            f"Failed to load the records to Argilla: {str(e)}"
        ) from e
delete_dataset(self, **kwargs)

Deletes a dataset from the annotation interface.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -workspace: The name of the workspace. By default, the first available

{}

Exceptions:

Type Description
ValueError

If the dataset name is not provided or if the dataset is not found.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def delete_dataset(self, **kwargs: Any) -> None:
    """Deletes a dataset from the annotation interface.

    A dataset that cannot be found is not an error: a warning is logged and
    the deletion is skipped. This covers both a `ValueError` raised by the
    lookup and a lookup that yields `None`.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Raises:
        ValueError: If the dataset name is not provided.
    """
    dataset_name = kwargs.get("dataset_name")
    workspace = kwargs.get("workspace")

    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    try:
        dataset = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )
    except ValueError:
        dataset = None

    # Guard against a `None` lookup result so that a missing dataset takes
    # the "skip" path instead of failing with AttributeError on `.delete()`.
    if dataset is None:
        logger.warning(
            f"Dataset '{dataset_name}' not found. Skipping deletion."
        )
        return

    try:
        dataset.delete()
        logger.info(f"Dataset '{dataset_name}' deleted successfully.")
    except ValueError:
        # Preserved from the original: a ValueError raised while deleting is
        # reported the same way as a missing dataset.
        logger.warning(
            f"Dataset '{dataset_name}' not found. Skipping deletion."
        )
get_dataset(self, **kwargs)

Gets the dataset with the given name.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -workspace: The name of the workspace. By default, the first available.

{}

Returns:

Type Description
Any

The Argilla Dataset for the given name and workspace, if specified.

Exceptions:

Type Description
ValueError

If the dataset name is not provided or if the dataset does not exist.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_dataset(self, **kwargs: Any) -> Any:
    """Gets the dataset with the given name.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The Argilla Dataset for the given name and workspace, if specified.

    Raises:
        ValueError: If the dataset name is not provided or if the dataset
            does not exist.
    """
    dataset_name = kwargs.get("dataset_name")
    workspace = kwargs.get("workspace")

    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    try:
        dataset = self._get_client().datasets(
            name=dataset_name, workspace=workspace
        )
    except ValueError as e:
        logger.error(f"Dataset '{dataset_name}' not found.")
        raise ValueError(f"Dataset '{dataset_name}' not found.") from e

    # A `None` lookup result means the dataset is missing. Raise the
    # documented error instead of silently returning `None`, which callers
    # would dereference right away (`.id`, `.records`, `.delete()`).
    if dataset is None:
        logger.error(f"Dataset '{dataset_name}' not found.")
        raise ValueError(f"Dataset '{dataset_name}' not found.")

    return dataset
get_dataset_names(self, **kwargs)

Gets the names of the datasets.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available. If set, only the dataset names in the workspace will be returned.

{}

Returns:

Type Description
List[str]

A list of dataset names.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_dataset_names(self, **kwargs: Any) -> List[str]:
    """Collect the `name` of every dataset returned by `get_datasets`.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.
                If set, only the dataset names in the workspace will be returned.

    Returns:
        A list of dataset names.
    """
    workspace = kwargs.get("workspace")

    # The workspace is only forwarded when the caller actually supplied one.
    if workspace is None:
        available = self.get_datasets()
    else:
        available = self.get_datasets(workspace=workspace)

    return [entry.name for entry in available]
get_dataset_stats(self, dataset_name, **kwargs)

Gets the statistics of the given dataset.

Parameters:

Name Type Description Default
dataset_name str

The name of the dataset.

required
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available.

{}

Returns:

Type Description
Tuple[int, int]

A tuple containing (labeled_task_count, unlabeled_task_count) for the dataset.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_dataset_stats(
    self, dataset_name: str, **kwargs: Any
) -> Tuple[int, int]:
    """Count the labeled and unlabeled records of a dataset.

    Args:
        dataset_name: The name of the dataset.
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        A tuple containing (labeled_task_count, unlabeled_task_count) for
            the dataset.
    """
    workspace = kwargs.get("workspace")

    def _count(status: str) -> int:
        # Number of records currently carrying the given response status.
        matching = self._get_data_by_status(
            dataset_name=dataset_name,
            status=status,
            workspace=workspace,
        )
        return len(matching)

    # "completed" records are the labeled ones, "pending" the unlabeled ones;
    # the lookups run in this order, as in the original.
    labeled_task_count = _count("completed")
    unlabeled_task_count = _count("pending")
    return labeled_task_count, unlabeled_task_count
get_datasets(self, **kwargs)

Gets the datasets currently available for annotation.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available. If set, only the datasets in the workspace will be returned.

{}

Returns:

Type Description
List[Any]

A list of datasets.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_datasets(self, **kwargs: Any) -> List[Any]:
    """List the datasets that can currently be annotated.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.
                If set, only the datasets in the workspace will be returned.

    Returns:
        A list of datasets.
    """
    workspace = kwargs.get("workspace")
    client = self._get_client()

    # Without a workspace the client's datasets are listed; otherwise only
    # those of the named workspace.
    source = (
        client.datasets
        if workspace is None
        else client.workspaces(workspace).datasets
    )
    return list(source)
get_labeled_data(self, **kwargs)

Gets the dataset containing the labeled data.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -workspace: The name of the workspace. By default, the first available.

{}

Returns:

Type Description
Any

The list of annotated records.

Exceptions:

Type Description
ValueError

If the dataset name is not provided.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_labeled_data(self, **kwargs: Any) -> Any:
    """Fetch the records of a dataset whose status is 'completed'.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The list of annotated records.

    Raises:
        ValueError: If the dataset name is not provided.
    """
    dataset_name = kwargs.get("dataset_name")
    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    target_workspace = kwargs.get("workspace")
    labeled_records = self._get_data_by_status(
        dataset_name, workspace=target_workspace, status="completed"
    )
    return labeled_records
get_unlabeled_data(self, **kwargs)

Gets the dataset containing the unlabeled data.

Parameters:

Name Type Description Default
**kwargs str

Additional keyword arguments to pass to the Argilla client.

{}

Returns:

Type Description
Any

The list of pending records for annotation.

Exceptions:

Type Description
ValueError

If the dataset name is not provided.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_unlabeled_data(self, **kwargs: str) -> Any:
    """Fetch the records of a dataset whose status is 'pending'.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            The code reads `dataset_name` (required) and `workspace`
            (optional) from them.

    Returns:
        The list of pending records for annotation.

    Raises:
        ValueError: If the dataset name is not provided.
    """
    dataset_name = kwargs.get("dataset_name")
    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    target_workspace = kwargs.get("workspace")
    pending_records = self._get_data_by_status(
        dataset_name, workspace=target_workspace, status="pending"
    )
    return pending_records
get_url(self)

Gets the top-level URL of the annotation interface.

Returns:

Type Description
str

The URL of the annotation interface.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_url(self) -> str:
    """Build the base URL of the annotation interface from the config.

    Returns:
        `<instance_url>:<port>` when a port is configured (truthy), otherwise
        the instance URL on its own.
    """
    settings = self.config
    if settings.port:
        return f"{settings.instance_url}:{settings.port}"
    return settings.instance_url
get_url_for_dataset(self, dataset_name, **kwargs)

Gets the URL of the annotation interface for the given dataset.

Parameters:

Name Type Description Default
dataset_name str

The name of the dataset.

required
**kwargs Any

Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available.

{}

Returns:

Type Description
str

The URL of the dataset annotation interface.

Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_url_for_dataset(self, dataset_name: str, **kwargs: Any) -> str:
    """Build the annotation-mode URL for one dataset.

    Args:
        dataset_name: The name of the dataset.
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The URL of the dataset annotation interface.
    """
    # The dataset is resolved first so its id can be embedded in the URL.
    dataset = self.get_dataset(
        dataset_name=dataset_name, workspace=kwargs.get("workspace")
    )
    dataset_id = dataset.id
    base_url = self.get_url()
    return f"{base_url}/dataset/{dataset_id}/annotation-mode"
launch(self, **kwargs)

Launches the annotation interface.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the Argilla client.

{}
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def launch(self, **kwargs: Any) -> None:
    """Launches the annotation interface.

    Opens the annotation interface in a web browser when a client can be
    obtained via `_get_client`; otherwise a warning is logged and nothing
    is opened.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -api_url: URL to open instead of the one returned by `get_url`.
    """
    url = kwargs.get("api_url") or self.get_url()

    if self._get_client():
        webbrowser.open(url, new=1, autoraise=True)
    else:
        # Adjacent string literals are concatenated verbatim, so the first
        # one needs a trailing space to avoid logging "interfacebecause".
        logger.warning(
            "Could not launch annotation interface "
            "because the connection could not be established."
        )

flavors special

Argilla integration flavors.

argilla_annotator_flavor

Argilla annotator flavor.

ArgillaAnnotatorConfig (BaseAnnotatorConfig, ArgillaAnnotatorSettings, AuthenticationConfigMixin)

Config for the Argilla annotator.

This class combines settings and authentication configurations for Argilla into a single, usable configuration object without adding additional functionality.

Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
class ArgillaAnnotatorConfig(
    BaseAnnotatorConfig,
    ArgillaAnnotatorSettings,
    AuthenticationConfigMixin,
):
    """Config for the Argilla annotator.

    This class combines the base annotator config, the Argilla annotator
    settings and the authentication config mixin into a single, usable
    configuration object. It declares no fields or methods of its own;
    everything is inherited from its base classes.
    """
ArgillaAnnotatorFlavor (BaseAnnotatorFlavor)

Argilla annotator flavor.

Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
class ArgillaAnnotatorFlavor(BaseAnnotatorFlavor):
    """Argilla annotator flavor.

    Exposes the flavor name, documentation URLs, dashboard logo, and the
    config and implementation classes of the Argilla annotator.
    """

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The Argilla annotator flavor name constant.
        """
        return ARGILLA_ANNOTATOR_FLAVOR

    @property
    def docs_url(self) -> Optional[str]:
        """A URL pointing at the docs that explain this flavor.

        Returns:
            The default generated flavor docs URL.
        """
        return self.generate_default_docs_url()

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """A URL pointing at the SDK docs that explain this flavor.

        Returns:
            The default generated flavor SDK docs URL.
        """
        return self.generate_default_sdk_docs_url()

    @property
    def logo_url(self) -> str:
        """A URL of the image representing the flavor in the dashboard.

        Returns:
            The flavor logo URL.
        """
        logo = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/annotator/argilla.png"
        return logo

    @property
    def config_class(self) -> Type[ArgillaAnnotatorConfig]:
        """Config class used by this flavor.

        Returns:
            The `ArgillaAnnotatorConfig` class.
        """
        return ArgillaAnnotatorConfig

    @property
    def implementation_class(self) -> Type["ArgillaAnnotator"]:
        """Implementation class for this flavor.

        Returns:
            The `ArgillaAnnotator` class.
        """
        # Imported inside the property rather than at module level, so the
        # annotator module is only loaded when the implementation is requested.
        from zenml.integrations.argilla.annotators import ArgillaAnnotator

        return ArgillaAnnotator
config_class: Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorConfig] property readonly

Returns ArgillaAnnotatorConfig config class.

Returns:

Type Description
Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorConfig]

The config class.

docs_url: Optional[str] property readonly

A url to point at docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor docs url.

implementation_class: Type[ArgillaAnnotator] property readonly

Implementation class for this flavor.

Returns:

Type Description
Type[ArgillaAnnotator]

The implementation class.

logo_url: str property readonly

A url to represent the flavor in the dashboard.

Returns:

Type Description
str

The flavor logo.

name: str property readonly

Name of the flavor.

Returns:

Type Description
str

The name of the flavor.

sdk_docs_url: Optional[str] property readonly

A url to point at SDK docs explaining this flavor.

Returns:

Type Description
Optional[str]

A flavor SDK docs url.

ArgillaAnnotatorSettings (BaseSettings)

Argilla annotator settings.

If you are using a private Hugging Face Spaces instance of Argilla, you must pass in httpx_extra_kwargs.

Attributes:

Name Type Description
instance_url str

URL of the Argilla instance.

api_key Optional[str]

The api_key for Argilla

port Optional[int]

The port to use for the annotation interface.

headers Optional[str]

Extra headers to include in the request.

httpx_extra_kwargs Optional[str]

Extra kwargs to pass to the client.

Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
class ArgillaAnnotatorSettings(BaseSettings):
    """Argilla annotator settings.

    If you are using a private Hugging Face Spaces instance of Argilla you
    must pass in `httpx_extra_kwargs`.

    Attributes:
        instance_url: URL of the Argilla instance. Trailing slashes are
            stripped by a validator.
        api_key: The API key for Argilla.
        port: The port to use for the annotation interface.
        headers: Extra headers to include in the request.
        httpx_extra_kwargs: Extra kwargs to pass to the client.
        extra_headers: Registered with the deprecation validator together
            with `headers`; presumably the deprecated former name of
            `headers` (confirm against `deprecate_pydantic_attributes`).
    """

    instance_url: str = DEFAULT_LOCAL_INSTANCE_URL
    api_key: Optional[str] = SecretField(default=None)
    port: Optional[int] = DEFAULT_LOCAL_ARGILLA_PORT
    headers: Optional[str] = None
    httpx_extra_kwargs: Optional[str] = None

    # NOTE(review): looks like a legacy attribute kept only so the
    # deprecation validator below can handle it -- confirm.
    extra_headers: Optional[str] = None

    # Pairs the old attribute name with the attribute that replaces it.
    _deprecation_validator = deprecation_utils.deprecate_pydantic_attributes(
        ("extra_headers", "headers"),
    )

    @field_validator("instance_url")
    @classmethod
    def ensure_instance_url_ends_without_slash(cls, instance_url: str) -> str:
        """Pydantic validator to ensure instance URL ends without a slash.

        Args:
            instance_url: The instance URL to validate.

        Returns:
            The instance URL with every trailing "/" removed.
        """
        return instance_url.rstrip("/")
ensure_instance_url_ends_without_slash(instance_url) classmethod

Pydantic validator to ensure instance URL ends without a slash.

Parameters:

Name Type Description Default
instance_url str

The instance URL to validate.

required

Returns:

Type Description
str

The validated instance URL.

Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
@field_validator("instance_url")
@classmethod
def ensure_instance_url_ends_without_slash(cls, instance_url: str) -> str:
    """Pydantic validator that strips trailing slashes from the instance URL.

    Args:
        instance_url: The instance URL to validate.

    Returns:
        The instance URL with every trailing "/" removed.
    """
    normalized_url = instance_url.rstrip("/")
    return normalized_url