Argilla
zenml.integrations.argilla
special
Initialization of the Argilla integration.
ArgillaIntegration (Integration)
Definition of Argilla integration for ZenML.
Source code in zenml/integrations/argilla/__init__.py
class ArgillaIntegration(Integration):
    """Argilla integration for ZenML.

    Declares the integration name, its pip requirements and the stack
    component flavors it contributes.
    """

    NAME = ARGILLA
    REQUIREMENTS = ["argilla>=2.0.0"]

    @classmethod
    def flavors(cls) -> List[Type[Flavor]]:
        """Declare the stack component flavors for the Argilla integration.

        Returns:
            List of stack component flavors for this integration.
        """
        # Imported inside the method rather than at module level.
        from zenml.integrations.argilla.flavors import ArgillaAnnotatorFlavor

        argilla_flavors: List[Type[Flavor]] = [ArgillaAnnotatorFlavor]
        return argilla_flavors
flavors()
classmethod
Declare the stack component flavors for the Argilla integration.
Returns:
Type | Description |
---|---|
List[Type[zenml.stack.flavor.Flavor]] |
List of stack component flavors for this integration. |
Source code in zenml/integrations/argilla/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
    """Declare the stack component flavors for the Argilla integration.

    Returns:
        List of stack component flavors for this integration.
    """
    # Imported inside the method rather than at module level.
    from zenml.integrations.argilla.flavors import ArgillaAnnotatorFlavor

    argilla_flavors: List[Type[Flavor]] = [ArgillaAnnotatorFlavor]
    return argilla_flavors
annotators
special
Initialization of the Argilla annotators submodule.
argilla_annotator
Implementation of the Argilla annotation integration.
ArgillaAnnotator (BaseAnnotator, AuthenticationMixin)
Class to interact with the Argilla annotation interface.
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
class ArgillaAnnotator(BaseAnnotator, AuthenticationMixin):
    """Class to interact with the Argilla annotation interface."""

    @property
    def config(self) -> ArgillaAnnotatorConfig:
        """Returns the `ArgillaAnnotatorConfig` config.

        Returns:
            The configuration.
        """
        return cast(ArgillaAnnotatorConfig, self._config)

    @property
    def settings_class(self) -> Type[ArgillaAnnotatorSettings]:
        """Settings class for the Argilla annotator.

        Returns:
            The settings class.
        """
        return ArgillaAnnotatorSettings

    def get_url(self) -> str:
        """Gets the top-level URL of the annotation interface.

        Returns:
            The URL of the annotation interface. The configured port is
            appended to the instance URL only when a port is set.
        """
        return (
            f"{self.config.instance_url}:{self.config.port}"
            if self.config.port
            else self.config.instance_url
        )

    def _get_client(self) -> ArgillaClient:
        """Gets the Argilla client.

        The API key is taken from the configuration and/or the
        authentication secret; when both are present the configured key
        wins. The connection is checked by reading the client's `me`
        attribute, and a failure of that check is logged, not raised.

        Returns:
            Argilla client.
        """
        config = self.config
        init_kwargs: Dict[str, Any] = {"api_url": self.get_url()}

        # Start from "no key" so the check below is safe when neither the
        # settings nor a secret provide one (previously an unbound local).
        api_key: Optional[str] = None

        # Set the API key from the secret or using settings
        authentication_secret = self.get_authentication_secret()
        if config.api_key and authentication_secret:
            api_key = config.api_key
            logger.debug(
                "Both API key and authentication secret are provided. Using API key from settings as priority."
            )
        elif authentication_secret:
            api_key = authentication_secret.secret_values.get("api_key", "")
            logger.debug("Using API key from secret.")
        elif config.api_key is not None:
            api_key = config.api_key
            logger.debug("Using API key from settings.")

        if api_key:
            init_kwargs["api_key"] = api_key

        # `headers` and `httpx_extra_kwargs` are stored as JSON strings.
        if config.headers is not None:
            init_kwargs["headers"] = json.loads(config.headers)
        if config.httpx_extra_kwargs is not None:
            init_kwargs["httpx_extra_kwargs"] = json.loads(
                config.httpx_extra_kwargs
            )

        # Build the client once and reuse it for the verification call.
        client = rg.Argilla(**init_kwargs)
        try:
            _ = client.me
        except ArgillaAPIError as e:
            logger.error(f"Failed to verify the Argilla instance: {str(e)}")
        return client

    def get_url_for_dataset(self, dataset_name: str, **kwargs: Any) -> str:
        """Gets the URL of the annotation interface for the given dataset.

        Args:
            dataset_name: The name of the dataset.
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The URL of the dataset annotation interface.
        """
        workspace = kwargs.get("workspace")

        dataset_id = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        ).id
        return f"{self.get_url()}/dataset/{dataset_id}/annotation-mode"

    def get_datasets(self, **kwargs: Any) -> List[Any]:
        """Gets the datasets currently available for annotation.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.
                    If set, only the datasets in the workspace will be returned.

        Returns:
            A list of datasets.
        """
        workspace = kwargs.get("workspace")

        if workspace is None:
            datasets = list(self._get_client().datasets)
        else:
            datasets = list(self._get_client().workspaces(workspace).datasets)

        return datasets

    def get_dataset_names(self, **kwargs: Any) -> List[str]:
        """Gets the names of the datasets.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.
                    If set, only the dataset names in the workspace will be returned.

        Returns:
            A list of dataset names.
        """
        workspace = kwargs.get("workspace")

        if workspace is None:
            dataset_names = [dataset.name for dataset in self.get_datasets()]
        else:
            dataset_names = [
                dataset.name
                for dataset in self.get_datasets(workspace=workspace)
            ]

        return dataset_names

    def _get_data_by_status(
        self, dataset_name: str, status: str, workspace: Optional[str]
    ) -> Any:
        """Gets the records of a dataset that have the specified status.

        Args:
            dataset_name: The name of the dataset.
            status: The response status to filter by ('completed' for labeled,
                'pending' for unlabeled).
            workspace: The name of the workspace. By default, the first available.

        Returns:
            The list of records with the specified status.
        """
        dataset = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )

        query = rg.Query(filter=rg.Filter([("status", "==", status)]))

        return dataset.records(
            query=query,
            with_suggestions=True,
            with_vectors=True,
            with_responses=True,
        ).to_list()

    def get_dataset_stats(
        self, dataset_name: str, **kwargs: Any
    ) -> Tuple[int, int]:
        """Gets the statistics of the given dataset.

        Args:
            dataset_name: The name of the dataset.
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            A tuple containing (labeled_task_count, unlabeled_task_count) for
            the dataset.
        """
        workspace = kwargs.get("workspace")

        labeled_task_count = len(
            self._get_data_by_status(
                dataset_name=dataset_name,
                status="completed",
                workspace=workspace,
            )
        )
        unlabeled_task_count = len(
            self._get_data_by_status(
                dataset_name=dataset_name,
                status="pending",
                workspace=workspace,
            )
        )

        return (labeled_task_count, unlabeled_task_count)

    def launch(self, **kwargs: Any) -> None:
        """Launches the annotation interface.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -api_url: URL to open in the browser. Defaults to the URL
                    returned by `get_url()`.
        """
        url = kwargs.get("api_url") or self.get_url()
        if self._get_client():
            webbrowser.open(url, new=1, autoraise=True)
        else:
            # Trailing space added: the two literals are concatenated.
            logger.warning(
                "Could not launch annotation interface "
                "because the connection could not be established."
            )

    def add_dataset(self, **kwargs: Any) -> Any:
        """Create a dataset for annotation.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -settings: The settings for the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            An Argilla dataset object.

        Raises:
            ValueError: if `dataset_name` or `settings` aren't provided.
            RuntimeError: if the workspace creation fails.
            RuntimeError: if the dataset creation fails.
        """
        dataset_name = kwargs.get("dataset_name")
        settings = kwargs.get("settings")
        workspace = kwargs.get("workspace")

        if dataset_name is None or settings is None:
            raise ValueError(
                "`dataset_name` and `settings` keyword arguments are required."
            )

        # With no workspace requested and none reported by the client,
        # create a workspace named `argilla` to hold the dataset.
        if workspace is None and not self._get_client().workspaces:
            workspace_to_create = rg.Workspace(name="argilla")
            try:
                workspace = workspace_to_create.create()
            except Exception as e:
                raise RuntimeError(
                    "Failed to create the `argilla` workspace."
                ) from e

        try:
            dataset = rg.Dataset(
                name=dataset_name, workspace=workspace, settings=settings
            )
            logger.info(f"Creating the dataset '{dataset_name}' in Argilla...")
            dataset.create()
            logger.info(f"Dataset '{dataset_name}' successfully created.")
            return self.get_dataset(
                dataset_name=dataset_name, workspace=workspace
            )
        except Exception as e:
            logger.error(
                f"Failed to create dataset '{dataset_name}' in Argilla: {str(e)}"
            )
            raise RuntimeError(
                f"Failed to create the dataset '{dataset_name}' in Argilla: {str(e)}"
            ) from e

    def add_records(
        self,
        dataset_name: str,
        records: Union[Any, List[Dict[str, Any]]],
        workspace: Optional[str] = None,
        mapping: Optional[Dict[str, str]] = None,
    ) -> Any:
        """Add records to an Argilla dataset for annotation.

        Args:
            dataset_name: The name of the dataset.
            records: The records to add to the dataset.
            workspace: The name of the workspace. By default, the first available.
            mapping: The mapping of the records to the dataset fields. By default, None.

        Raises:
            RuntimeError: If the records cannot be loaded to Argilla.
        """
        dataset = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )

        try:
            logger.info(
                f"Loading the records to '{dataset_name}' in Argilla..."
            )
            dataset.records.log(records=records, mapping=mapping)
            logger.info(
                f"Records loaded successfully to Argilla for '{dataset_name}'."
            )
        except Exception as e:
            logger.error(
                f"Failed to load the records to Argilla for '{dataset_name}': {str(e)}"
            )
            raise RuntimeError(
                f"Failed to load the records to Argilla: {str(e)}"
            ) from e

    def get_dataset(self, **kwargs: Any) -> Any:
        """Gets the dataset with the given name.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The Argilla Dataset for the given name and workspace, if specified.

        Raises:
            ValueError: If the dataset name is not provided or if the dataset
                does not exist.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        try:
            dataset = self._get_client().datasets(
                name=dataset_name, workspace=workspace
            )
        except ValueError as e:
            logger.error(f"Dataset '{dataset_name}' not found.")
            raise ValueError(f"Dataset '{dataset_name}' not found.") from e

        if dataset is None:
            # A `None` lookup result is reported as the documented error
            # instead of being returned to callers that dereference it.
            logger.error(f"Dataset '{dataset_name}' not found.")
            raise ValueError(f"Dataset '{dataset_name}' not found.")

        return dataset

    def delete_dataset(self, **kwargs: Any) -> None:
        """Deletes a dataset from the annotation interface.

        A dataset that cannot be found is not an error: a warning is logged
        and the deletion is skipped.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Raises:
            ValueError: If the dataset name is not provided.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        try:
            dataset = self.get_dataset(
                dataset_name=dataset_name, workspace=workspace
            )
            dataset.delete()
            logger.info(f"Dataset '{dataset_name}' deleted successfully.")
        except ValueError:
            logger.warning(
                f"Dataset '{dataset_name}' not found. Skipping deletion."
            )

    def get_labeled_data(self, **kwargs: Any) -> Any:
        """Gets the dataset containing the labeled data.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The list of annotated records.

        Raises:
            ValueError: If the dataset name is not provided.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        return self._get_data_by_status(
            dataset_name, workspace=workspace, status="completed"
        )

    def get_unlabeled_data(self, **kwargs: Any) -> Any:
        """Gets the dataset containing the unlabeled data.

        Args:
            **kwargs: Additional keyword arguments to pass to the Argilla client.
                -dataset_name: The name of the dataset.
                -workspace: The name of the workspace. By default, the first available.

        Returns:
            The list of pending records for annotation.

        Raises:
            ValueError: If the dataset name is not provided.
        """
        dataset_name = kwargs.get("dataset_name")
        workspace = kwargs.get("workspace")

        if not dataset_name:
            raise ValueError("`dataset_name` keyword argument is required.")

        return self._get_data_by_status(
            dataset_name, workspace=workspace, status="pending"
        )
config: ArgillaAnnotatorConfig
property
readonly
Returns the ArgillaAnnotatorConfig
config.
Returns:
Type | Description |
---|---|
ArgillaAnnotatorConfig |
The configuration. |
settings_class: Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorSettings]
property
readonly
Settings class for the Argilla annotator.
Returns:
Type | Description |
---|---|
Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorSettings] |
The settings class. |
add_dataset(self, **kwargs)
Create a dataset for annotation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -settings: The settings for the dataset. -workspace: The name of the workspace. By default, the first available. |
{} |
Returns:
Type | Description |
---|---|
Any |
An Argilla dataset object. |
Exceptions:
Type | Description |
---|---|
ValueError |
if `dataset_name` or `settings` aren't provided. |
RuntimeError |
if the workspace creation fails. |
RuntimeError |
if the dataset creation fails. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def add_dataset(self, **kwargs: Any) -> Any:
    """Create a new Argilla dataset that can be used for annotation.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -settings: The settings for the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        An Argilla dataset object.

    Raises:
        ValueError: if `dataset_name` or `settings` aren't provided.
        RuntimeError: if the workspace creation fails.
        RuntimeError: if the dataset creation fails.
    """
    workspace = kwargs.get("workspace")
    settings = kwargs.get("settings")
    dataset_name = kwargs.get("dataset_name")

    if settings is None or dataset_name is None:
        raise ValueError(
            "`dataset_name` and `settings` keyword arguments are required."
        )

    # Only ask the client for its workspaces when none was requested.
    needs_default_workspace = (
        workspace is None and not self._get_client().workspaces
    )
    if needs_default_workspace:
        default_workspace = rg.Workspace(name="argilla")
        try:
            workspace = default_workspace.create()
        except Exception as e:
            raise RuntimeError(
                "Failed to create the `argilla` workspace."
            ) from e

    try:
        new_dataset = rg.Dataset(
            name=dataset_name, workspace=workspace, settings=settings
        )
        logger.info(f"Creating the dataset '{dataset_name}' in Argilla...")
        new_dataset.create()
        logger.info(f"Dataset '{dataset_name}' successfully created.")
        # Hand back the dataset as looked up through `get_dataset`.
        created = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )
        return created
    except Exception as e:
        logger.error(
            f"Failed to create dataset '{dataset_name}' in Argilla: {str(e)}"
        )
        raise RuntimeError(
            f"Failed to create the dataset '{dataset_name}' in Argilla: {str(e)}"
        ) from e
add_records(self, dataset_name, records, workspace=None, mapping=None)
Add records to an Argilla dataset for annotation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset_name |
str |
The name of the dataset. |
required |
records |
Union[Any, List[Dict[str, Any]]] |
The records to add to the dataset. |
required |
workspace |
Optional[str] |
The name of the workspace. By default, the first available. |
None |
mapping |
Optional[Dict[str, str]] |
The mapping of the records to the dataset fields. By default, None. |
None |
Exceptions:
Type | Description |
---|---|
RuntimeError |
If the records cannot be loaded to Argilla. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def add_records(
    self,
    dataset_name: str,
    records: Union[Any, List[Dict[str, Any]]],
    workspace: Optional[str] = None,
    mapping: Optional[Dict[str, str]] = None,
) -> Any:
    """Log records into an existing Argilla dataset for annotation.

    Args:
        dataset_name: The name of the dataset.
        records: The records to add to the dataset.
        workspace: The name of the workspace. By default, the first available.
        mapping: The mapping of the records to the dataset fields. By default, None.

    Raises:
        RuntimeError: If the records cannot be loaded to Argilla.
    """
    # The lookup happens outside the `try`, so its errors propagate as-is.
    target = self.get_dataset(dataset_name=dataset_name, workspace=workspace)

    try:
        logger.info(f"Loading the records to '{dataset_name}' in Argilla...")
        target.records.log(records=records, mapping=mapping)
        logger.info(
            f"Records loaded successfully to Argilla for '{dataset_name}'."
        )
    except Exception as e:
        logger.error(
            f"Failed to load the records to Argilla for '{dataset_name}': {str(e)}"
        )
        raise RuntimeError(
            f"Failed to load the records to Argilla: {str(e)}"
        ) from e
delete_dataset(self, **kwargs)
Deletes a dataset from the annotation interface.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -workspace: The name of the workspace. By default, the first available |
{} |
Exceptions:
Type | Description |
---|---|
ValueError |
If the dataset name is not provided or if the dataset is not found. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def delete_dataset(self, **kwargs: Any) -> None:
    """Deletes a dataset from the annotation interface.

    A dataset that cannot be found is not an error: a warning is logged and
    the deletion is skipped. "Not found" covers both a `ValueError` raised by
    `get_dataset` and a `None` result from it.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Raises:
        ValueError: If the dataset name is not provided.
    """
    dataset_name = kwargs.get("dataset_name")
    workspace = kwargs.get("workspace")

    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    # Only the lookup is guarded, so a failure of the deletion itself is
    # not misreported as "not found".
    try:
        dataset = self.get_dataset(
            dataset_name=dataset_name, workspace=workspace
        )
    except ValueError:
        dataset = None

    if dataset is None:
        logger.warning(
            f"Dataset '{dataset_name}' not found. Skipping deletion."
        )
        return

    dataset.delete()
    logger.info(f"Dataset '{dataset_name}' deleted successfully.")
get_dataset(self, **kwargs)
Gets the dataset with the given name.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -workspace: The name of the workspace. By default, the first available. |
{} |
Returns:
Type | Description |
---|---|
Any |
The Argilla Dataset for the given name and workspace, if specified. |
Exceptions:
Type | Description |
---|---|
ValueError |
If the dataset name is not provided or if the dataset does not exist. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_dataset(self, **kwargs: Any) -> Any:
    """Gets the dataset with the given name.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The Argilla Dataset for the given name and workspace, if specified.

    Raises:
        ValueError: If the dataset name is not provided or if the dataset
            does not exist.
    """
    dataset_name = kwargs.get("dataset_name")
    workspace = kwargs.get("workspace")

    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    try:
        dataset = self._get_client().datasets(
            name=dataset_name, workspace=workspace
        )
    except ValueError as e:
        logger.error(f"Dataset '{dataset_name}' not found.")
        raise ValueError(f"Dataset '{dataset_name}' not found.") from e

    if dataset is None:
        # A `None` lookup result is reported as the documented error instead
        # of being returned to callers that immediately dereference it.
        logger.error(f"Dataset '{dataset_name}' not found.")
        raise ValueError(f"Dataset '{dataset_name}' not found.")

    return dataset
get_dataset_names(self, **kwargs)
Gets the names of the datasets.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available. If set, only the dataset names in the workspace will be returned. |
{} |
Returns:
Type | Description |
---|---|
List[str] |
A list of dataset names. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_dataset_names(self, **kwargs: Any) -> List[str]:
    """List the names of the datasets available for annotation.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.
                If set, only the dataset names in the workspace will be returned.

    Returns:
        A list of dataset names.
    """
    workspace = kwargs.get("workspace")
    # Forward the workspace filter only when one was actually given.
    lookup_kwargs = {} if workspace is None else {"workspace": workspace}
    return [entry.name for entry in self.get_datasets(**lookup_kwargs)]
get_dataset_stats(self, dataset_name, **kwargs)
Gets the statistics of the given dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset_name |
str |
The name of the dataset. |
required |
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available. |
{} |
Returns:
Type | Description |
---|---|
Tuple[int, int] |
A tuple containing (labeled_task_count, unlabeled_task_count) for the dataset. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_dataset_stats(
    self, dataset_name: str, **kwargs: Any
) -> Tuple[int, int]:
    """Count the labeled and unlabeled records of a dataset.

    Args:
        dataset_name: The name of the dataset.
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        A tuple containing (labeled_task_count, unlabeled_task_count) for
        the dataset.
    """
    workspace = kwargs.get("workspace")

    # 'completed' records are the labeled ones, 'pending' the unlabeled
    # ones; they are queried in that order.
    counts = [
        len(
            self._get_data_by_status(
                dataset_name=dataset_name,
                status=status,
                workspace=workspace,
            )
        )
        for status in ("completed", "pending")
    ]
    labeled_task_count, unlabeled_task_count = counts
    return (labeled_task_count, unlabeled_task_count)
get_datasets(self, **kwargs)
Gets the datasets currently available for annotation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available. If set, only the datasets in the workspace will be returned. |
{} |
Returns:
Type | Description |
---|---|
List[Any] |
A list of datasets. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_datasets(self, **kwargs: Any) -> List[Any]:
    """List the datasets currently available for annotation.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.
                If set, only the datasets in the workspace will be returned.

    Returns:
        A list of datasets.
    """
    workspace = kwargs.get("workspace")
    client = self._get_client()

    # Without a workspace use the client's own dataset collection,
    # otherwise the collection of the named workspace.
    if workspace is not None:
        return list(client.workspaces(workspace).datasets)
    return list(client.datasets)
get_labeled_data(self, **kwargs)
Gets the dataset containing the labeled data.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -dataset_name: The name of the dataset. -workspace: The name of the workspace. By default, the first available. |
{} |
Returns:
Type | Description |
---|---|
Any |
The list of annotated records. |
Exceptions:
Type | Description |
---|---|
ValueError |
If the dataset name is not provided. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_labeled_data(self, **kwargs: Any) -> Any:
    """Fetch the records of a dataset that have already been annotated.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The list of annotated records.

    Raises:
        ValueError: If the dataset name is not provided.
    """
    dataset_name = kwargs.get("dataset_name")
    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    # Labeled records are the ones with status 'completed'.
    labeled_records = self._get_data_by_status(
        dataset_name, workspace=kwargs.get("workspace"), status="completed"
    )
    return labeled_records
get_unlabeled_data(self, **kwargs)
Gets the dataset containing the unlabeled data.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
str |
Additional keyword arguments to pass to the Argilla client. |
{} |
Returns:
Type | Description |
---|---|
Any |
The list of pending records for annotation. |
Exceptions:
Type | Description |
---|---|
ValueError |
If the dataset name is not provided. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_unlabeled_data(self, **kwargs: Any) -> Any:
    """Gets the dataset containing the unlabeled data.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -dataset_name: The name of the dataset.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The list of pending records for annotation.

    Raises:
        ValueError: If the dataset name is not provided.
    """
    dataset_name = kwargs.get("dataset_name")
    workspace = kwargs.get("workspace")

    if not dataset_name:
        raise ValueError("`dataset_name` keyword argument is required.")

    # Unlabeled records are the ones with status 'pending'.
    return self._get_data_by_status(
        dataset_name, workspace=workspace, status="pending"
    )
get_url(self)
Gets the top-level URL of the annotation interface.
Returns:
Type | Description |
---|---|
str |
The URL of the annotation interface. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_url(self) -> str:
    """Build the top-level URL of the annotation interface.

    Returns:
        The URL of the annotation interface: the configured instance URL,
        followed by `:<port>` when a port is configured.
    """
    if self.config.port:
        return f"{self.config.instance_url}:{self.config.port}"
    return self.config.instance_url
get_url_for_dataset(self, dataset_name, **kwargs)
Gets the URL of the annotation interface for the given dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset_name |
str |
The name of the dataset. |
required |
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. -workspace: The name of the workspace. By default, the first available. |
{} |
Returns:
Type | Description |
---|---|
str |
The URL of the dataset annotation interface. |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def get_url_for_dataset(self, dataset_name: str, **kwargs: Any) -> str:
    """Build the URL of the annotation interface for one dataset.

    Args:
        dataset_name: The name of the dataset.
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -workspace: The name of the workspace. By default, the first available.

    Returns:
        The URL of the dataset annotation interface.
    """
    dataset = self.get_dataset(
        dataset_name=dataset_name, workspace=kwargs.get("workspace")
    )
    dataset_id = dataset.id
    base_url = self.get_url()
    return f"{base_url}/dataset/{dataset_id}/annotation-mode"
launch(self, **kwargs)
Launches the annotation interface.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Additional keyword arguments to pass to the Argilla client. |
{} |
Source code in zenml/integrations/argilla/annotators/argilla_annotator.py
def launch(self, **kwargs: Any) -> None:
    """Launches the annotation interface.

    Args:
        **kwargs: Additional keyword arguments to pass to the Argilla client.
            -api_url: URL to open in the browser. Defaults to the URL
                returned by `get_url()`.
    """
    url = kwargs.get("api_url") or self.get_url()
    if self._get_client():
        webbrowser.open(url, new=1, autoraise=True)
    else:
        # Trailing space added: the two literals are concatenated, and the
        # message previously read "...interfacebecause...".
        logger.warning(
            "Could not launch annotation interface "
            "because the connection could not be established."
        )
flavors
special
Argilla integration flavors.
argilla_annotator_flavor
Argilla annotator flavor.
ArgillaAnnotatorConfig (BaseAnnotatorConfig, ArgillaAnnotatorSettings, AuthenticationConfigMixin)
Config for the Argilla annotator.
This class combines settings and authentication configurations for Argilla into a single, usable configuration object without adding additional functionality.
Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
class ArgillaAnnotatorConfig(
    BaseAnnotatorConfig,
    ArgillaAnnotatorSettings,
    AuthenticationConfigMixin,
):
    """Config for the Argilla annotator.

    Merges the base annotator config, the Argilla settings and the
    authentication config mixin into one configuration class. It declares
    no fields or behavior of its own.
    """
ArgillaAnnotatorFlavor (BaseAnnotatorFlavor)
Argilla annotator flavor.
Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
class ArgillaAnnotatorFlavor(BaseAnnotatorFlavor):
    """Flavor that registers the Argilla annotator as a stack component."""

    @property
    def name(self) -> str:
        """Name of the flavor.

        Returns:
            The name of the flavor.
        """
        return ARGILLA_ANNOTATOR_FLAVOR

    @property
    def docs_url(self) -> Optional[str]:
        """A url to point at docs explaining this flavor.

        Returns:
            A flavor docs url.
        """
        default_docs_url = self.generate_default_docs_url()
        return default_docs_url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """A url to point at SDK docs explaining this flavor.

        Returns:
            A flavor SDK docs url.
        """
        default_sdk_docs_url = self.generate_default_sdk_docs_url()
        return default_sdk_docs_url

    @property
    def logo_url(self) -> str:
        """A url to represent the flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/annotator/argilla.png"

    @property
    def config_class(self) -> Type[ArgillaAnnotatorConfig]:
        """Returns `ArgillaAnnotatorConfig` config class.

        Returns:
            The config class.
        """
        return ArgillaAnnotatorConfig

    @property
    def implementation_class(self) -> Type["ArgillaAnnotator"]:
        """Implementation class for this flavor.

        Returns:
            The implementation class.
        """
        # Imported inside the property rather than at module level.
        from zenml.integrations.argilla.annotators import ArgillaAnnotator

        return ArgillaAnnotator
config_class: Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorConfig]
property
readonly
Returns ArgillaAnnotatorConfig
config class.
Returns:
Type | Description |
---|---|
Type[zenml.integrations.argilla.flavors.argilla_annotator_flavor.ArgillaAnnotatorConfig] |
The config class. |
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor docs url. |
implementation_class: Type[ArgillaAnnotator]
property
readonly
Implementation class for this flavor.
Returns:
Type | Description |
---|---|
Type[ArgillaAnnotator] |
The implementation class. |
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:
Type | Description |
---|---|
str |
The flavor logo. |
name: str
property
readonly
Name of the flavor.
Returns:
Type | Description |
---|---|
str |
The name of the flavor. |
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:
Type | Description |
---|---|
Optional[str] |
A flavor SDK docs url. |
ArgillaAnnotatorSettings (BaseSettings)
Argilla annotator settings.
If you are using a private Hugging Face Spaces instance of Argilla you must pass in httpx_extra_kwargs.
Attributes:
Name | Type | Description |
---|---|---|
instance_url |
str |
URL of the Argilla instance. |
api_key |
Optional[str] |
The api_key for Argilla |
port |
Optional[int] |
The port to use for the annotation interface. |
headers |
Optional[str] |
Extra headers to include in the request. |
httpx_extra_kwargs |
Optional[str] |
Extra kwargs to pass to the client. |
Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
class ArgillaAnnotatorSettings(BaseSettings):
    """Argilla annotator settings.

    If you are using a private Hugging Face Spaces instance of Argilla you
    must pass in `httpx_extra_kwargs`.

    Attributes:
        instance_url: URL of the Argilla instance.
        api_key: The api_key for Argilla
        port: The port to use for the annotation interface.
        headers: Extra headers to include in the request.
        httpx_extra_kwargs: Extra kwargs to pass to the client.
    """

    instance_url: str = DEFAULT_LOCAL_INSTANCE_URL
    api_key: Optional[str] = SecretField(default=None)
    port: Optional[int] = DEFAULT_LOCAL_ARGILLA_PORT
    headers: Optional[str] = None
    httpx_extra_kwargs: Optional[str] = None
    # NOTE(review): registered with the deprecation helper below, paired
    # with `headers` - presumably the old name for that field; confirm
    # against `deprecation_utils.deprecate_pydantic_attributes`.
    extra_headers: Optional[str] = None

    _deprecation_validator = deprecation_utils.deprecate_pydantic_attributes(
        ("extra_headers", "headers"),
    )

    @field_validator("instance_url")
    @classmethod
    def ensure_instance_url_ends_without_slash(cls, instance_url: str) -> str:
        """Pydantic validator to ensure instance URL ends without a slash.

        Args:
            instance_url: The instance URL to validate.

        Returns:
            The instance URL with every trailing `/` removed.
        """
        normalized_url = instance_url.rstrip("/")
        return normalized_url
ensure_instance_url_ends_without_slash(instance_url)
classmethod
Pydantic validator to ensure instance URL ends without a slash.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
instance_url |
str |
The instance URL to validate. |
required |
Returns:
Type | Description |
---|---|
str |
The validated instance URL. |
Source code in zenml/integrations/argilla/flavors/argilla_annotator_flavor.py
@field_validator("instance_url")
@classmethod
def ensure_instance_url_ends_without_slash(cls, instance_url: str) -> str:
    """Pydantic validator to ensure instance URL ends without a slash.

    Args:
        instance_url: The instance URL to validate.

    Returns:
        The instance URL with every trailing `/` removed.
    """
    normalized_url = instance_url.rstrip("/")
    return normalized_url