Skip to content

Annotators

zenml.annotators

Initialization of the ZenML annotator stack component.

Attributes

__all__ = ['BaseAnnotator'] module-attribute

Classes

BaseAnnotator(name: str, id: UUID, config: StackComponentConfig, flavor: str, type: StackComponentType, user: Optional[UUID], created: datetime, updated: datetime, labels: Optional[Dict[str, Any]] = None, connector_requirements: Optional[ServiceConnectorRequirements] = None, connector: Optional[UUID] = None, connector_resource_id: Optional[str] = None, *args: Any, **kwargs: Any)

Bases: StackComponent, ABC

Base class for all ZenML annotators.

Source code in src/zenml/stack/stack_component.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
def __init__(
    self,
    name: str,
    id: UUID,
    config: StackComponentConfig,
    flavor: str,
    type: StackComponentType,
    user: Optional[UUID],
    created: datetime,
    updated: datetime,
    labels: Optional[Dict[str, Any]] = None,
    connector_requirements: Optional[ServiceConnectorRequirements] = None,
    connector: Optional[UUID] = None,
    connector_resource_id: Optional[str] = None,
    *args: Any,
    **kwargs: Any,
):
    """Initializes a StackComponent.

    Validates the component name and then stores every argument on the
    instance. The extra ``*args`` / ``**kwargs`` are accepted but are not
    read anywhere in this initializer.

    Args:
        name: The name of the component.
        id: The unique ID of the component.
        config: The config of the component.
        flavor: The flavor of the component.
        type: The type of the component.
        user: The ID of the user who created the component.
        created: The creation time of the component.
        updated: The last update time of the component.
        labels: The labels of the component.
        connector_requirements: The requirements for the connector.
        connector: The ID of a connector linked to the component.
        connector_resource_id: The custom resource ID to access through
            the connector.
        *args: Additional positional arguments.
        **kwargs: Additional keyword arguments.

    Raises:
        ValueError: If a secret reference is passed as name.
    """
    # The name must be a literal value: reject secret references before
    # any state is written to the instance.
    if secret_utils.is_secret_reference(name):
        raise ValueError(
            "Passing the `name` attribute of a stack component as a "
            "secret reference is not allowed."
        )

    self.id = id
    self.name = name
    # Kept under a private attribute rather than as `self.config`.
    self._config = config
    self.flavor = flavor
    self.type = type
    self.user = user
    self.created = created
    self.updated = updated
    self.labels = labels
    self.connector_requirements = connector_requirements
    self.connector = connector
    self.connector_resource_id = connector_resource_id
    # No connector instance is created here; the slot starts out empty.
    self._connector_instance: Optional[ServiceConnector] = None
Attributes
config: BaseAnnotatorConfig property

Returns the BaseAnnotatorConfig config.

Returns:

Type Description
BaseAnnotatorConfig

The configuration.

Functions
add_dataset(**kwargs: Any) -> Any abstractmethod

Registers a dataset for annotation.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Returns:

Type Description
Any

The dataset or confirmation object on adding the dataset.

Source code in src/zenml/annotators/base_annotator.py
102
103
104
105
106
107
108
109
110
111
@abstractmethod
def add_dataset(self, **kwargs: Any) -> Any:
    """Register a new dataset with the annotation tool.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments describing the dataset to register.

    Returns:
        The newly added dataset, or a confirmation object for the
        registration.
    """
delete_dataset(**kwargs: Any) -> None abstractmethod

Deletes a dataset.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}
Source code in src/zenml/annotators/base_annotator.py
124
125
126
127
128
129
130
@abstractmethod
def delete_dataset(self, **kwargs: Any) -> None:
    """Remove a dataset from the annotation tool.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments identifying the dataset to delete.
    """
get_dataset(**kwargs: Any) -> Any abstractmethod

Gets the dataset with the given name.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Returns:

Type Description
Any

The dataset with the given name.

Source code in src/zenml/annotators/base_annotator.py
113
114
115
116
117
118
119
120
121
122
@abstractmethod
def get_dataset(self, **kwargs: Any) -> Any:
    """Fetch a single dataset, looked up by name.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments identifying the dataset to fetch.

    Returns:
        The dataset matching the given name.
    """
get_dataset_names() -> List[str] abstractmethod

Gets the names of the datasets currently available for annotation.

Returns:

Type Description
List[str]

The names of the datasets currently available for annotation.

Source code in src/zenml/annotators/base_annotator.py
73
74
75
76
77
78
79
@abstractmethod
def get_dataset_names(self) -> List[str]:
    """List the names of all datasets that can currently be annotated.

    Abstract hook: concrete annotators supply the implementation.

    Returns:
        Names of the datasets currently available for annotation.
    """
get_dataset_stats(dataset_name: str) -> Tuple[int, int] abstractmethod

Gets the statistics of a dataset.

Parameters:

Name Type Description Default
dataset_name str

name of the dataset.

required

Returns:

Type Description
Tuple[int, int]

A tuple containing (labeled_task_count, unlabeled_task_count) for the dataset.

Source code in src/zenml/annotators/base_annotator.py
81
82
83
84
85
86
87
88
89
90
91
@abstractmethod
def get_dataset_stats(self, dataset_name: str) -> Tuple[int, int]:
    """Report the labeling statistics of one dataset.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        dataset_name: Name of the dataset to inspect.

    Returns:
        A ``(labeled_task_count, unlabeled_task_count)`` tuple for the
        dataset.
    """
get_datasets() -> List[Any] abstractmethod

Gets the datasets currently available for annotation.

Returns:

Type Description
List[Any]

The datasets currently available for annotation.

Source code in src/zenml/annotators/base_annotator.py
65
66
67
68
69
70
71
@abstractmethod
def get_datasets(self) -> List[Any]:
    """List all datasets that can currently be annotated.

    Abstract hook: concrete annotators supply the implementation.

    Returns:
        The datasets currently available for annotation.
    """
get_labeled_data(**kwargs: Any) -> Any abstractmethod

Gets the labeled data for the given dataset.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Returns:

Type Description
Any

The labeled data for the given dataset.

Source code in src/zenml/annotators/base_annotator.py
132
133
134
135
136
137
138
139
140
141
@abstractmethod
def get_labeled_data(self, **kwargs: Any) -> Any:
    """Retrieve the labeled data of a dataset.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments identifying the dataset to read.

    Returns:
        The labeled data of the given dataset.
    """
get_unlabeled_data(**kwargs: str) -> Any abstractmethod

Gets the unlabeled data for the given dataset.

Parameters:

Name Type Description Default
**kwargs str

Additional keyword arguments to pass to the annotation client.

{}

Returns:

Type Description
Any

The unlabeled data for the given dataset.

Source code in src/zenml/annotators/base_annotator.py
143
144
145
146
147
148
149
150
151
152
@abstractmethod
def get_unlabeled_data(self, **kwargs: str) -> Any:
    """Retrieve the data of a dataset that has not been labeled yet.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Extra string-valued keyword arguments handed on to the
            annotation client (the Label Studio client, per the original
            wording of this interface).

    Returns:
        The unlabeled data of the given dataset.
    """
get_url() -> str abstractmethod

Gets the URL of the annotation interface.

Returns:

Type Description
str

The URL of the annotation interface.

Source code in src/zenml/annotators/base_annotator.py
46
47
48
49
50
51
52
@abstractmethod
def get_url(self) -> str:
    """Return the address of the annotation interface.

    Abstract hook: concrete annotators supply the implementation.

    Returns:
        URL under which the annotation interface can be reached.
    """
get_url_for_dataset(dataset_name: str) -> str abstractmethod

Gets the URL of the annotation interface for a specific dataset.

Parameters:

Name Type Description Default
dataset_name str

name of the dataset.

required

Returns:

Type Description
str

The URL of the dataset annotation interface.

Source code in src/zenml/annotators/base_annotator.py
54
55
56
57
58
59
60
61
62
63
@abstractmethod
def get_url_for_dataset(self, dataset_name: str) -> str:
    """Return the annotation interface address for one dataset.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        dataset_name: Name of the dataset whose interface is wanted.

    Returns:
        URL of the annotation interface for that dataset.
    """
launch(**kwargs: Any) -> None abstractmethod

Launches the annotation interface.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the annotation client.

{}
Source code in src/zenml/annotators/base_annotator.py
 93
 94
 95
 96
 97
 98
 99
100
@abstractmethod
def launch(self, **kwargs: Any) -> None:
    """Start the annotation interface.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Extra keyword arguments handed on to the annotation
            client.
    """

Modules

base_annotator

Base class for ZenML annotator stack components.

Classes
BaseAnnotator(name: str, id: UUID, config: StackComponentConfig, flavor: str, type: StackComponentType, user: Optional[UUID], created: datetime, updated: datetime, labels: Optional[Dict[str, Any]] = None, connector_requirements: Optional[ServiceConnectorRequirements] = None, connector: Optional[UUID] = None, connector_resource_id: Optional[str] = None, *args: Any, **kwargs: Any)

Bases: StackComponent, ABC

Base class for all ZenML annotators.

Source code in src/zenml/stack/stack_component.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
def __init__(
    self,
    name: str,
    id: UUID,
    config: StackComponentConfig,
    flavor: str,
    type: StackComponentType,
    user: Optional[UUID],
    created: datetime,
    updated: datetime,
    labels: Optional[Dict[str, Any]] = None,
    connector_requirements: Optional[ServiceConnectorRequirements] = None,
    connector: Optional[UUID] = None,
    connector_resource_id: Optional[str] = None,
    *args: Any,
    **kwargs: Any,
):
    """Initializes a StackComponent.

    Validates the component name and then stores every argument on the
    instance. The extra ``*args`` / ``**kwargs`` are accepted but are not
    read anywhere in this initializer.

    Args:
        name: The name of the component.
        id: The unique ID of the component.
        config: The config of the component.
        flavor: The flavor of the component.
        type: The type of the component.
        user: The ID of the user who created the component.
        created: The creation time of the component.
        updated: The last update time of the component.
        labels: The labels of the component.
        connector_requirements: The requirements for the connector.
        connector: The ID of a connector linked to the component.
        connector_resource_id: The custom resource ID to access through
            the connector.
        *args: Additional positional arguments.
        **kwargs: Additional keyword arguments.

    Raises:
        ValueError: If a secret reference is passed as name.
    """
    # The name must be a literal value: reject secret references before
    # any state is written to the instance.
    if secret_utils.is_secret_reference(name):
        raise ValueError(
            "Passing the `name` attribute of a stack component as a "
            "secret reference is not allowed."
        )

    self.id = id
    self.name = name
    # Kept under a private attribute rather than as `self.config`.
    self._config = config
    self.flavor = flavor
    self.type = type
    self.user = user
    self.created = created
    self.updated = updated
    self.labels = labels
    self.connector_requirements = connector_requirements
    self.connector = connector
    self.connector_resource_id = connector_resource_id
    # No connector instance is created here; the slot starts out empty.
    self._connector_instance: Optional[ServiceConnector] = None
Attributes
config: BaseAnnotatorConfig property

Returns the BaseAnnotatorConfig config.

Returns:

Type Description
BaseAnnotatorConfig

The configuration.

Functions
add_dataset(**kwargs: Any) -> Any abstractmethod

Registers a dataset for annotation.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Returns:

Type Description
Any

The dataset or confirmation object on adding the dataset.

Source code in src/zenml/annotators/base_annotator.py
102
103
104
105
106
107
108
109
110
111
@abstractmethod
def add_dataset(self, **kwargs: Any) -> Any:
    """Register a new dataset with the annotation tool.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments describing the dataset to register.

    Returns:
        The newly added dataset, or a confirmation object for the
        registration.
    """
delete_dataset(**kwargs: Any) -> None abstractmethod

Deletes a dataset.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}
Source code in src/zenml/annotators/base_annotator.py
124
125
126
127
128
129
130
@abstractmethod
def delete_dataset(self, **kwargs: Any) -> None:
    """Remove a dataset from the annotation tool.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments identifying the dataset to delete.
    """
get_dataset(**kwargs: Any) -> Any abstractmethod

Gets the dataset with the given name.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Returns:

Type Description
Any

The dataset with the given name.

Source code in src/zenml/annotators/base_annotator.py
113
114
115
116
117
118
119
120
121
122
@abstractmethod
def get_dataset(self, **kwargs: Any) -> Any:
    """Fetch a single dataset, looked up by name.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments identifying the dataset to fetch.

    Returns:
        The dataset matching the given name.
    """
get_dataset_names() -> List[str] abstractmethod

Gets the names of the datasets currently available for annotation.

Returns:

Type Description
List[str]

The names of the datasets currently available for annotation.

Source code in src/zenml/annotators/base_annotator.py
73
74
75
76
77
78
79
@abstractmethod
def get_dataset_names(self) -> List[str]:
    """List the names of all datasets that can currently be annotated.

    Abstract hook: concrete annotators supply the implementation.

    Returns:
        Names of the datasets currently available for annotation.
    """
get_dataset_stats(dataset_name: str) -> Tuple[int, int] abstractmethod

Gets the statistics of a dataset.

Parameters:

Name Type Description Default
dataset_name str

name of the dataset.

required

Returns:

Type Description
Tuple[int, int]

A tuple containing (labeled_task_count, unlabeled_task_count) for the dataset.

Source code in src/zenml/annotators/base_annotator.py
81
82
83
84
85
86
87
88
89
90
91
@abstractmethod
def get_dataset_stats(self, dataset_name: str) -> Tuple[int, int]:
    """Report the labeling statistics of one dataset.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        dataset_name: Name of the dataset to inspect.

    Returns:
        A ``(labeled_task_count, unlabeled_task_count)`` tuple for the
        dataset.
    """
get_datasets() -> List[Any] abstractmethod

Gets the datasets currently available for annotation.

Returns:

Type Description
List[Any]

The datasets currently available for annotation.

Source code in src/zenml/annotators/base_annotator.py
65
66
67
68
69
70
71
@abstractmethod
def get_datasets(self) -> List[Any]:
    """List all datasets that can currently be annotated.

    Abstract hook: concrete annotators supply the implementation.

    Returns:
        The datasets currently available for annotation.
    """
get_labeled_data(**kwargs: Any) -> Any abstractmethod

Gets the labeled data for the given dataset.

Parameters:

Name Type Description Default
**kwargs Any

keyword arguments.

{}

Returns:

Type Description
Any

The labeled data for the given dataset.

Source code in src/zenml/annotators/base_annotator.py
132
133
134
135
136
137
138
139
140
141
@abstractmethod
def get_labeled_data(self, **kwargs: Any) -> Any:
    """Retrieve the labeled data of a dataset.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Keyword arguments identifying the dataset to read.

    Returns:
        The labeled data of the given dataset.
    """
get_unlabeled_data(**kwargs: str) -> Any abstractmethod

Gets the unlabeled data for the given dataset.

Parameters:

Name Type Description Default
**kwargs str

Additional keyword arguments to pass to the annotation client.

{}

Returns:

Type Description
Any

The unlabeled data for the given dataset.

Source code in src/zenml/annotators/base_annotator.py
143
144
145
146
147
148
149
150
151
152
@abstractmethod
def get_unlabeled_data(self, **kwargs: str) -> Any:
    """Retrieve the data of a dataset that has not been labeled yet.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Extra string-valued keyword arguments handed on to the
            annotation client (the Label Studio client, per the original
            wording of this interface).

    Returns:
        The unlabeled data of the given dataset.
    """
get_url() -> str abstractmethod

Gets the URL of the annotation interface.

Returns:

Type Description
str

The URL of the annotation interface.

Source code in src/zenml/annotators/base_annotator.py
46
47
48
49
50
51
52
@abstractmethod
def get_url(self) -> str:
    """Return the address of the annotation interface.

    Abstract hook: concrete annotators supply the implementation.

    Returns:
        URL under which the annotation interface can be reached.
    """
get_url_for_dataset(dataset_name: str) -> str abstractmethod

Gets the URL of the annotation interface for a specific dataset.

Parameters:

Name Type Description Default
dataset_name str

name of the dataset.

required

Returns:

Type Description
str

The URL of the dataset annotation interface.

Source code in src/zenml/annotators/base_annotator.py
54
55
56
57
58
59
60
61
62
63
@abstractmethod
def get_url_for_dataset(self, dataset_name: str) -> str:
    """Return the annotation interface address for one dataset.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        dataset_name: Name of the dataset whose interface is wanted.

    Returns:
        URL of the annotation interface for that dataset.
    """
launch(**kwargs: Any) -> None abstractmethod

Launches the annotation interface.

Parameters:

Name Type Description Default
**kwargs Any

Additional keyword arguments to pass to the annotation client.

{}
Source code in src/zenml/annotators/base_annotator.py
 93
 94
 95
 96
 97
 98
 99
100
@abstractmethod
def launch(self, **kwargs: Any) -> None:
    """Start the annotation interface.

    Abstract hook: concrete annotators supply the implementation.

    Args:
        **kwargs: Extra keyword arguments handed on to the annotation
            client.
    """
BaseAnnotatorConfig(warn_about_plain_text_secrets: bool = False, **kwargs: Any)

Bases: StackComponentConfig

Base config for annotators.

Attributes:

Name Type Description
notebook_only bool

Whether the annotator can only be used in a notebook.

Source code in src/zenml/stack/stack_component.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def __init__(
    self, warn_about_plain_text_secrets: bool = False, **kwargs: Any
) -> None:
    """Screens the given attributes for unsafe secret references.

    String attributes of a stack component may be given as secret
    references of the form `{{secret_name.key}}`. That only works for
    attributes without custom pydantic validators: a validator would run
    against the reference text itself and either fail or, worse, alter
    the reference. Every keyword argument that names a declared model
    field is therefore checked before the arguments are handed on to the
    parent initializer.

    Args:
        warn_about_plain_text_secrets: If true, log a warning whenever a
            secret field receives a plain-text value.
        **kwargs: Arguments to initialize this stack component.

    Raises:
        ValueError: If an attribute that has custom pydantic validators
            is passed as a secret reference.
    """
    component_cls = self.__class__

    for key, raw_value in kwargs.items():
        field_info = component_cls.model_fields.get(key)
        if field_info is None:
            # Private attribute or unknown field: the pydantic validation
            # that follows is responsible for rejecting it.
            continue

        if raw_value is None:
            continue

        if secret_utils.is_secret_reference(raw_value):
            # A secret reference must never reach a custom validator.
            if pydantic_utils.has_validators(
                pydantic_class=component_cls, field_name=key
            ):
                raise ValueError(
                    f"Passing the stack component attribute `{key}` as a "
                    "secret reference is not allowed as additional validation "
                    "is required for this attribute."
                )
        elif (
            secret_utils.is_secret_field(field_info)
            and warn_about_plain_text_secrets
        ):
            # Plain-text value supplied for a field marked as secret.
            logger.warning(
                "You specified a plain-text value for the sensitive "
                f"attribute `{key}` for a `{self.__class__.__name__}` "
                "stack component. This is currently only a warning, "
                "but future versions of ZenML will require you to pass "
                "in sensitive information as secrets. Check out the "
                "documentation on how to configure your stack "
                "components with secrets here: "
                "https://docs.zenml.io/getting-started/deploying-zenml/secret-management"
            )

    super().__init__(**kwargs)
BaseAnnotatorFlavor

Bases: Flavor

Base class for annotator flavors.

Attributes
config_class: Type[BaseAnnotatorConfig] property

Config class for this flavor.

Returns:

Type Description
Type[BaseAnnotatorConfig]

The config class.

implementation_class: Type[BaseAnnotator] abstractmethod property

Implementation class.

Returns:

Type Description
Type[BaseAnnotator]

The implementation class.

type: StackComponentType property

Returns the flavor type.

Returns:

Type Description
StackComponentType

The flavor type.