Prodigy
        zenml.integrations.prodigy
  
      special
  
    Initialization of the Prodigy integration.
        
ProdigyIntegration            (Integration)
        
    Definition of Prodigy integration for ZenML.
Source code in zenml/integrations/prodigy/__init__.py
          class ProdigyIntegration(Integration):
              """ZenML integration definition for the Prodigy annotation tool."""

              NAME = PRODIGY
              REQUIREMENTS = ["prodigy", "urllib3<2"]
              # Listed separately from REQUIREMENTS; presumably skipped when the
              # integration is uninstalled -- confirm against `Integration`.
              REQUIREMENTS_IGNORED_ON_UNINSTALL = ["urllib3"]

              @classmethod
              def flavors(cls) -> List[Type[Flavor]]:
                  """Declare the stack component flavors for the Prodigy integration.

                  Returns:
                      List of stack component flavors for this integration.
                  """
                  # Imported inside the method rather than at module level.
                  from zenml.integrations.prodigy.flavors import ProdigyAnnotatorFlavor

                  prodigy_flavors = [ProdigyAnnotatorFlavor]
                  return prodigy_flavors
flavors()
  
      classmethod
  
    Declare the stack component flavors for the Prodigy integration.
Returns:
| Type | Description | 
|---|---|
| List[Type[zenml.stack.flavor.Flavor]] | List of stack component flavors for this integration. | 
Source code in zenml/integrations/prodigy/__init__.py
          @classmethod
          def flavors(cls) -> List[Type[Flavor]]:
              """Declare the stack component flavors for the Prodigy integration.

              Returns:
                  List of stack component flavors for this integration.
              """
              # Imported inside the method rather than at module level.
              from zenml.integrations.prodigy.flavors import ProdigyAnnotatorFlavor

              prodigy_flavors = [ProdigyAnnotatorFlavor]
              return prodigy_flavors
        annotators
  
      special
  
    Initialization of the Prodigy annotators submodule.
        prodigy_annotator
    Implementation of the Prodigy annotation integration.
        
ProdigyAnnotator            (BaseAnnotator, AuthenticationMixin)
        
    Class to interact with the Prodigy annotation interface.
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          class ProdigyAnnotator(BaseAnnotator, AuthenticationMixin):
              """Class to interact with the Prodigy annotation interface."""

              @property
              def config(self) -> ProdigyAnnotatorConfig:
                  """Returns the `ProdigyAnnotatorConfig` config.

                  Returns:
                      The configuration.
                  """
                  return cast(ProdigyAnnotatorConfig, self._config)

              def get_url(self) -> str:
                  """Gets the top-level URL of the annotation interface.

                  Starts from the default local host and port. When
                  `custom_config_path` is set on the config, the `instance_url` and
                  `port` entries of that JSON file (if present) override the defaults.

                  Returns:
                      The URL of the annotation interface.
                  """
                  instance_url = DEFAULT_LOCAL_INSTANCE_HOST
                  port = DEFAULT_LOCAL_PRODIGY_PORT
                  if self.config.custom_config_path:
                      # JSON is UTF-8 by specification; do not depend on the
                      # platform's locale default encoding.
                      with open(
                          self.config.custom_config_path, "r", encoding="utf-8"
                      ) as f:
                          config = json.load(f)
                      instance_url = config.get("instance_url", instance_url)
                      port = config.get("port", port)
                  return f"http://{instance_url}:{port}"

              def get_url_for_dataset(self, dataset_name: str) -> str:
                  """Gets the URL of the annotation interface for the given dataset.

                  Prodigy does not support dataset-specific URLs, so this method returns
                  the top-level URL since that's what will be served for the user.

                  Args:
                      dataset_name: The name of the dataset. (Unused)

                  Returns:
                      The URL of the annotation interface.
                  """
                  return self.get_url()

              def get_datasets(self) -> List[Any]:
                  """Gets the datasets currently available for annotation.

                  Returns:
                      A list of datasets (str).
                  """
                  datasets = self._get_db().datasets
                  return cast(List[Any], datasets)

              def get_dataset_names(self) -> List[str]:
                  """Gets the names of the datasets.

                  Returns:
                      A list of dataset names.
                  """
                  return self.get_datasets()

              def get_dataset_stats(self, dataset_name: str) -> Tuple[int, int]:
                  """Gets the statistics of the given dataset.

                  Args:
                      dataset_name: The name of the dataset.

                  Returns:
                      A tuple containing (labeled_task_count, unlabeled_task_count) for
                          the dataset. The unlabeled count is always reported as 0.

                  Raises:
                      IndexError: If the dataset does not exist.
                  """
                  db = self._get_db()
                  try:
                      labeled_data_count = db.count_dataset(name=dataset_name)
                  except ValueError as e:
                      raise IndexError(
                          f"Dataset {dataset_name} does not exist. Please use `zenml "
                          f"annotator dataset list` to list the available datasets."
                      ) from e
                  return (labeled_data_count, 0)

              def launch(self, **kwargs: Any) -> None:
                  """Launches the annotation interface.

                  This method extracts the 'command' and additional config
                      parameters from kwargs.

                  Args:
                      **kwargs: Should include:
                          - command: The full recipe command without "prodigy".
                          - Any additional config parameters to overwrite the
                              project-specific, global, and recipe config.

                  Raises:
                      ValueError: If the 'command' keyword argument is not provided.
                  """
                  command = kwargs.get("command")
                  if not command:
                      raise ValueError(
                          "The 'command' keyword argument is required for launching Prodigy."
                      )
                  # Remove 'command' from kwargs to pass the rest as config parameters
                  config = {
                      key: value for key, value in kwargs.items() if key != "command"
                  }
                  prodigy.serve(command=command, **config)

              def _get_db(
                  self,
                  custom_database: Optional[PeeweeDatabase] = None,
                  display_id: Optional[str] = None,
                  display_name: Optional[str] = None,
              ) -> ProdigyDatabase:
                  """Gets Prodigy database / client.

                  Args:
                      custom_database: Custom database to use.
                      display_id: The display id of the database.
                      display_name: The display name of the database.

                  Returns:
                      Prodigy database client.
                  """
                  db_kwargs = {}
                  if display_id:
                      db_kwargs["display_id"] = display_id
                  if display_name:
                      db_kwargs["display_name"] = display_name
                  # The database is passed positionally only. Also adding it to
                  # `db_kwargs` would hand the same object to `connect` twice.
                  if custom_database:
                      return connect(custom_database, **db_kwargs)
                  return connect(**db_kwargs)

              def add_dataset(self, **kwargs: Any) -> Any:
                  """Registers a dataset for annotation.

                  Args:
                      **kwargs: Recognised keys are:
                          - dataset_name: Name of the dataset to create (required).
                          - dataset_meta: Optional metadata to store with the dataset.

                  Returns:
                      A Prodigy list representing the dataset.

                  Raises:
                      ValueError: If `dataset_name` is not provided.
                  """
                  dataset_name = kwargs.get("dataset_name")
                  if not dataset_name:
                      raise ValueError("`dataset_name` keyword argument is required.")
                  # The database client is addressed with `name` (as in the sibling
                  # `count_dataset`/`drop_dataset`/`get_meta` calls) and `meta`.
                  dataset_kwargs = {"name": dataset_name}
                  if dataset_meta := kwargs.get("dataset_meta"):
                      dataset_kwargs["meta"] = dataset_meta
                  return self._get_db().add_dataset(**dataset_kwargs)

              def delete_dataset(self, **kwargs: Any) -> None:
                  """Deletes a dataset from the annotation interface.

                  Args:
                      **kwargs: Must contain `dataset_name`, the dataset to drop.

                  Raises:
                      ValueError: If the dataset name is not provided or if the dataset
                          does not exist.
                      ProdigyError: Any other Prodigy failure raised while dropping.
                  """
                  if not (dataset_name := kwargs.get("dataset_name")):
                      raise ValueError("`dataset_name` keyword argument is required.")
                  db = self._get_db()
                  try:
                      db.drop_dataset(name=dataset_name)
                  except ProdigyError as e:
                      # see https://support.prodi.gy/t/how-to-import-datasetdoesnotexist-error/7205
                      if type(e).__name__ == "DatasetNotFound":
                          raise ValueError(
                              f"Dataset name '{dataset_name}' does not exist."
                          ) from e
                      # Any other Prodigy failure must surface instead of being
                      # silently discarded.
                      raise

              def get_dataset(self, **kwargs: Any) -> Any:
                  """Gets the dataset metadata for the given name.

                  If you would like the labelled data, use `get_labeled_data` instead.

                  Args:
                      **kwargs: Must contain `dataset_name`, the dataset to look up.

                  Returns:
                      The metadata associated with a Prodigy dataset

                  Raises:
                      ValueError: If the dataset name is not provided or if the dataset
                          does not exist.
                  """
                  if not (dataset_name := kwargs.get("dataset_name")):
                      raise ValueError("`dataset_name` keyword argument is required.")
                  db = self._get_db()
                  try:
                      return db.get_meta(name=dataset_name)
                  except Exception as e:
                      raise ValueError(
                          f"Dataset name '{dataset_name}' does not exist."
                      ) from e

              def get_labeled_data(self, **kwargs: Any) -> Any:
                  """Gets the labeled data for the given dataset.

                  Args:
                      **kwargs: Must contain `dataset_name`, the dataset to read.

                  Returns:
                      A list of all examples in the dataset serialized to the
                          Prodigy Task format.

                  Raises:
                      ValueError: If the dataset name is not provided.
                  """
                  if dataset_name := kwargs.get("dataset_name"):
                      return self._get_db().get_dataset_examples(dataset_name)
                  else:
                      raise ValueError("`dataset_name` keyword argument is required.")

              def get_unlabeled_data(self, **kwargs: str) -> Any:
                  """Gets the unlabeled data for the given dataset.

                  Args:
                      **kwargs: Ignored; the method always raises.

                  Raises:
                      NotImplementedError: Prodigy doesn't allow fetching unlabeled data.
                  """
                  raise NotImplementedError(
                      "Prodigy doesn't allow fetching unlabeled data."
                  )
config: ProdigyAnnotatorConfig
  
      property
      readonly
  
    Returns the ProdigyAnnotatorConfig config.
Returns:
| Type | Description | 
|---|---|
| ProdigyAnnotatorConfig | The configuration. | 
add_dataset(self, **kwargs)
    Registers a dataset for annotation.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| **kwargs | Any | Additional keyword arguments to pass to the Prodigy client. | {} | 
Returns:
| Type | Description | 
|---|---|
| Any | A Prodigy list representing the dataset. | 
Exceptions:
| Type | Description | 
|---|---|
| ValueError | If the 'dataset_name' keyword argument is not provided. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def add_dataset(self, **kwargs: Any) -> Any:
              """Registers a dataset for annotation.

              Args:
                  **kwargs: Recognised keys are:
                      - dataset_name: Name of the dataset to create (required).
                      - dataset_meta: Optional metadata to store with the dataset.

              Returns:
                  A Prodigy list representing the dataset.

              Raises:
                  ValueError: If `dataset_name` is not provided.
              """
              dataset_name = kwargs.get("dataset_name")
              if not dataset_name:
                  raise ValueError("`dataset_name` keyword argument is required.")
              # The database client is addressed with `name` (as in the sibling
              # `count_dataset`/`drop_dataset`/`get_meta` calls) and `meta`.
              dataset_kwargs = {"name": dataset_name}
              if dataset_meta := kwargs.get("dataset_meta"):
                  dataset_kwargs["meta"] = dataset_meta
              return self._get_db().add_dataset(**dataset_kwargs)
delete_dataset(self, **kwargs)
    Deletes a dataset from the annotation interface.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| **kwargs | Any | Additional keyword arguments to pass to the Prodigy client. | {} | 
Exceptions:
| Type | Description | 
|---|---|
| ValueError | If the dataset name is not provided or if the dataset does not exist. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def delete_dataset(self, **kwargs: Any) -> None:
              """Deletes a dataset from the annotation interface.

              Args:
                  **kwargs: Must contain `dataset_name`, the dataset to drop.

              Raises:
                  ValueError: If the dataset name is not provided or if the dataset
                      does not exist.
                  ProdigyError: Any other Prodigy failure raised while dropping.
              """
              if not (dataset_name := kwargs.get("dataset_name")):
                  raise ValueError("`dataset_name` keyword argument is required.")
              db = self._get_db()
              try:
                  db.drop_dataset(name=dataset_name)
              except ProdigyError as e:
                  # see https://support.prodi.gy/t/how-to-import-datasetdoesnotexist-error/7205
                  if type(e).__name__ == "DatasetNotFound":
                      raise ValueError(
                          f"Dataset name '{dataset_name}' does not exist."
                      ) from e
                  # Any other Prodigy failure must surface instead of being
                  # silently discarded.
                  raise
get_dataset(self, **kwargs)
    Gets the dataset metadata for the given name.
If you would like the labelled data, use get_labeled_data instead.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| **kwargs | Any | Additional keyword arguments to pass to the Prodigy client. | {} | 
Returns:
| Type | Description | 
|---|---|
| Any | The metadata associated with a Prodigy dataset | 
Exceptions:
| Type | Description | 
|---|---|
| ValueError | If the dataset name is not provided or if the dataset does not exist. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_dataset(self, **kwargs: Any) -> Any:
              """Gets the dataset metadata for the given name.

              If you would like the labelled data, use `get_labeled_data` instead.

              Args:
                  **kwargs: Must contain `dataset_name`, the dataset to look up.

              Returns:
                  The metadata associated with a Prodigy dataset

              Raises:
                  ValueError: If the dataset name is not provided or if the dataset
                      does not exist.
              """
              # Fail loudly on a missing name, as the documented contract and the
              # sibling methods do, instead of falling through and returning None.
              if not (dataset_name := kwargs.get("dataset_name")):
                  raise ValueError("`dataset_name` keyword argument is required.")
              db = self._get_db()
              try:
                  return db.get_meta(name=dataset_name)
              except Exception as e:
                  raise ValueError(
                      f"Dataset name '{dataset_name}' does not exist."
                  ) from e
get_dataset_names(self)
    Gets the names of the datasets.
Returns:
| Type | Description | 
|---|---|
| List[str] | A list of dataset names. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_dataset_names(self) -> List[str]:
              """Return the names of all datasets.

              Delegates to `get_datasets` and returns its result unchanged.

              Returns:
                  A list of dataset names.
              """
              dataset_names = self.get_datasets()
              return dataset_names
get_dataset_stats(self, dataset_name)
    Gets the statistics of the given dataset.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| dataset_name | str | The name of the dataset. | required | 
Returns:
| Type | Description | 
|---|---|
| Tuple[int, int] | A tuple containing (labeled_task_count, unlabeled_task_count) for the dataset. | 
Exceptions:
| Type | Description | 
|---|---|
| IndexError | If the dataset does not exist. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_dataset_stats(self, dataset_name: str) -> Tuple[int, int]:
              """Report labeled/unlabeled counts for a dataset.

              Args:
                  dataset_name: The name of the dataset.

              Returns:
                  A tuple containing (labeled_task_count, unlabeled_task_count) for
                      the dataset. The second element is always 0.

              Raises:
                  IndexError: If the dataset does not exist.
              """
              database = self._get_db()
              try:
                  labeled_count = database.count_dataset(name=dataset_name)
              except ValueError as lookup_error:
                  # The client's ValueError is translated into an IndexError.
                  message = (
                      f"Dataset {dataset_name} does not exist. Please use `zenml "
                      f"annotator dataset list` to list the available datasets."
                  )
                  raise IndexError(message) from lookup_error
              unlabeled_count = 0
              return labeled_count, unlabeled_count
get_datasets(self)
    Gets the datasets currently available for annotation.
Returns:
| Type | Description | 
|---|---|
| List[Any] | A list of datasets (str). | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_datasets(self) -> List[Any]:
              """Return the datasets exposed by the Prodigy database client.

              Returns:
                  A list of datasets (str).
              """
              # `cast` only informs the type checker; the value is returned as-is.
              return cast(List[Any], self._get_db().datasets)
get_labeled_data(self, **kwargs)
    Gets the labeled data for the given dataset.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| **kwargs | Any | Additional keyword arguments to pass to the Prodigy client. | {} | 
Returns:
| Type | Description | 
|---|---|
| Any | A list of all examples in the dataset serialized to the Prodigy Task format. | 
Exceptions:
| Type | Description | 
|---|---|
| ValueError | If the dataset name is not provided or if the dataset does not exist. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_labeled_data(self, **kwargs: Any) -> Any:
              """Fetch the labeled examples stored for a dataset.

              Args:
                  **kwargs: Must contain `dataset_name`, the dataset to read.

              Returns:
                  A list of all examples in the dataset serialized to the
                      Prodigy Task format.

              Raises:
                  ValueError: If the dataset name is not provided.
              """
              dataset_name = kwargs.get("dataset_name")
              if not dataset_name:
                  raise ValueError("`dataset_name` keyword argument is required.")
              database = self._get_db()
              return database.get_dataset_examples(dataset_name)
get_unlabeled_data(self, **kwargs)
    Gets the unlabeled data for the given dataset.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| **kwargs | str | Additional keyword arguments to pass to the Prodigy client. | {} | 
Exceptions:
| Type | Description | 
|---|---|
| NotImplementedError | Prodigy doesn't allow fetching unlabeled data. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_unlabeled_data(self, **kwargs: str) -> Any:
              """Always fails: unlabeled data cannot be fetched from Prodigy.

              Args:
                  **kwargs: Ignored; the method raises unconditionally.

              Raises:
                  NotImplementedError: Prodigy doesn't allow fetching unlabeled data.
              """
              reason = "Prodigy doesn't allow fetching unlabeled data."
              raise NotImplementedError(reason)
get_url(self)
    Gets the top-level URL of the annotation interface.
Returns:
| Type | Description | 
|---|---|
| str | The URL of the annotation interface. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_url(self) -> str:
              """Gets the top-level URL of the annotation interface.

              Starts from the default local host and port. When
              `custom_config_path` is set on the config, the `instance_url` and
              `port` entries of that JSON file (if present) override the defaults.

              Returns:
                  The URL of the annotation interface.
              """
              instance_url = DEFAULT_LOCAL_INSTANCE_HOST
              port = DEFAULT_LOCAL_PRODIGY_PORT
              if self.config.custom_config_path:
                  # JSON is UTF-8 by specification; do not depend on the
                  # platform's locale default encoding.
                  with open(
                      self.config.custom_config_path, "r", encoding="utf-8"
                  ) as f:
                      config = json.load(f)
                  instance_url = config.get("instance_url", instance_url)
                  port = config.get("port", port)
              return f"http://{instance_url}:{port}"
get_url_for_dataset(self, dataset_name)
    Gets the URL of the annotation interface for the given dataset.
Prodigy does not support dataset-specific URLs, so this method returns the top-level URL since that's what will be served for the user.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| dataset_name | str | The name of the dataset. (Unused) | required | 
Returns:
| Type | Description | 
|---|---|
| str | The URL of the annotation interface. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def get_url_for_dataset(self, dataset_name: str) -> str:
              """Return the annotation interface URL to use for a dataset.

              Prodigy does not support dataset-specific URLs, so the top-level
              URL from `get_url` is returned regardless of the dataset.

              Args:
                  dataset_name: The name of the dataset. (Unused)

              Returns:
                  The URL of the annotation interface.
              """
              top_level_url = self.get_url()
              return top_level_url
launch(self, **kwargs)
    Launches the annotation interface.
This method extracts the 'command' and additional config parameters from kwargs.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| **kwargs | Any | Should include: - command: The full recipe command without "prodigy". - Any additional config parameters to overwrite the project-specific, global, and recipe config. | {} | 
Exceptions:
| Type | Description | 
|---|---|
| ValueError | If the 'command' keyword argument is not provided. | 
Source code in zenml/integrations/prodigy/annotators/prodigy_annotator.py
          def launch(self, **kwargs: Any) -> None:
              """Start the Prodigy annotation server.

              The `command` entry is separated from the keyword arguments; every
              other entry is forwarded to `prodigy.serve` as a config parameter.

              Args:
                  **kwargs: Should include:
                      - command: The full recipe command without "prodigy".
                      - Any additional config parameters to overwrite the
                          project-specific, global, and recipe config.

              Raises:
                  ValueError: If the 'command' keyword argument is not provided.
              """
              # Work on a copy so 'command' can be split off from the overrides.
              overrides = dict(kwargs)
              command = overrides.pop("command", None)
              if not command:
                  raise ValueError(
                      "The 'command' keyword argument is required for launching Prodigy."
                  )
              prodigy.serve(command=command, **overrides)
        flavors
  
      special
  
    Prodigy integration flavors.
        prodigy_annotator_flavor
    Prodigy annotator flavor.
        
ProdigyAnnotatorConfig            (BaseAnnotatorConfig, AuthenticationConfigMixin)
        
    Config for the Prodigy annotator.
See https://prodi.gy/docs/install#config for more on custom config files, but this allows you to override the default Prodigy config.
Attributes:
| Name | Type | Description | 
|---|---|---|
| custom_config_path | Optional[str] | The path to a custom config file for Prodigy. | 
Source code in zenml/integrations/prodigy/flavors/prodigy_annotator_flavor.py
          class ProdigyAnnotatorConfig(BaseAnnotatorConfig, AuthenticationConfigMixin):
              """Configuration for the Prodigy annotator.

              Lets you override the default Prodigy config by pointing at a custom
              config file; see https://prodi.gy/docs/install#config for details on
              such files.

              Attributes:
                  custom_config_path: The path to a custom config file for Prodigy.
              """

              # No custom config file unless one is set explicitly.
              custom_config_path: Optional[str] = None
        
ProdigyAnnotatorFlavor            (BaseAnnotatorFlavor)
        
    Prodigy annotator flavor.
Source code in zenml/integrations/prodigy/flavors/prodigy_annotator_flavor.py
          class ProdigyAnnotatorFlavor(BaseAnnotatorFlavor):
              """Flavor describing the Prodigy annotator stack component."""

              @property
              def name(self) -> str:
                  """Name of the flavor.

                  Returns:
                      The name of the flavor.
                  """
                  flavor_name = PRODIGY_ANNOTATOR_FLAVOR
                  return flavor_name

              @property
              def docs_url(self) -> Optional[str]:
                  """URL of the documentation explaining this flavor.

                  Returns:
                      A flavor docs url.
                  """
                  default_docs_url = self.generate_default_docs_url()
                  return default_docs_url

              @property
              def sdk_docs_url(self) -> Optional[str]:
                  """URL of the SDK documentation explaining this flavor.

                  Returns:
                      A flavor SDK docs url.
                  """
                  default_sdk_docs_url = self.generate_default_sdk_docs_url()
                  return default_sdk_docs_url

              @property
              def logo_url(self) -> str:
                  """URL of the logo that represents the flavor in the dashboard.

                  Returns:
                      The flavor logo.
                  """
                  logo = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/annotator/prodigy.png"
                  return logo

              @property
              def config_class(self) -> Type[ProdigyAnnotatorConfig]:
                  """Config class used by this flavor.

                  Returns:
                      The config class.
                  """
                  return ProdigyAnnotatorConfig

              @property
              def implementation_class(self) -> Type["ProdigyAnnotator"]:
                  """Implementation class for this flavor.

                  Returns:
                      The implementation class.
                  """
                  # Imported inside the property rather than at module level.
                  from zenml.integrations.prodigy.annotators import ProdigyAnnotator

                  return ProdigyAnnotator
config_class: Type[zenml.integrations.prodigy.flavors.prodigy_annotator_flavor.ProdigyAnnotatorConfig]
  
      property
      readonly
  
    Returns ProdigyAnnotatorConfig config class.
Returns:
| Type | Description | 
|---|---|
| Type[zenml.integrations.prodigy.flavors.prodigy_annotator_flavor.ProdigyAnnotatorConfig] | The config class. | 
docs_url: Optional[str]
  
      property
      readonly
  
    A url to point at docs explaining this flavor.
Returns:
| Type | Description | 
|---|---|
| Optional[str] | A flavor docs url. | 
implementation_class: Type[ProdigyAnnotator]
  
      property
      readonly
  
    Implementation class for this flavor.
Returns:
| Type | Description | 
|---|---|
| Type[ProdigyAnnotator] | The implementation class. | 
logo_url: str
  
      property
      readonly
  
    A url to represent the flavor in the dashboard.
Returns:
| Type | Description | 
|---|---|
| str | The flavor logo. | 
name: str
  
      property
      readonly
  
    Name of the flavor.
Returns:
| Type | Description | 
|---|---|
| str | The name of the flavor. | 
sdk_docs_url: Optional[str]
  
      property
      readonly
  
    A url to point at SDK docs explaining this flavor.
Returns:
| Type | Description | 
|---|---|
| Optional[str] | A flavor SDK docs url. |