Huggingface
zenml.integrations.huggingface
special
Initialization of the Huggingface integration.
HuggingfaceIntegration (Integration)
Definition of Huggingface integration for ZenML.
Source code in zenml/integrations/huggingface/__init__.py
class HuggingfaceIntegration(Integration):
"""Definition of Huggingface integration for ZenML."""
NAME = HUGGINGFACE
REQUIREMENTS = [
"transformers<=4.31",
"datasets",
"huggingface_hub>0.19.0",
"accelerate",
"bitsandbytes>=0.41.3",
"peft",
# temporary fix for CI issue similar to:
# - https://github.com/huggingface/datasets/issues/6737
# - https://github.com/huggingface/datasets/issues/6697
# TODO try relaxing it back going forward
"fsspec<=2023.12.0",
]
@classmethod
def activate(cls) -> None:
"""Activates the integration."""
from zenml.integrations.huggingface import materializers # noqa
from zenml.integrations.huggingface import services
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
"""Declare the stack component flavors for the Huggingface integration.
Returns:
List of stack component flavors for this integration.
"""
from zenml.integrations.huggingface.flavors import (
HuggingFaceModelDeployerFlavor,
)
return [HuggingFaceModelDeployerFlavor]
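For orientation, here is a minimal sketch (not part of the module above) of how the integration can be inspected and activated from Python once its requirements are installed, e.g. with `zenml integration install huggingface`:

```python
from zenml.integrations.huggingface import HuggingfaceIntegration

# Pinned pip requirements declared by the integration.
print(HuggingfaceIntegration.REQUIREMENTS)

# Registers the materializers and imports the services module.
HuggingfaceIntegration.activate()

# Stack component flavors contributed by this integration.
for flavor_class in HuggingfaceIntegration.flavors():
    print(flavor_class().name)
```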
activate()
classmethod
Activates the integration.
Source code in zenml/integrations/huggingface/__init__.py
@classmethod
def activate(cls) -> None:
"""Activates the integration."""
from zenml.integrations.huggingface import materializers # noqa
from zenml.integrations.huggingface import services
flavors()
classmethod
Declare the stack component flavors for the Huggingface integration.
Returns:

Type | Description
---|---
`List[Type[zenml.stack.flavor.Flavor]]` | List of stack component flavors for this integration.
Source code in zenml/integrations/huggingface/__init__.py
@classmethod
def flavors(cls) -> List[Type[Flavor]]:
"""Declare the stack component flavors for the Huggingface integration.
Returns:
List of stack component flavors for this integration.
"""
from zenml.integrations.huggingface.flavors import (
HuggingFaceModelDeployerFlavor,
)
return [HuggingFaceModelDeployerFlavor]
flavors
special
Hugging Face integration flavors.
huggingface_model_deployer_flavor
Hugging Face model deployer flavor.
HuggingFaceBaseConfig (BaseModel)
Hugging Face Inference Endpoint configuration.
Source code in zenml/integrations/huggingface/flavors/huggingface_model_deployer_flavor.py
class HuggingFaceBaseConfig(BaseModel):
"""Hugging Face Inference Endpoint configuration."""
repository: Optional[str] = None
framework: Optional[str] = None
accelerator: Optional[str] = None
instance_size: Optional[str] = None
instance_type: Optional[str] = None
region: Optional[str] = None
vendor: Optional[str] = None
account_id: Optional[str] = None
min_replica: int = 0
max_replica: int = 1
revision: Optional[str] = None
task: Optional[str] = None
custom_image: Optional[Dict[str, Any]] = None
endpoint_type: str = "public"
secret_name: Optional[str] = None
namespace: Optional[str] = None
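As an illustration, the base config can be instantiated directly; all values below are placeholders, and the valid accelerator, instance, vendor and region identifiers are defined by the Hugging Face Inference Endpoints service, not by ZenML:

```python
from zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor import (
    HuggingFaceBaseConfig,
)

# Placeholder values for illustration only.
config = HuggingFaceBaseConfig(
    repository="gpt2",
    framework="pytorch",
    task="text-generation",
    accelerator="cpu",
    instance_size="x1",
    instance_type="intel-icl",
    region="us-east-1",
    vendor="aws",
    min_replica=0,
    max_replica=1,
)
print(config.endpoint_type)  # "public" by default
```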
HuggingFaceModelDeployerConfig (BaseModelDeployerConfig, HuggingFaceBaseConfig)
Configuration for the Hugging Face model deployer.
Attributes:

Name | Type | Description
---|---|---
`token` | `Optional[str]` | Hugging Face token used for authentication
`namespace` | `str` | Hugging Face namespace used to list endpoints
Source code in zenml/integrations/huggingface/flavors/huggingface_model_deployer_flavor.py
class HuggingFaceModelDeployerConfig(
BaseModelDeployerConfig, HuggingFaceBaseConfig
):
"""Configuration for the Hugging Face model deployer.
Attributes:
token: Hugging Face token used for authentication
namespace: Hugging Face namespace used to list endpoints
"""
token: Optional[str] = SecretField(default=None)
# The namespace to list endpoints for. Set to `"*"` to list all endpoints
# from all namespaces (i.e. personal namespace and all orgs the user belongs to).
namespace: str
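A minimal sketch of constructing the deployer configuration in Python; the token and namespace values are hypothetical, and in practice the component is usually registered on a ZenML stack rather than instantiated by hand:

```python
from zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor import (
    HuggingFaceModelDeployerConfig,
)

deployer_config = HuggingFaceModelDeployerConfig(
    token="hf_...",      # hypothetical token; alternatively configure a secret
    namespace="my-org",  # hypothetical namespace; "*" lists all accessible namespaces
)
```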
HuggingFaceModelDeployerFlavor (BaseModelDeployerFlavor)
Hugging Face Endpoint model deployer flavor.
Source code in zenml/integrations/huggingface/flavors/huggingface_model_deployer_flavor.py
class HuggingFaceModelDeployerFlavor(BaseModelDeployerFlavor):
"""Hugging Face Endpoint model deployer flavor."""
@property
def name(self) -> str:
"""Name of the flavor.
Returns:
The name of the flavor.
"""
return HUGGINGFACE_MODEL_DEPLOYER_FLAVOR
@property
def docs_url(self) -> Optional[str]:
"""A url to point at docs explaining this flavor.
Returns:
A flavor docs url.
"""
return self.generate_default_docs_url()
@property
def sdk_docs_url(self) -> Optional[str]:
"""A url to point at SDK docs explaining this flavor.
Returns:
A flavor SDK docs url.
"""
return self.generate_default_sdk_docs_url()
@property
def logo_url(self) -> str:
"""A url to represent the flavor in the dashboard.
Returns:
The flavor logo.
"""
return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_registry/huggingface.png"
@property
def config_class(self) -> Type[HuggingFaceModelDeployerConfig]:
"""Returns `HuggingFaceModelDeployerConfig` config class.
Returns:
The config class.
"""
return HuggingFaceModelDeployerConfig
@property
def implementation_class(self) -> Type["HuggingFaceModelDeployer"]:
"""Implementation class for this flavor.
Returns:
The implementation class.
"""
from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import (
HuggingFaceModelDeployer,
)
return HuggingFaceModelDeployer
config_class: Type[zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor.HuggingFaceModelDeployerConfig]
property
readonly
Returns the `HuggingFaceModelDeployerConfig` config class.

Returns:

Type | Description
---|---
`Type[zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor.HuggingFaceModelDeployerConfig]` | The config class.
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:

Type | Description
---|---
`Optional[str]` | A flavor docs url.
implementation_class: Type[HuggingFaceModelDeployer]
property
readonly
Implementation class for this flavor.
Returns:

Type | Description
---|---
`Type[HuggingFaceModelDeployer]` | The implementation class.
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:

Type | Description
---|---
`str` | The flavor logo.
name: str
property
readonly
Name of the flavor.
Returns:

Type | Description
---|---
`str` | The name of the flavor.
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:

Type | Description
---|---
`Optional[str]` | A flavor SDK docs url.
materializers
special
Initialization of Huggingface materializers.
huggingface_datasets_materializer
Implementation of the Huggingface datasets materializer.
HFDatasetMaterializer (BaseMaterializer)
Materializer to read data to and from huggingface datasets.
Source code in zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py
class HFDatasetMaterializer(BaseMaterializer):
"""Materializer to read data to and from huggingface datasets."""
ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Dataset, DatasetDict)
ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = (
ArtifactType.DATA_ANALYSIS
)
def load(
self, data_type: Union[Type[Dataset], Type[DatasetDict]]
) -> Union[Dataset, DatasetDict]:
"""Reads Dataset.
Args:
data_type: The type of the dataset to read.
Returns:
The dataset read from the specified dir.
"""
temp_dir = mkdtemp()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_DATASET_DIR),
temp_dir,
)
return load_from_disk(temp_dir)
def save(self, ds: Union[Dataset, DatasetDict]) -> None:
"""Writes a Dataset to the specified dir.
Args:
ds: The Dataset to write.
"""
temp_dir = TemporaryDirectory()
path = os.path.join(temp_dir.name, DEFAULT_DATASET_DIR)
try:
ds.save_to_disk(path)
io_utils.copy_dir(
path,
os.path.join(self.uri, DEFAULT_DATASET_DIR),
)
finally:
fileio.rmtree(temp_dir.name)
def extract_metadata(
self, ds: Union[Dataset, DatasetDict]
) -> Dict[str, "MetadataType"]:
"""Extract metadata from the given `Dataset` object.
Args:
ds: The `Dataset` object to extract metadata from.
Returns:
The extracted metadata as a dictionary.
Raises:
ValueError: If the given object is not a `Dataset` or `DatasetDict`.
"""
pandas_materializer = PandasMaterializer(self.uri)
if isinstance(ds, Dataset):
return pandas_materializer.extract_metadata(ds.to_pandas())
elif isinstance(ds, DatasetDict):
metadata: Dict[str, Dict[str, "MetadataType"]] = defaultdict(dict)
for dataset_name, dataset in ds.items():
dataset_metadata = pandas_materializer.extract_metadata(
dataset.to_pandas()
)
for key, value in dataset_metadata.items():
metadata[key][dataset_name] = value
return dict(metadata)
raise ValueError(f"Unsupported type {type(ds)}")
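This materializer is used implicitly whenever a step returns a `Dataset` or `DatasetDict`. A minimal sketch, assuming the huggingface integration is installed and activated:

```python
from datasets import Dataset
from zenml import step

@step
def build_dataset() -> Dataset:
    # The returned Dataset is written to the artifact store via
    # HFDatasetMaterializer.save() when the step finishes.
    return Dataset.from_dict({"text": ["hello", "world"], "label": [0, 1]})
```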
extract_metadata(self, ds)
Extract metadata from the given `Dataset` object.

Parameters:

Name | Type | Description | Default
---|---|---|---
`ds` | `Union[datasets.Dataset, datasets.dataset_dict.DatasetDict]` | The `Dataset` object to extract metadata from. | required

Returns:

Type | Description
---|---
`Dict[str, MetadataType]` | The extracted metadata as a dictionary.

Exceptions:

Type | Description
---|---
`ValueError` | If the given object is not a `Dataset` or `DatasetDict`.
Source code in zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py
def extract_metadata(
self, ds: Union[Dataset, DatasetDict]
) -> Dict[str, "MetadataType"]:
"""Extract metadata from the given `Dataset` object.
Args:
ds: The `Dataset` object to extract metadata from.
Returns:
The extracted metadata as a dictionary.
Raises:
ValueError: If the given object is not a `Dataset` or `DatasetDict`.
"""
pandas_materializer = PandasMaterializer(self.uri)
if isinstance(ds, Dataset):
return pandas_materializer.extract_metadata(ds.to_pandas())
elif isinstance(ds, DatasetDict):
metadata: Dict[str, Dict[str, "MetadataType"]] = defaultdict(dict)
for dataset_name, dataset in ds.items():
dataset_metadata = pandas_materializer.extract_metadata(
dataset.to_pandas()
)
for key, value in dataset_metadata.items():
metadata[key][dataset_name] = value
return dict(metadata)
raise ValueError(f"Unsupported type {type(ds)}")
load(self, data_type)
Reads Dataset.
Parameters:

Name | Type | Description | Default
---|---|---|---
`data_type` | `Union[Type[datasets.Dataset], Type[datasets.dataset_dict.DatasetDict]]` | The type of the dataset to read. | required

Returns:

Type | Description
---|---
`Union[datasets.Dataset, datasets.dataset_dict.DatasetDict]` | The dataset read from the specified dir.
Source code in zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py
def load(
self, data_type: Union[Type[Dataset], Type[DatasetDict]]
) -> Union[Dataset, DatasetDict]:
"""Reads Dataset.
Args:
data_type: The type of the dataset to read.
Returns:
The dataset read from the specified dir.
"""
temp_dir = mkdtemp()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_DATASET_DIR),
temp_dir,
)
return load_from_disk(temp_dir)
save(self, ds)
Writes a Dataset to the specified dir.
Parameters:

Name | Type | Description | Default
---|---|---|---
`ds` | `Union[datasets.Dataset, datasets.dataset_dict.DatasetDict]` | The Dataset to write. | required
Source code in zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py
def save(self, ds: Union[Dataset, DatasetDict]) -> None:
"""Writes a Dataset to the specified dir.
Args:
ds: The Dataset to write.
"""
temp_dir = TemporaryDirectory()
path = os.path.join(temp_dir.name, DEFAULT_DATASET_DIR)
try:
ds.save_to_disk(path)
io_utils.copy_dir(
path,
os.path.join(self.uri, DEFAULT_DATASET_DIR),
)
finally:
fileio.rmtree(temp_dir.name)
huggingface_pt_model_materializer
Implementation of the Huggingface PyTorch model materializer.
HFPTModelMaterializer (BaseMaterializer)
Materializer to read torch model to and from huggingface pretrained model.
Source code in zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py
class HFPTModelMaterializer(BaseMaterializer):
"""Materializer to read torch model to and from huggingface pretrained model."""
ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (PreTrainedModel,)
ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
def load(self, data_type: Type[PreTrainedModel]) -> PreTrainedModel:
"""Reads HFModel.
Args:
data_type: The type of the model to read.
Returns:
The model read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir.name
)
config = AutoConfig.from_pretrained(temp_dir.name)
architecture = config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)
def save(self, model: PreTrainedModel) -> None:
"""Writes a Model to the specified dir.
Args:
model: The Torch Model to write.
"""
temp_dir = TemporaryDirectory()
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR),
)
def extract_metadata(
self, model: PreTrainedModel
) -> Dict[str, "MetadataType"]:
"""Extract metadata from the given `PreTrainedModel` object.
Args:
model: The `PreTrainedModel` object to extract metadata from.
Returns:
The extracted metadata as a dictionary.
"""
from zenml.integrations.pytorch.utils import count_module_params
module_param_metadata = count_module_params(model)
return {
**module_param_metadata,
"dtype": DType(str(model.dtype)),
"device": str(model.device),
}
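A minimal sketch of a step whose `PreTrainedModel` output is handled by this materializer; the checkpoint name is only an example:

```python
from transformers import AutoModelForSequenceClassification, PreTrainedModel
from zenml import step

@step
def load_model() -> PreTrainedModel:
    # Stored via model.save_pretrained() by HFPTModelMaterializer.save().
    return AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", num_labels=2
    )
```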
extract_metadata(self, model)
Extract metadata from the given `PreTrainedModel` object.

Parameters:

Name | Type | Description | Default
---|---|---|---
`model` | `transformers.PreTrainedModel` | The `PreTrainedModel` object to extract metadata from. | required

Returns:

Type | Description
---|---
`Dict[str, MetadataType]` | The extracted metadata as a dictionary.
Source code in zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py
def extract_metadata(
self, model: PreTrainedModel
) -> Dict[str, "MetadataType"]:
"""Extract metadata from the given `PreTrainedModel` object.
Args:
model: The `PreTrainedModel` object to extract metadata from.
Returns:
The extracted metadata as a dictionary.
"""
from zenml.integrations.pytorch.utils import count_module_params
module_param_metadata = count_module_params(model)
return {
**module_param_metadata,
"dtype": DType(str(model.dtype)),
"device": str(model.device),
}
load(self, data_type)
Reads HFModel.
Parameters:

Name | Type | Description | Default
---|---|---|---
`data_type` | `Type[transformers.PreTrainedModel]` | The type of the model to read. | required

Returns:

Type | Description
---|---
`transformers.PreTrainedModel` | The model read from the specified dir.
Source code in zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py
def load(self, data_type: Type[PreTrainedModel]) -> PreTrainedModel:
"""Reads HFModel.
Args:
data_type: The type of the model to read.
Returns:
The model read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir.name
)
config = AutoConfig.from_pretrained(temp_dir.name)
architecture = config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)
save(self, model)
Writes a Model to the specified dir.
Parameters:

Name | Type | Description | Default
---|---|---|---
`model` | `transformers.PreTrainedModel` | The Torch Model to write. | required
Source code in zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py
def save(self, model: PreTrainedModel) -> None:
"""Writes a Model to the specified dir.
Args:
model: The Torch Model to write.
"""
temp_dir = TemporaryDirectory()
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR),
)
huggingface_tf_model_materializer
Implementation of the Huggingface TF model materializer.
HFTFModelMaterializer (BaseMaterializer)
Materializer to read Tensorflow model to and from huggingface pretrained model.
Source code in zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py
class HFTFModelMaterializer(BaseMaterializer):
"""Materializer to read Tensorflow model to and from huggingface pretrained model."""
ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (TFPreTrainedModel,)
ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
def load(self, data_type: Type[TFPreTrainedModel]) -> TFPreTrainedModel:
"""Reads HFModel.
Args:
data_type: The type of the model to read.
Returns:
The model read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir.name
)
config = AutoConfig.from_pretrained(temp_dir.name)
architecture = "TF" + config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)
def save(self, model: TFPreTrainedModel) -> None:
"""Writes a Model to the specified dir.
Args:
model: The TF Model to write.
"""
temp_dir = TemporaryDirectory()
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR),
)
def extract_metadata(
self, model: TFPreTrainedModel
) -> Dict[str, "MetadataType"]:
"""Extract metadata from the given `PreTrainedModel` object.
Args:
model: The `PreTrainedModel` object to extract metadata from.
Returns:
The extracted metadata as a dictionary.
"""
return {
"num_layers": len(model.layers),
"num_params": model.num_parameters(only_trainable=False),
"num_trainable_params": model.num_parameters(only_trainable=True),
}
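A minimal sketch of a step whose TensorFlow model output is handled by this materializer (requires TensorFlow; the checkpoint name is only an example). Note that on load the materializer resolves the model class by prefixing the stored architecture name with "TF":

```python
from transformers import TFAutoModelForSequenceClassification, TFPreTrainedModel
from zenml import step

@step
def load_tf_model() -> TFPreTrainedModel:
    # Stored via model.save_pretrained() by HFTFModelMaterializer.save().
    return TFAutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", num_labels=2
    )
```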
extract_metadata(self, model)
Extract metadata from the given `PreTrainedModel` object.

Parameters:

Name | Type | Description | Default
---|---|---|---
`model` | `transformers.TFPreTrainedModel` | The `PreTrainedModel` object to extract metadata from. | required

Returns:

Type | Description
---|---
`Dict[str, MetadataType]` | The extracted metadata as a dictionary.
Source code in zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py
def extract_metadata(
self, model: TFPreTrainedModel
) -> Dict[str, "MetadataType"]:
"""Extract metadata from the given `PreTrainedModel` object.
Args:
model: The `PreTrainedModel` object to extract metadata from.
Returns:
The extracted metadata as a dictionary.
"""
return {
"num_layers": len(model.layers),
"num_params": model.num_parameters(only_trainable=False),
"num_trainable_params": model.num_parameters(only_trainable=True),
}
load(self, data_type)
Reads HFModel.
Parameters:

Name | Type | Description | Default
---|---|---|---
`data_type` | `Type[transformers.TFPreTrainedModel]` | The type of the model to read. | required

Returns:

Type | Description
---|---
`transformers.TFPreTrainedModel` | The model read from the specified dir.
Source code in zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py
def load(self, data_type: Type[TFPreTrainedModel]) -> TFPreTrainedModel:
"""Reads HFModel.
Args:
data_type: The type of the model to read.
Returns:
The model read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir.name
)
config = AutoConfig.from_pretrained(temp_dir.name)
architecture = "TF" + config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)
save(self, model)
Writes a Model to the specified dir.
Parameters:

Name | Type | Description | Default
---|---|---|---
`model` | `transformers.TFPreTrainedModel` | The TF Model to write. | required
Source code in zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py
def save(self, model: TFPreTrainedModel) -> None:
"""Writes a Model to the specified dir.
Args:
model: The TF Model to write.
"""
temp_dir = TemporaryDirectory()
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR),
)
huggingface_tokenizer_materializer
Implementation of the Huggingface tokenizer materializer.
HFTokenizerMaterializer (BaseMaterializer)
Materializer to read tokenizer to and from huggingface tokenizer.
Source code in zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py
class HFTokenizerMaterializer(BaseMaterializer):
"""Materializer to read tokenizer to and from huggingface tokenizer."""
ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (
PreTrainedTokenizerBase,
)
ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
def load(self, data_type: Type[Any]) -> PreTrainedTokenizerBase:
"""Reads Tokenizer.
Args:
data_type: The type of the tokenizer to read.
Returns:
The tokenizer read from the specified dir.
"""
with TemporaryDirectory() as temp_dir:
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir
)
return AutoTokenizer.from_pretrained(temp_dir)
def save(self, tokenizer: Type[Any]) -> None:
"""Writes a Tokenizer to the specified dir.
Args:
tokenizer: The HFTokenizer to write.
"""
with TemporaryDirectory() as temp_dir:
tokenizer.save_pretrained(temp_dir)
io_utils.copy_dir(
temp_dir,
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR),
)
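A minimal sketch of a step returning a tokenizer that this materializer persists; the checkpoint name is only an example:

```python
from transformers import AutoTokenizer, PreTrainedTokenizerBase
from zenml import step

@step
def load_tokenizer() -> PreTrainedTokenizerBase:
    # Stored via tokenizer.save_pretrained() by HFTokenizerMaterializer.save().
    return AutoTokenizer.from_pretrained("bert-base-uncased")
```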
load(self, data_type)
Reads Tokenizer.
Parameters:

Name | Type | Description | Default
---|---|---|---
`data_type` | `Type[Any]` | The type of the tokenizer to read. | required

Returns:

Type | Description
---|---
`transformers.tokenization_utils_base.PreTrainedTokenizerBase` | The tokenizer read from the specified dir.
Source code in zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py
def load(self, data_type: Type[Any]) -> PreTrainedTokenizerBase:
"""Reads Tokenizer.
Args:
data_type: The type of the tokenizer to read.
Returns:
The tokenizer read from the specified dir.
"""
with TemporaryDirectory() as temp_dir:
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir
)
return AutoTokenizer.from_pretrained(temp_dir)
save(self, tokenizer)
Writes a Tokenizer to the specified dir.
Parameters:

Name | Type | Description | Default
---|---|---|---
`tokenizer` | `Type[Any]` | The HFTokenizer to write. | required
Source code in zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py
def save(self, tokenizer: Type[Any]) -> None:
"""Writes a Tokenizer to the specified dir.
Args:
tokenizer: The HFTokenizer to write.
"""
with TemporaryDirectory() as temp_dir:
tokenizer.save_pretrained(temp_dir)
io_utils.copy_dir(
temp_dir,
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR),
)
model_deployers
special
Initialization of the Hugging Face model deployers.
huggingface_model_deployer
Implementation of the Hugging Face Model Deployer.
HuggingFaceModelDeployer (BaseModelDeployer)
Hugging Face endpoint model deployer.
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
class HuggingFaceModelDeployer(BaseModelDeployer):
"""Hugging Face endpoint model deployer."""
NAME: ClassVar[str] = "HuggingFace"
FLAVOR: ClassVar[Type[BaseModelDeployerFlavor]] = (
HuggingFaceModelDeployerFlavor
)
@property
def config(self) -> HuggingFaceModelDeployerConfig:
"""Config class for the Hugging Face Model deployer settings class.
Returns:
The configuration.
"""
return cast(HuggingFaceModelDeployerConfig, self._config)
@property
def validator(self) -> Optional[StackValidator]:
"""Validates the stack.
Returns:
A validator that checks that the stack contains a remote artifact
store.
"""
def _validate_if_secret_or_token_is_present(
stack: "Stack",
) -> Tuple[bool, str]:
"""Check if secret or token is present in the stack.
Args:
stack: The stack to validate.
Returns:
A tuple with a boolean indicating whether the stack is valid
and a message describing the validation result.
"""
return bool(self.config.token or self.config.secret_name), (
"The Hugging Face model deployer requires either a secret name"
" or a token to be present in the stack."
)
return StackValidator(
custom_validation_function=_validate_if_secret_or_token_is_present,
)
def _create_new_service(
self, id: UUID, timeout: int, config: HuggingFaceServiceConfig
) -> HuggingFaceDeploymentService:
"""Creates a new Hugging FaceDeploymentService.
Args:
id: the UUID of the model to be deployed with Hugging Face model deployer.
timeout: the timeout in seconds to wait for the Hugging Face inference endpoint
to be provisioned and successfully started or updated.
config: the configuration of the model to be deployed with Hugging Face model deployer.
Returns:
The HuggingFaceServiceConfig object that can be used to interact
with the Hugging Face inference endpoint.
"""
# create a new service for the new model
service = HuggingFaceDeploymentService(uuid=id, config=config)
logger.info(
f"Creating an artifact {HUGGINGFACE_SERVICE_ARTIFACT} with service instance attached as metadata."
" If there's an active pipeline and/or model this artifact will be associated with it."
)
service.start(timeout=timeout)
return service
def _clean_up_existing_service(
self,
timeout: int,
force: bool,
existing_service: HuggingFaceDeploymentService,
) -> None:
"""Stop existing services.
Args:
timeout: the timeout in seconds to wait for the Hugging Face
deployment to be stopped.
force: if True, force the service to stop
existing_service: Existing Hugging Face deployment service
"""
# stop the older service
existing_service.stop(timeout=timeout, force=force)
def perform_deploy_model(
self,
id: UUID,
config: ServiceConfig,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
) -> BaseService:
"""Create a new Hugging Face deployment service or update an existing one.
This should serve the supplied model and deployment configuration.
Args:
id: the UUID of the model to be deployed with Hugging Face.
config: the configuration of the model to be deployed with Hugging Face.
timeout: the timeout in seconds to wait for the Hugging Face endpoint
to be provisioned and successfully started or updated. If set
to 0, the method will return immediately after the Hugging Face
server is provisioned, without waiting for it to fully start.
Returns:
The ZenML Hugging Face deployment service object that can be used to
interact with the remote Hugging Face inference endpoint server.
"""
with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler:
config = cast(HuggingFaceServiceConfig, config)
# create a new HuggingFaceDeploymentService instance
service = self._create_new_service(
id=id, timeout=timeout, config=config
)
logger.info(
f"Creating a new Hugging Face inference endpoint service: {service}"
)
# Add telemetry with metadata that gets the stack metadata and
# differentiates between pure model and custom code deployments
stack = Client().active_stack
stack_metadata = {
component_type.value: component.flavor
for component_type, component in stack.components.items()
}
analytics_handler.metadata = {
"store_type": Client().zen_store.type.value,
**stack_metadata,
}
return service
def perform_stop_model(
self,
service: BaseService,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
force: bool = False,
) -> BaseService:
"""Method to stop a model server.
Args:
service: The service to stop.
timeout: Timeout in seconds to wait for the service to stop.
force: If True, force the service to stop.
Returns:
The stopped service.
"""
service.stop(timeout=timeout, force=force)
return service
def perform_start_model(
self,
service: BaseService,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
) -> BaseService:
"""Method to start a model server.
Args:
service: The service to start.
timeout: Timeout in seconds to wait for the service to start.
Returns:
The started service.
"""
service.start(timeout=timeout)
return service
def perform_delete_model(
self,
service: BaseService,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
force: bool = False,
) -> None:
"""Method to delete all configuration of a model server.
Args:
service: The service to delete.
timeout: Timeout in seconds to wait for the service to stop.
force: If True, force the service to stop.
"""
service = cast(HuggingFaceDeploymentService, service)
self._clean_up_existing_service(
existing_service=service, timeout=timeout, force=force
)
@staticmethod
def get_model_server_info( # type: ignore[override]
service_instance: "HuggingFaceDeploymentService",
) -> Dict[str, Optional[str]]:
"""Return implementation specific information that might be relevant to the user.
Args:
service_instance: Instance of a HuggingFaceDeploymentService
Returns:
Model server information.
"""
return {
"PREDICTION_URL": service_instance.get_prediction_url(),
"HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
}
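As a hedged end-to-end sketch, the deployer can be driven through the low-level `perform_deploy_model` method documented below. It assumes a stack with this model deployer is active and that either a token or a secret name is configured on it; all endpoint settings are placeholders:

```python
from uuid import uuid4

from zenml.client import Client
from zenml.integrations.huggingface.services.huggingface_deployment import (
    HuggingFaceServiceConfig,
)

# The Hugging Face model deployer from the active stack.
model_deployer = Client().active_stack.model_deployer

service_config = HuggingFaceServiceConfig(
    service_name="my-hf-endpoint",  # placeholder
    repository="gpt2",              # placeholder model repository
    framework="pytorch",
    task="text-generation",
    accelerator="cpu",
    instance_size="x1",
    instance_type="intel-icl",
    region="us-east-1",
    vendor="aws",
    namespace="my-org",             # placeholder namespace
)

# Low-level call; pipelines normally go through the deployer's public
# deployment API rather than calling this directly.
service = model_deployer.perform_deploy_model(
    id=uuid4(), config=service_config, timeout=1200
)
print(service.get_prediction_url())
```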
config: HuggingFaceModelDeployerConfig
property
readonly
Config class for the Hugging Face Model deployer settings class.
Returns:

Type | Description
---|---
`HuggingFaceModelDeployerConfig` | The configuration.
validator: Optional[zenml.stack.stack_validator.StackValidator]
property
readonly
Validates the stack.
Returns:

Type | Description
---|---
`Optional[zenml.stack.stack_validator.StackValidator]` | A validator that checks that the stack contains a remote artifact store.
FLAVOR (BaseModelDeployerFlavor)
Hugging Face Endpoint model deployer flavor.
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
class HuggingFaceModelDeployerFlavor(BaseModelDeployerFlavor):
"""Hugging Face Endpoint model deployer flavor."""
@property
def name(self) -> str:
"""Name of the flavor.
Returns:
The name of the flavor.
"""
return HUGGINGFACE_MODEL_DEPLOYER_FLAVOR
@property
def docs_url(self) -> Optional[str]:
"""A url to point at docs explaining this flavor.
Returns:
A flavor docs url.
"""
return self.generate_default_docs_url()
@property
def sdk_docs_url(self) -> Optional[str]:
"""A url to point at SDK docs explaining this flavor.
Returns:
A flavor SDK docs url.
"""
return self.generate_default_sdk_docs_url()
@property
def logo_url(self) -> str:
"""A url to represent the flavor in the dashboard.
Returns:
The flavor logo.
"""
return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_registry/huggingface.png"
@property
def config_class(self) -> Type[HuggingFaceModelDeployerConfig]:
"""Returns `HuggingFaceModelDeployerConfig` config class.
Returns:
The config class.
"""
return HuggingFaceModelDeployerConfig
@property
def implementation_class(self) -> Type["HuggingFaceModelDeployer"]:
"""Implementation class for this flavor.
Returns:
The implementation class.
"""
from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import (
HuggingFaceModelDeployer,
)
return HuggingFaceModelDeployer
config_class: Type[zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor.HuggingFaceModelDeployerConfig]
property
readonly
Returns the `HuggingFaceModelDeployerConfig` config class.

Returns:

Type | Description
---|---
`Type[zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor.HuggingFaceModelDeployerConfig]` | The config class.
docs_url: Optional[str]
property
readonly
A url to point at docs explaining this flavor.
Returns:

Type | Description
---|---
`Optional[str]` | A flavor docs url.
implementation_class: Type[HuggingFaceModelDeployer]
property
readonly
Implementation class for this flavor.
Returns:

Type | Description
---|---
`Type[HuggingFaceModelDeployer]` | The implementation class.
logo_url: str
property
readonly
A url to represent the flavor in the dashboard.
Returns:

Type | Description
---|---
`str` | The flavor logo.
name: str
property
readonly
Name of the flavor.
Returns:

Type | Description
---|---
`str` | The name of the flavor.
sdk_docs_url: Optional[str]
property
readonly
A url to point at SDK docs explaining this flavor.
Returns:

Type | Description
---|---
`Optional[str]` | A flavor SDK docs url.
get_model_server_info(service_instance)
staticmethod
Return implementation specific information that might be relevant to the user.
Parameters:

Name | Type | Description | Default
---|---|---|---
`service_instance` | `HuggingFaceDeploymentService` | Instance of a HuggingFaceDeploymentService | required

Returns:

Type | Description
---|---
`Dict[str, Optional[str]]` | Model server information.
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
@staticmethod
def get_model_server_info( # type: ignore[override]
service_instance: "HuggingFaceDeploymentService",
) -> Dict[str, Optional[str]]:
"""Return implementation specific information that might be relevant to the user.
Args:
service_instance: Instance of a HuggingFaceDeploymentService
Returns:
Model server information.
"""
return {
"PREDICTION_URL": service_instance.get_prediction_url(),
"HEALTH_CHECK_URL": service_instance.get_healthcheck_url(),
}
perform_delete_model(self, service, timeout=300, force=False)
Method to delete all configuration of a model server.
Parameters:

Name | Type | Description | Default
---|---|---|---
`service` | `BaseService` | The service to delete. | required
`timeout` | `int` | Timeout in seconds to wait for the service to stop. | 300
`force` | `bool` | If True, force the service to stop. | False
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
def perform_delete_model(
self,
service: BaseService,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
force: bool = False,
) -> None:
"""Method to delete all configuration of a model server.
Args:
service: The service to delete.
timeout: Timeout in seconds to wait for the service to stop.
force: If True, force the service to stop.
"""
service = cast(HuggingFaceDeploymentService, service)
self._clean_up_existing_service(
existing_service=service, timeout=timeout, force=force
)
perform_deploy_model(self, id, config, timeout=300)
Create a new Hugging Face deployment service or update an existing one.
This should serve the supplied model and deployment configuration.
Parameters:

Name | Type | Description | Default
---|---|---|---
`id` | `UUID` | the UUID of the model to be deployed with Hugging Face. | required
`config` | `ServiceConfig` | the configuration of the model to be deployed with Hugging Face. | required
`timeout` | `int` | the timeout in seconds to wait for the Hugging Face endpoint to be provisioned and successfully started or updated. If set to 0, the method will return immediately after the Hugging Face server is provisioned, without waiting for it to fully start. | 300

Returns:

Type | Description
---|---
`BaseService` | The ZenML Hugging Face deployment service object that can be used to interact with the remote Hugging Face inference endpoint server.
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
def perform_deploy_model(
self,
id: UUID,
config: ServiceConfig,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
) -> BaseService:
"""Create a new Hugging Face deployment service or update an existing one.
This should serve the supplied model and deployment configuration.
Args:
id: the UUID of the model to be deployed with Hugging Face.
config: the configuration of the model to be deployed with Hugging Face.
timeout: the timeout in seconds to wait for the Hugging Face endpoint
to be provisioned and successfully started or updated. If set
to 0, the method will return immediately after the Hugging Face
server is provisioned, without waiting for it to fully start.
Returns:
The ZenML Hugging Face deployment service object that can be used to
interact with the remote Hugging Face inference endpoint server.
"""
with track_handler(AnalyticsEvent.MODEL_DEPLOYED) as analytics_handler:
config = cast(HuggingFaceServiceConfig, config)
# create a new HuggingFaceDeploymentService instance
service = self._create_new_service(
id=id, timeout=timeout, config=config
)
logger.info(
f"Creating a new Hugging Face inference endpoint service: {service}"
)
# Add telemetry with metadata that gets the stack metadata and
# differentiates between pure model and custom code deployments
stack = Client().active_stack
stack_metadata = {
component_type.value: component.flavor
for component_type, component in stack.components.items()
}
analytics_handler.metadata = {
"store_type": Client().zen_store.type.value,
**stack_metadata,
}
return service
perform_start_model(self, service, timeout=300)
Method to start a model server.
Parameters:

Name | Type | Description | Default
---|---|---|---
`service` | `BaseService` | The service to start. | required
`timeout` | `int` | Timeout in seconds to wait for the service to start. | 300

Returns:

Type | Description
---|---
`BaseService` | The started service.
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
def perform_start_model(
self,
service: BaseService,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
) -> BaseService:
"""Method to start a model server.
Args:
service: The service to start.
timeout: Timeout in seconds to wait for the service to start.
Returns:
The started service.
"""
service.start(timeout=timeout)
return service
perform_stop_model(self, service, timeout=300, force=False)
Method to stop a model server.
Parameters:

Name | Type | Description | Default
---|---|---|---
`service` | `BaseService` | The service to stop. | required
`timeout` | `int` | Timeout in seconds to wait for the service to stop. | 300
`force` | `bool` | If True, force the service to stop. | False

Returns:

Type | Description
---|---
`BaseService` | The stopped service.
Source code in zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py
def perform_stop_model(
self,
service: BaseService,
timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
force: bool = False,
) -> BaseService:
"""Method to stop a model server.
Args:
service: The service to stop.
timeout: Timeout in seconds to wait for the service to stop.
force: If True, force the service to stop.
Returns:
The stopped service.
"""
service.stop(timeout=timeout, force=force)
return service
services
special
Initialization of the Hugging Face Service.
huggingface_deployment
Implementation of the Hugging Face Deployment service.
HuggingFaceDeploymentService (BaseDeploymentService)
Hugging Face model deployment service.
Attributes:

Name | Type | Description
---|---|---
`SERVICE_TYPE` | `ClassVar[zenml.services.service_type.ServiceType]` | a service type descriptor with information describing the Hugging Face deployment service class
`config` | `HuggingFaceServiceConfig` | service configuration
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
class HuggingFaceDeploymentService(BaseDeploymentService):
"""Hugging Face model deployment service.
Attributes:
SERVICE_TYPE: a service type descriptor with information describing
the Hugging Face deployment service class
config: service configuration
"""
SERVICE_TYPE = ServiceType(
name="huggingface-deployment",
type="model-serving",
flavor="huggingface",
description="Hugging Face inference endpoint prediction service",
)
config: HuggingFaceServiceConfig
status: HuggingFaceServiceStatus = Field(
default_factory=lambda: HuggingFaceServiceStatus()
)
def __init__(self, config: HuggingFaceServiceConfig, **attrs: Any):
"""Initialize the Hugging Face deployment service.
Args:
config: service configuration
attrs: additional attributes to set on the service
"""
super().__init__(config=config, **attrs)
def get_token(self) -> str:
"""Get the Hugging Face token.
Raises:
ValueError: If token not found.
Returns:
Hugging Face token.
"""
client = Client()
token = None
if self.config.secret_name:
secret = client.get_secret(self.config.secret_name)
token = secret.secret_values["token"]
else:
from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import (
HuggingFaceModelDeployer,
)
model_deployer = client.active_stack.model_deployer
if not isinstance(model_deployer, HuggingFaceModelDeployer):
raise ValueError(
"HuggingFaceModelDeployer is not active in the stack."
)
token = model_deployer.config.token or None
if not token:
raise ValueError("Token not found.")
return token
@property
def hf_endpoint(self) -> InferenceEndpoint:
"""Get the deployed Hugging Face inference endpoint.
Returns:
Huggingface inference endpoint.
"""
return get_inference_endpoint(
name=self._generate_an_endpoint_name(),
token=self.get_token(),
namespace=self.config.namespace,
)
@property
def prediction_url(self) -> Optional[str]:
"""The prediction URI exposed by the prediction service.
Returns:
The prediction URI exposed by the prediction service, or None if
the service is not yet ready.
"""
return self.hf_endpoint.url if self.is_running else None
@property
def inference_client(self) -> InferenceClient:
"""Get the Hugging Face InferenceClient from Inference Endpoint.
Returns:
Hugging Face inference client.
"""
return self.hf_endpoint.client
def provision(self) -> None:
"""Provision or update remote Hugging Face deployment instance.
Raises:
Exception: If any unexpected error while creating inference endpoint.
"""
try:
# Attempt to create and wait for the inference endpoint
hf_endpoint = create_inference_endpoint(
name=self._generate_an_endpoint_name(),
repository=self.config.repository,
framework=self.config.framework,
accelerator=self.config.accelerator,
instance_size=self.config.instance_size,
instance_type=self.config.instance_type,
region=self.config.region,
vendor=self.config.vendor,
account_id=self.config.account_id,
min_replica=self.config.min_replica,
max_replica=self.config.max_replica,
revision=self.config.revision,
task=self.config.task,
custom_image=self.config.custom_image,
type=self.config.endpoint_type,
token=self.get_token(),
namespace=self.config.namespace,
).wait(timeout=POLLING_TIMEOUT)
except Exception as e:
self.status.update_state(
new_state=ServiceState.ERROR, error=str(e)
)
# Catch-all for any other unexpected errors
raise Exception(
f"An unexpected error occurred while provisioning the Hugging Face inference endpoint: {e}"
)
# Check if the endpoint URL is available after provisioning
if hf_endpoint.url:
logger.info(
f"Hugging Face inference endpoint successfully deployed and available. Endpoint URL: {hf_endpoint.url}"
)
else:
logger.error(
"Failed to start Hugging Face inference endpoint service: No URL available, please check the Hugging Face console for more details."
)
def check_status(self) -> Tuple[ServiceState, str]:
"""Check the the current operational state of the Hugging Face deployment.
Returns:
The operational state of the Hugging Face deployment and a message
providing additional information about that state (e.g. a
description of the error, if one is encountered).
"""
try:
status = self.hf_endpoint.status
if status == InferenceEndpointStatus.RUNNING:
return (ServiceState.ACTIVE, "")
elif status == InferenceEndpointStatus.SCALED_TO_ZERO:
return (
ServiceState.SCALED_TO_ZERO,
"Hugging Face Inference Endpoint is scaled to zero, but still running. It will be started on demand.",
)
elif status == InferenceEndpointStatus.FAILED:
return (
ServiceState.ERROR,
"Hugging Face Inference Endpoint deployment is inactive or not found",
)
elif status == InferenceEndpointStatus.PENDING:
return (ServiceState.PENDING_STARTUP, "")
return (ServiceState.PENDING_STARTUP, "")
except (InferenceEndpointError, HfHubHTTPError):
return (
ServiceState.INACTIVE,
"Hugging Face Inference Endpoint deployment is inactive or not found",
)
def deprovision(self, force: bool = False) -> None:
"""Deprovision the remote Hugging Face deployment instance.
Args:
force: if True, the remote deployment instance will be
forcefully deprovisioned.
"""
try:
self.hf_endpoint.delete()
except HfHubHTTPError:
logger.error(
"Hugging Face Inference Endpoint is deleted or cannot be found."
)
def predict(self, data: "Any", max_new_tokens: int) -> "Any":
"""Make a prediction using the service.
Args:
data: input data
max_new_tokens: Number of new tokens to generate
Returns:
The prediction result.
Raises:
Exception: if the service is not running
NotImplementedError: if task is not supported.
"""
if not self.is_running:
raise Exception(
"Hugging Face endpoint inference service is not running. "
"Please start the service before making predictions."
)
if self.prediction_url is not None:
if self.hf_endpoint.task == "text-generation":
result = self.inference_client.task_generation(
data, max_new_tokens=max_new_tokens
)
else:
# TODO: Add support for all different supported tasks
raise NotImplementedError(
"Tasks other than text-generation is not implemented."
)
return result
def get_logs(
self, follow: bool = False, tail: Optional[int] = None
) -> Generator[str, bool, None]:
"""Retrieve the service logs.
Args:
follow: if True, the logs will be streamed as they are written
tail: only retrieve the last NUM lines of log output.
Returns:
A generator that can be accessed to get the service logs.
"""
logger.info(
"Hugging Face Endpoints provides access to the logs of "
"your Endpoints through the UI in the “Logs” tab of your Endpoint"
)
return # type: ignore
def _generate_an_endpoint_name(self) -> str:
"""Generate a unique name for the Hugging Face Inference Endpoint.
Returns:
A unique name for the Hugging Face Inference Endpoint.
"""
return (
f"{self.config.service_name}-{str(self.uuid)[:UUID_SLICE_LENGTH]}"
)
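A minimal usage sketch for the service class itself, with placeholder configuration values; it assumes credentials are available either through `secret_name` or through an active Hugging Face model deployer in the stack:

```python
from uuid import uuid4

from zenml.integrations.huggingface.services.huggingface_deployment import (
    HuggingFaceDeploymentService,
    HuggingFaceServiceConfig,
)

service = HuggingFaceDeploymentService(
    uuid=uuid4(),
    config=HuggingFaceServiceConfig(
        service_name="my-hf-endpoint",  # placeholder
        repository="gpt2",              # placeholder
        task="text-generation",
        namespace="my-org",             # placeholder
    ),
)

# Provisions the inference endpoint and polls until it is ready (or times out).
service.start(timeout=1200)

if service.is_running:
    print(service.prediction_url)
    print(service.predict("ZenML is", max_new_tokens=50))
```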
hf_endpoint: huggingface_hub.InferenceEndpoint
property
readonly
Get the deployed Hugging Face inference endpoint.
Returns:

Type | Description
---|---
`huggingface_hub.InferenceEndpoint` | Huggingface inference endpoint.
inference_client: huggingface_hub.InferenceClient
property
readonly
Get the Hugging Face InferenceClient from Inference Endpoint.
Returns:

Type | Description
---|---
`huggingface_hub.InferenceClient` | Hugging Face inference client.
prediction_url: Optional[str]
property
readonly
The prediction URI exposed by the prediction service.
Returns:

Type | Description
---|---
`Optional[str]` | The prediction URI exposed by the prediction service, or None if the service is not yet ready.
__init__(self, config, **attrs)
special
Initialize the Hugging Face deployment service.
Parameters:

Name | Type | Description | Default
---|---|---|---
`config` | `HuggingFaceServiceConfig` | service configuration | required
`attrs` | `Any` | additional attributes to set on the service | {}
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def __init__(self, config: HuggingFaceServiceConfig, **attrs: Any):
"""Initialize the Hugging Face deployment service.
Args:
config: service configuration
attrs: additional attributes to set on the service
"""
super().__init__(config=config, **attrs)
check_status(self)
Check the current operational state of the Hugging Face deployment.

Returns:

Type | Description
---|---
`Tuple[zenml.services.service_status.ServiceState, str]` | The operational state of the Hugging Face deployment and a message providing additional information about that state (e.g. a description of the error, if one is encountered).
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def check_status(self) -> Tuple[ServiceState, str]:
"""Check the the current operational state of the Hugging Face deployment.
Returns:
The operational state of the Hugging Face deployment and a message
providing additional information about that state (e.g. a
description of the error, if one is encountered).
"""
try:
status = self.hf_endpoint.status
if status == InferenceEndpointStatus.RUNNING:
return (ServiceState.ACTIVE, "")
elif status == InferenceEndpointStatus.SCALED_TO_ZERO:
return (
ServiceState.SCALED_TO_ZERO,
"Hugging Face Inference Endpoint is scaled to zero, but still running. It will be started on demand.",
)
elif status == InferenceEndpointStatus.FAILED:
return (
ServiceState.ERROR,
"Hugging Face Inference Endpoint deployment is inactive or not found",
)
elif status == InferenceEndpointStatus.PENDING:
return (ServiceState.PENDING_STARTUP, "")
return (ServiceState.PENDING_STARTUP, "")
except (InferenceEndpointError, HfHubHTTPError):
return (
ServiceState.INACTIVE,
"Hugging Face Inference Endpoint deployment is inactive or not found",
)
deprovision(self, force=False)
Deprovision the remote Hugging Face deployment instance.
Parameters:

Name | Type | Description | Default
---|---|---|---
`force` | `bool` | if True, the remote deployment instance will be forcefully deprovisioned. | False
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def deprovision(self, force: bool = False) -> None:
"""Deprovision the remote Hugging Face deployment instance.
Args:
force: if True, the remote deployment instance will be
forcefully deprovisioned.
"""
try:
self.hf_endpoint.delete()
except HfHubHTTPError:
logger.error(
"Hugging Face Inference Endpoint is deleted or cannot be found."
)
get_logs(self, follow=False, tail=None)
Retrieve the service logs.
Parameters:

Name | Type | Description | Default
---|---|---|---
`follow` | `bool` | if True, the logs will be streamed as they are written | False
`tail` | `Optional[int]` | only retrieve the last NUM lines of log output. | None

Returns:

Type | Description
---|---
`Generator[str, bool, NoneType]` | A generator that can be accessed to get the service logs.
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def get_logs(
self, follow: bool = False, tail: Optional[int] = None
) -> Generator[str, bool, None]:
"""Retrieve the service logs.
Args:
follow: if True, the logs will be streamed as they are written
tail: only retrieve the last NUM lines of log output.
Returns:
A generator that can be accessed to get the service logs.
"""
logger.info(
"Hugging Face Endpoints provides access to the logs of "
"your Endpoints through the UI in the “Logs” tab of your Endpoint"
)
return # type: ignore
get_token(self)
Get the Hugging Face token.
Exceptions:

Type | Description
---|---
`ValueError` | If token not found.

Returns:

Type | Description
---|---
`str` | Hugging Face token.
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def get_token(self) -> str:
"""Get the Hugging Face token.
Raises:
ValueError: If token not found.
Returns:
Hugging Face token.
"""
client = Client()
token = None
if self.config.secret_name:
secret = client.get_secret(self.config.secret_name)
token = secret.secret_values["token"]
else:
from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import (
HuggingFaceModelDeployer,
)
model_deployer = client.active_stack.model_deployer
if not isinstance(model_deployer, HuggingFaceModelDeployer):
raise ValueError(
"HuggingFaceModelDeployer is not active in the stack."
)
token = model_deployer.config.token or None
if not token:
raise ValueError("Token not found.")
return token
predict(self, data, max_new_tokens)
Make a prediction using the service.
Parameters:

Name | Type | Description | Default
---|---|---|---
`data` | `Any` | input data | required
`max_new_tokens` | `int` | Number of new tokens to generate | required

Returns:

Type | Description
---|---
`Any` | The prediction result.

Exceptions:

Type | Description
---|---
`Exception` | if the service is not running
`NotImplementedError` | if task is not supported.
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def predict(self, data: "Any", max_new_tokens: int) -> "Any":
"""Make a prediction using the service.
Args:
data: input data
max_new_tokens: Number of new tokens to generate
Returns:
The prediction result.
Raises:
Exception: if the service is not running
NotImplementedError: if task is not supported.
"""
if not self.is_running:
raise Exception(
"Hugging Face endpoint inference service is not running. "
"Please start the service before making predictions."
)
if self.prediction_url is not None:
if self.hf_endpoint.task == "text-generation":
result = self.inference_client.task_generation(
data, max_new_tokens=max_new_tokens
)
else:
# TODO: Add support for all different supported tasks
raise NotImplementedError(
"Tasks other than text-generation is not implemented."
)
return result
provision(self)
Provision or update remote Hugging Face deployment instance.
Exceptions:

Type | Description
---|---
`Exception` | If any unexpected error while creating inference endpoint.
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
def provision(self) -> None:
"""Provision or update remote Hugging Face deployment instance.
Raises:
Exception: If any unexpected error while creating inference endpoint.
"""
try:
# Attempt to create and wait for the inference endpoint
hf_endpoint = create_inference_endpoint(
name=self._generate_an_endpoint_name(),
repository=self.config.repository,
framework=self.config.framework,
accelerator=self.config.accelerator,
instance_size=self.config.instance_size,
instance_type=self.config.instance_type,
region=self.config.region,
vendor=self.config.vendor,
account_id=self.config.account_id,
min_replica=self.config.min_replica,
max_replica=self.config.max_replica,
revision=self.config.revision,
task=self.config.task,
custom_image=self.config.custom_image,
type=self.config.endpoint_type,
token=self.get_token(),
namespace=self.config.namespace,
).wait(timeout=POLLING_TIMEOUT)
except Exception as e:
self.status.update_state(
new_state=ServiceState.ERROR, error=str(e)
)
# Catch-all for any other unexpected errors
raise Exception(
f"An unexpected error occurred while provisioning the Hugging Face inference endpoint: {e}"
)
# Check if the endpoint URL is available after provisioning
if hf_endpoint.url:
logger.info(
f"Hugging Face inference endpoint successfully deployed and available. Endpoint URL: {hf_endpoint.url}"
)
else:
logger.error(
"Failed to start Hugging Face inference endpoint service: No URL available, please check the Hugging Face console for more details."
)
HuggingFaceServiceConfig (HuggingFaceBaseConfig, ServiceConfig)
Hugging Face service configurations.
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
class HuggingFaceServiceConfig(HuggingFaceBaseConfig, ServiceConfig):
"""Hugging Face service configurations."""
HuggingFaceServiceStatus (ServiceStatus)
Hugging Face service status.
Source code in zenml/integrations/huggingface/services/huggingface_deployment.py
class HuggingFaceServiceStatus(ServiceStatus):
"""Hugging Face service status."""
steps
special
Initialization for Hugging Face model deployer step.
accelerate_runner
Step function to run any ZenML step using Accelerate.
run_with_accelerate(step_function, num_processes=None, use_cpu=False)
Run a function with accelerate.
Accelerate package: https://huggingface.co/docs/accelerate/en/index
Examples:
from zenml import step, pipeline
from zenml.integrations.huggingface.steps import run_with_accelerate
@step
def training_step(some_param: int, ...):
# your training code is below
...
@pipeline
def training_pipeline(some_param: int, ...):
run_with_accelerate(training_step, num_processes=4)(some_param, ...)
Parameters:

Name | Type | Description | Default
---|---|---|---
`step_function` | `BaseStep` | The step function to run. | required
`num_processes` | `Optional[int]` | The number of processes to use. | None
`use_cpu` | `bool` | Whether to use the CPU. | False

Returns:

Type | Description
---|---
`BaseStep` | The accelerate-enabled version of the step.
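Because the wrapper rejects positional arguments (see the source below), the accelerated step must be called with keyword arguments only. A short sketch with a hypothetical `training_step`:

```python
from zenml import pipeline, step
from zenml.integrations.huggingface.steps import run_with_accelerate

@step
def training_step(num_epochs: int) -> None:
    ...  # training code that benefits from multi-GPU launching

@pipeline
def training_pipeline(num_epochs: int = 1) -> None:
    # Positional arguments raise a ValueError inside the wrapper,
    # so everything is passed by keyword.
    run_with_accelerate(training_step, num_processes=4)(num_epochs=num_epochs)
```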
Source code in zenml/integrations/huggingface/steps/accelerate_runner.py
def run_with_accelerate(
step_function: BaseStep,
num_processes: Optional[int] = None,
use_cpu: bool = False,
) -> BaseStep:
"""Run a function with accelerate.
Accelerate package: https://huggingface.co/docs/accelerate/en/index
Example:
```python
from zenml import step, pipeline
from zenml.integrations.hugginface.steps import run_with_accelerate
@step
def training_step(some_param: int, ...):
# your training code is below
...
@pipeline
def training_pipeline(some_param: int, ...):
run_with_accelerate(training_step, num_processes=4)(some_param, ...)
```
Args:
step_function: The step function to run.
num_processes: The number of processes to use.
use_cpu: Whether to use the CPU.
Returns:
The accelerate-enabled version of the step.
"""
def _decorator(entrypoint: F) -> F:
@functools.wraps(entrypoint)
def inner(*args: Any, **kwargs: Any) -> Any:
if args:
raise ValueError(
"Accelerated steps do not support positional arguments."
)
if not use_cpu:
import torch
logger.info("Starting accelerate job...")
device_count = torch.cuda.device_count()
if num_processes is None:
_num_processes = device_count
else:
if num_processes > device_count:
logger.warning(
f"Number of processes ({num_processes}) is greater than "
f"the number of available GPUs ({device_count}). Using all GPUs."
)
_num_processes = device_count
else:
_num_processes = num_processes
else:
_num_processes = num_processes or 1
with create_cli_wrapped_script(
entrypoint, flavour="accelerate"
) as (
script_path,
output_path,
):
commands = ["--num_processes", str(_num_processes)]
if use_cpu:
commands += [
"--cpu",
"--num_cpu_threads_per_process",
"10",
]
commands.append(str(script_path.absolute()))
for k, v in kwargs.items():
k = _cli_arg_name(k)
if isinstance(v, bool):
if v:
commands.append(f"--{k}")
elif isinstance(v, str):
commands += [f"--{k}", '"{v}"']
elif type(v) in (list, tuple, set):
for each in v:
commands.append(f"--{k}")
if isinstance(each, str):
commands.append(f'"{each}"')
else:
commands.append(f"{each}")
else:
commands += [f"--{k}", f"{v}"]
logger.debug(commands)
parser = launch_command_parser()
args = parser.parse_args(commands)
try:
launch_command(args)
except Exception as e:
logger.error(
"Accelerate training job failed... See error message for details."
)
raise RuntimeError(
"Accelerate training job failed."
) from e
else:
logger.info(
"Accelerate training job finished successfully."
)
return pickle.load(open(output_path, "rb"))
return cast(F, inner)
setattr(step_function, "entrypoint", _decorator(step_function.entrypoint))
return step_function
huggingface_deployer
Implementation of the Hugging Face Deployer step.