Pipelines

`zenml.pipelines` `special`

A ZenML pipeline consists of tasks that execute in order and yield artifacts.

The artifacts are automatically stored in the artifact store, and their metadata is tracked by ZenML. Each individual task within a pipeline is known as a step. The standard pipelines within ZenML are designed to offer easy interfaces for adding pre-decided steps in a pre-decided order. Other kinds of pipelines can also be created from scratch by building on the `BasePipeline` class.

Pipelines can be written as simple functions. They are created by using decorators appropriate to the specific use case you have. The moment it is run, a pipeline is compiled and passed directly to the orchestrator.

`base_pipeline`

Legacy ZenML pipeline class definition.

`BasePipeline (Pipeline, ABC)`

Legacy pipeline class.

Source code in zenml/pipelines/base_pipeline.py

class BasePipeline(Pipeline, ABC):
    """Legacy pipeline class.

    Subclasses implement `connect` to wire their steps together. The step
    instances are passed to `__init__`, validated against the signature of
    `connect`, and handed to `connect` when the pipeline entrypoint is
    invoked.
    """

    # Optional class-level configuration. `__init__` reads the pipeline name
    # from it and forwards the remaining entries to `Pipeline.__init__`.
    _CLASS_CONFIGURATION: ClassVar[Optional[Dict[str, Any]]] = None

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initializes a pipeline.

        The arguments are verified against the signature of `connect` and
        stored as the steps of this pipeline.

        Args:
            *args: Initialization arguments.
            **kwargs: Initialization keyword arguments.
        """
        # Work on a copy: `_CLASS_CONFIGURATION` is shared by all instances
        # of the class, so popping the name from it directly would mutate
        # the class-level dict on the first instantiation.
        config = dict(self._CLASS_CONFIGURATION or {})
        pipeline_name = (
            config.pop(PARAM_PIPELINE_NAME, None) or self.__class__.__name__
        )
        self._steps = self._verify_steps(
            *args, __name__=pipeline_name, **kwargs
        )

        def entrypoint() -> None:
            # The verified steps are passed to `connect` by keyword.
            self.connect(**self._steps)

        super().__init__(
            name=pipeline_name,
            entrypoint=entrypoint,
            **config,
        )

    @property
    def steps(self) -> Dict[str, BaseStep]:
        """Returns the steps of the pipeline.

        Returns:
            The steps of the pipeline, keyed by the name of the `connect`
            argument they were bound to.
        """
        return self._steps

    @abstractmethod
    def connect(self, *args: BaseStep, **kwargs: BaseStep) -> None:
        """Abstract method that connects the pipeline steps.

        Args:
            *args: Connect method arguments.
            **kwargs: Connect method keyword arguments.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def resolve(self) -> "Source":
        """Resolves the pipeline.

        Returns:
            The pipeline source, resolved from the class of this pipeline.
        """
        return source_utils.resolve(self.__class__)

    @property
    def source_object(self) -> Any:
        """The source object of this pipeline.

        Returns:
            The source object of this pipeline, which is its `connect`
            method.
        """
        return self.connect

    def run(
        self,
        *,
        run_name: Optional[str] = None,
        enable_cache: Optional[bool] = None,
        enable_artifact_metadata: Optional[bool] = None,
        enable_artifact_visualization: Optional[bool] = None,
        enable_step_logs: Optional[bool] = None,
        schedule: Optional[Schedule] = None,
        build: Union[str, "UUID", "PipelineBuildBaseModel", None] = None,
        settings: Optional[Mapping[str, "SettingsOrDict"]] = None,
        step_configurations: Optional[
            Mapping[str, "StepConfigurationUpdateOrDict"]
        ] = None,
        extra: Optional[Dict[str, Any]] = None,
        config_path: Optional[str] = None,
        unlisted: bool = False,
        prevent_build_reuse: bool = False,
    ) -> None:
        """Runs the pipeline on the active stack.

        Args:
            run_name: Name of the pipeline run.
            enable_cache: If caching should be enabled for this pipeline run.
            enable_artifact_metadata: If artifact metadata should be enabled
                for this pipeline run.
            enable_artifact_visualization: If artifact visualization should be
                enabled for this pipeline run.
            enable_step_logs: If step logs should be enabled for this pipeline
                run.
            schedule: Optional schedule to use for the run.
            build: Optional build to use for the run.
            settings: Settings for this pipeline run.
            step_configurations: Configurations for steps of the pipeline.
            extra: Extra configurations for this pipeline run.
            config_path: Path to a yaml configuration file. This file will
                be parsed as a
                `zenml.config.pipeline_configurations.PipelineRunConfiguration`
                object. Options provided in this file will be overwritten by
                options provided in code using the other arguments of this
                method.
            unlisted: Whether the pipeline run should be unlisted (not assigned
                to any pipeline).
            prevent_build_reuse: Whether to prevent the reuse of a build.
        """
        # The run is performed on the object returned by `with_options`,
        # not on `self`.
        pipeline_copy = self.with_options(
            run_name=run_name,
            schedule=schedule,
            build=build,
            step_configurations=step_configurations,
            config_path=config_path,
            unlisted=unlisted,
            prevent_build_reuse=prevent_build_reuse,
        )
        # These options are not passed to `with_options`; they are merged
        # into the run arguments of the copy instead.
        new_run_args = dict_utils.remove_none_values(
            {
                "enable_cache": enable_cache,
                "enable_artifact_metadata": enable_artifact_metadata,
                "enable_artifact_visualization": enable_artifact_visualization,
                "enable_step_logs": enable_step_logs,
                "settings": settings,
                "extra": extra,
            }
        )
        pipeline_copy._run_args.update(new_run_args)

        pipeline_copy()

    def _compute_invocation_id(
        self,
        step: "BaseStep",
        custom_id: Optional[str] = None,
        allow_suffix: bool = True,
    ) -> str:
        """Compute the invocation ID.

        Note that the values passed for `custom_id` and `allow_suffix` are
        not used by this implementation: the custom ID is always read from
        the template name attribute of the step, and suffixes are always
        disabled.

        Args:
            step: The step for which to compute the ID.
            custom_id: Custom ID to use for the invocation.
            allow_suffix: Whether a suffix can be appended to the invocation
                ID.

        Returns:
            The invocation ID.
        """
        # `_verify_steps` stores the `connect` argument name on each step
        # under this attribute.
        custom_id = getattr(step, TEMPLATE_NAME_ATTRIBUTE, None)

        return super()._compute_invocation_id(
            step=step, custom_id=custom_id, allow_suffix=False
        )

    def _verify_steps(
        self, *args: Any, __name__: str, **kwargs: Any
    ) -> Dict[str, "BaseStep"]:
        """Verifies the initialization args and kwargs of this pipeline.

        This method makes sure that no missing/unexpected arguments or
        arguments of a wrong type are passed when creating a pipeline.

        Args:
            *args: The args passed to the init method of this pipeline.
            __name__: The pipeline name. The naming of this argument is to avoid
                conflicts with other arguments.
            **kwargs: The kwargs passed to the init method of this pipeline.

        Raises:
            PipelineInterfaceError: If there are too many/few arguments or
                arguments with a wrong name/type.

        Returns:
            The verified steps.
        """
        signature = inspect.signature(self.connect, follow_wrapped=True)

        # Binding against the `connect` signature detects missing and
        # unexpected arguments.
        try:
            bound_args = signature.bind(*args, **kwargs)
        except TypeError as e:
            raise PipelineInterfaceError(
                f"Wrong arguments when initializing pipeline '{__name__}': {e}"
            ) from e

        steps = {}

        for key, potential_step in bound_args.arguments.items():
            step_class = type(potential_step)

            # A step class instead of a step instance gets a dedicated
            # error message.
            if inspect.isclass(potential_step) and issubclass(
                potential_step, BaseStep
            ):
                raise PipelineInterfaceError(
                    f"Wrong argument type (`{step_class}`) for argument "
                    f"'{key}' of pipeline '{__name__}'. "
                    f"A `BaseStep` subclass was provided instead of an "
                    f"instance. "
                    f"This might have been caused due to missing brackets of "
                    f"your steps when creating a pipeline with `@step` "
                    f"decorated functions, "
                    f"for which the correct syntax is `pipeline(step=step())`."
                )

            if not isinstance(potential_step, BaseStep):
                raise PipelineInterfaceError(
                    f"Wrong argument type (`{step_class}`) for argument "
                    f"'{key}' of pipeline '{__name__}'. Only "
                    f"`@step` decorated functions or instances of `BaseStep` "
                    f"subclasses can be used as arguments when creating "
                    f"a pipeline."
                )

            steps[key] = potential_step
            # Remember the argument name on the step; it is read back in
            # `_compute_invocation_id`.
            setattr(potential_step, TEMPLATE_NAME_ATTRIBUTE, key)

        return steps

`source_object: Any` `property` `readonly`

The source object of this pipeline.

Returns:

Type	Description
`Any`	The source object of this pipeline.

`steps: Dict[str, zenml.steps.base_step.BaseStep]` `property` `readonly`

Returns the steps of the pipeline.

Returns:

Type	Description
`Dict[str, zenml.steps.base_step.BaseStep]`	The steps of the pipeline.

`__init__(self, *args, **kwargs)` `special`

Initializes a pipeline.

Parameters:

Name	Type	Description	Default
`*args`	`Any`	Initialization arguments.	`()`
`**kwargs`	`Any`	Initialization keyword arguments.	`{}`

Source code in zenml/pipelines/base_pipeline.py

def __init__(self, *args: Any, **kwargs: Any) -> None:
    """Initializes a pipeline.

    The arguments are verified against the signature of `connect` and
    stored as the steps of this pipeline.

    Args:
        *args: Initialization arguments.
        **kwargs: Initialization keyword arguments.
    """
    # Work on a copy: `_CLASS_CONFIGURATION` is shared by all instances of
    # the class, so popping the name from it directly would mutate the
    # class-level dict on the first instantiation.
    config = dict(self._CLASS_CONFIGURATION or {})
    pipeline_name = (
        config.pop(PARAM_PIPELINE_NAME, None) or self.__class__.__name__
    )
    self._steps = self._verify_steps(
        *args, __name__=pipeline_name, **kwargs
    )

    def entrypoint() -> None:
        # The verified steps are passed to `connect` by keyword.
        self.connect(**self._steps)

    super().__init__(
        name=pipeline_name,
        entrypoint=entrypoint,
        **config,
    )

`connect(self, *args, **kwargs)`

Abstract method that connects the pipeline steps.

Parameters:

Name	Type	Description	Default
`*args`	`BaseStep`	Connect method arguments.	`()`
`**kwargs`	`BaseStep`	Connect method keyword arguments.	`{}`

Source code in zenml/pipelines/base_pipeline.py

@abstractmethod
def connect(self, *args: BaseStep, **kwargs: BaseStep) -> None:
    """Connect the steps of the pipeline.

    This is an abstract hook; the base implementation only raises.

    Args:
        *args: Connect method arguments.
        **kwargs: Connect method keyword arguments.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()

`resolve(self)`

Resolves the pipeline.

Returns:

Type	Description
`Source`	The pipeline source.

Source code in zenml/pipelines/base_pipeline.py

def resolve(self) -> "Source":
    """Resolve the source of this pipeline.

    The lookup is performed on the class of the pipeline, not on the
    instance itself.

    Returns:
        The pipeline source.
    """
    pipeline_class = self.__class__
    return source_utils.resolve(pipeline_class)

`run(self, *, run_name=None, enable_cache=None, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, schedule=None, build=None, settings=None, step_configurations=None, extra=None, config_path=None, unlisted=False, prevent_build_reuse=False)`

Runs the pipeline on the active stack.

Parameters:

Name	Type	Description	Default
`run_name`	`Optional[str]`	Name of the pipeline run.	`None`
`enable_cache`	`Optional[bool]`	If caching should be enabled for this pipeline run.	`None`
`enable_artifact_metadata`	`Optional[bool]`	If artifact metadata should be enabled for this pipeline run.	`None`
`enable_artifact_visualization`	`Optional[bool]`	If artifact visualization should be enabled for this pipeline run.	`None`
`enable_step_logs`	`Optional[bool]`	If step logs should be enabled for this pipeline run.	`None`
`schedule`	`Optional[zenml.config.schedule.Schedule]`	Optional schedule to use for the run.	`None`
`build`	`Union[str, UUID, PipelineBuildBaseModel]`	Optional build to use for the run.	`None`
`settings`	`Optional[Mapping[str, SettingsOrDict]]`	Settings for this pipeline run.	`None`
`step_configurations`	`Optional[Mapping[str, StepConfigurationUpdateOrDict]]`	Configurations for steps of the pipeline.	`None`
`extra`	`Optional[Dict[str, Any]]`	Extra configurations for this pipeline run.	`None`
`config_path`	`Optional[str]`	Path to a yaml configuration file. This file will be parsed as a `zenml.config.pipeline_configurations.PipelineRunConfiguration` object. Options provided in this file will be overwritten by options provided in code using the other arguments of this method.	`None`
`unlisted`	`bool`	Whether the pipeline run should be unlisted (not assigned to any pipeline).	`False`
`prevent_build_reuse`	`bool`	Whether to prevent the reuse of a build.	`False`

Source code in zenml/pipelines/base_pipeline.py

def run(
    self,
    *,
    run_name: Optional[str] = None,
    enable_cache: Optional[bool] = None,
    enable_artifact_metadata: Optional[bool] = None,
    enable_artifact_visualization: Optional[bool] = None,
    enable_step_logs: Optional[bool] = None,
    schedule: Optional[Schedule] = None,
    build: Union[str, "UUID", "PipelineBuildBaseModel", None] = None,
    settings: Optional[Mapping[str, "SettingsOrDict"]] = None,
    step_configurations: Optional[
        Mapping[str, "StepConfigurationUpdateOrDict"]
    ] = None,
    extra: Optional[Dict[str, Any]] = None,
    config_path: Optional[str] = None,
    unlisted: bool = False,
    prevent_build_reuse: bool = False,
) -> None:
    """Run the pipeline on the active stack.

    The run is performed on the object returned by `with_options`, not on
    this instance.

    Args:
        run_name: Name of the pipeline run.
        enable_cache: Whether caching should be enabled for this run.
        enable_artifact_metadata: Whether artifact metadata should be
            enabled for this run.
        enable_artifact_visualization: Whether artifact visualization
            should be enabled for this run.
        enable_step_logs: Whether step logs should be enabled for this run.
        schedule: Optional schedule to use for the run.
        build: Optional build to use for the run.
        settings: Settings for this pipeline run.
        step_configurations: Configurations for steps of the pipeline.
        extra: Extra configurations for this pipeline run.
        config_path: Path to a yaml configuration file. The file is parsed
            as a
            `zenml.config.pipeline_configurations.PipelineRunConfiguration`
            object. Options in the file are overwritten by options passed
            in code through the other arguments of this method.
        unlisted: Whether the pipeline run should be unlisted (not assigned
            to any pipeline).
        prevent_build_reuse: Whether to prevent the reuse of a build.
    """
    configured_pipeline = self.with_options(
        run_name=run_name,
        schedule=schedule,
        build=build,
        step_configurations=step_configurations,
        config_path=config_path,
        unlisted=unlisted,
        prevent_build_reuse=prevent_build_reuse,
    )

    # These options do not go through `with_options`; they are merged into
    # the run arguments of the configured pipeline instead.
    run_arg_candidates = {
        "enable_cache": enable_cache,
        "enable_artifact_metadata": enable_artifact_metadata,
        "enable_artifact_visualization": enable_artifact_visualization,
        "enable_step_logs": enable_step_logs,
        "settings": settings,
        "extra": extra,
    }
    run_arg_overrides = dict_utils.remove_none_values(run_arg_candidates)
    configured_pipeline._run_args.update(run_arg_overrides)

    configured_pipeline()

`pipeline_decorator`

Legacy ZenML pipeline decorator definition.

`pipeline(_func=None, *, name=None, enable_cache=None, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, settings=None, extra=None, on_failure=None, on_success=None, model_config=None)`

Outer decorator function for the creation of a ZenML pipeline.

Parameters:

Name	Type	Description	Default
`_func`	`Optional[~F]`	The decorated function.	`None`
`name`	`Optional[str]`	The name of the pipeline. If left empty, the name of the decorated function will be used as a fallback.	`None`
`enable_cache`	`Optional[bool]`	Whether to use caching or not.	`None`
`enable_artifact_metadata`	`Optional[bool]`	Whether to enable artifact metadata or not.	`None`
`enable_artifact_visualization`	`Optional[bool]`	Whether to enable artifact visualization.	`None`
`enable_step_logs`	`Optional[bool]`	Whether to enable step logs.	`None`
`settings`	`Optional[Dict[str, SettingsOrDict]]`	Settings for this pipeline.	`None`
`extra`	`Optional[Dict[str, Any]]`	Extra configurations for this pipeline.	`None`
`on_failure`	`Optional[HookSpecification]`	Callback function in event of failure of the step. Can be a function with a single argument of type `BaseException`, or a source path to such a function (e.g. `module.my_function`).	`None`
`on_success`	`Optional[HookSpecification]`	Callback function in event of success of the step. Can be a function with no arguments, or a source path to such a function (e.g. `module.my_function`).	`None`
`model_config`	`Optional[ModelConfig]`	Model(Version) configuration for this step as `ModelConfig` instance.	`None`

Returns:

Type	Description
`Union[Type[zenml.pipelines.base_pipeline.BasePipeline], Callable[[~F], Type[zenml.pipelines.base_pipeline.BasePipeline]]]`	the inner decorator which creates the pipeline class based on the ZenML BasePipeline

Source code in zenml/pipelines/pipeline_decorator.py

def pipeline(
    _func: Optional[F] = None,
    *,
    name: Optional[str] = None,
    enable_cache: Optional[bool] = None,
    enable_artifact_metadata: Optional[bool] = None,
    enable_artifact_visualization: Optional[bool] = None,
    enable_step_logs: Optional[bool] = None,
    settings: Optional[Dict[str, "SettingsOrDict"]] = None,
    extra: Optional[Dict[str, Any]] = None,
    on_failure: Optional["HookSpecification"] = None,
    on_success: Optional["HookSpecification"] = None,
    model_config: Optional["ModelConfig"] = None,
) -> Union[Type[BasePipeline], Callable[[F], Type[BasePipeline]]]:
    """Outer decorator function for the creation of a legacy ZenML pipeline.

    Usable both bare (`@pipeline`) and with keyword arguments
    (`@pipeline(...)`). Applying it logs a deprecation warning and builds a
    new `BasePipeline` subclass around the decorated function.

    Args:
        _func: The decorated function.
        name: The name of the pipeline. If left empty, the name of the
            decorated function is used as a fallback.
        enable_cache: Whether to use caching or not.
        enable_artifact_metadata: Whether to enable artifact metadata or not.
        enable_artifact_visualization: Whether to enable artifact
            visualization.
        enable_step_logs: Whether to enable step logs.
        settings: Settings for this pipeline.
        extra: Extra configurations for this pipeline.
        on_failure: Callback function in event of failure of the step. Can
            be a function with a single argument of type `BaseException`,
            or a source path to such a function (e.g.
            `module.my_function`).
        on_success: Callback function in event of success of the step. Can
            be a function with no arguments, or a source path to such a
            function (e.g. `module.my_function`).
        model_config: Model(Version) configuration for this step as
            `ModelConfig` instance.

    Returns:
        The pipeline class when used as a bare decorator, otherwise the
        inner decorator which creates the pipeline class based on the
        ZenML BasePipeline.
    """

    def inner_decorator(func: F) -> Type[BasePipeline]:
        pipeline_name = name or func.__name__

        deprecation_message = (
            "The `@pipeline` decorator that you used to define your "
            f"{pipeline_name} pipeline is deprecated. Check out the 0.40.0 "
            "migration guide for more information on how to migrate your "
            "pipelines to the new syntax: "
            "https://docs.zenml.io/reference/migration-guide/migration-zero-forty.html"
        )
        logger.warning(deprecation_message)

        # The raw `name` (possibly None) is stored here, not the fallback.
        class_configuration = {
            PARAM_PIPELINE_NAME: name,
            PARAM_ENABLE_CACHE: enable_cache,
            PARAM_ENABLE_ARTIFACT_METADATA: enable_artifact_metadata,
            PARAM_ENABLE_ARTIFACT_VISUALIZATION: enable_artifact_visualization,
            PARAM_ENABLE_STEP_LOGS: enable_step_logs,
            PARAM_SETTINGS: settings,
            PARAM_EXTRA_OPTIONS: extra,
            PARAM_ON_FAILURE: on_failure,
            PARAM_ON_SUCCESS: on_success,
            PARAM_MODEL_CONFIG: model_config,
        }

        # The generated class takes its module and docstring from the
        # decorated function.
        class_namespace = {
            PIPELINE_INNER_FUNC_NAME: staticmethod(func),  # type: ignore[arg-type]
            CLASS_CONFIGURATION: class_configuration,
            "__module__": func.__module__,
            "__doc__": func.__doc__,
        }

        return type(pipeline_name, (BasePipeline,), class_namespace)

    if _func is None:
        return inner_decorator
    return inner_decorator(_func)

Pipelines

zenml.pipelines special

base_pipeline

BasePipeline (Pipeline, ABC)

source_object: Any property readonly

steps: Dict[str, zenml.steps.base_step.BaseStep] property readonly

__init__(self, *args, **kwargs) special

connect(self, *args, **kwargs)

resolve(self)

run(self, *, run_name=None, enable_cache=None, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, schedule=None, build=None, settings=None, step_configurations=None, extra=None, config_path=None, unlisted=False, prevent_build_reuse=False)

pipeline_decorator

pipeline(_func=None, *, name=None, enable_cache=None, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, settings=None, extra=None, on_failure=None, on_success=None, model_config=None)

`zenml.pipelines` `special`

`base_pipeline`

`BasePipeline (Pipeline, ABC)`

`source_object: Any` `property` `readonly`

`steps: Dict[str, zenml.steps.base_step.BaseStep]` `property` `readonly`

`__init__(self, *args, **kwargs)` `special`

`connect(self, *args, **kwargs)`

`resolve(self)`

`run(self, *, run_name=None, enable_cache=None, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, schedule=None, build=None, settings=None, step_configurations=None, extra=None, config_path=None, unlisted=False, prevent_build_reuse=False)`

`pipeline_decorator`

`pipeline(_func=None, *, name=None, enable_cache=None, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, settings=None, extra=None, on_failure=None, on_success=None, model_config=None)`