Logging

`zenml.logging`

Modules

`step_logging`

ZenML logging handler.

Classes

`PipelineLogsStorage(logs_uri: str, artifact_store: BaseArtifactStore, max_queue_size: int = LOGS_STORAGE_MAX_QUEUE_SIZE, queue_timeout: int = LOGS_STORAGE_QUEUE_TIMEOUT, write_interval: int = LOGS_WRITE_INTERVAL_SECONDS, merge_files_interval: int = LOGS_MERGE_INTERVAL_SECONDS)`

Helper class which buffers and stores logs to a given URI using a background thread.

Initialization.

Parameters:

Name	Type	Description	Default
`logs_uri`	`str`	the URI of the log file or folder.	required
`artifact_store`	`BaseArtifactStore`	Artifact Store from the current step context	required
`max_queue_size`	`int`	maximum number of individual messages to queue.	`LOGS_STORAGE_MAX_QUEUE_SIZE`
`queue_timeout`	`int`	timeout in seconds for putting an item into the queue when it is full. Non-negative value: wait up to N seconds, then drop the message if the queue is still full. Negative value: block indefinitely until the queue has space (logs are never dropped).	`LOGS_STORAGE_QUEUE_TIMEOUT`
`write_interval`	`int`	the number of seconds before the created files get written to the artifact store.	`LOGS_WRITE_INTERVAL_SECONDS`
`merge_files_interval`	`int`	the number of seconds before the created files get merged into a single file.	`LOGS_MERGE_INTERVAL_SECONDS`

Source code in src/zenml/logging/step_logging.py

def __init__(
    self,
    logs_uri: str,
    artifact_store: "BaseArtifactStore",
    max_queue_size: int = LOGS_STORAGE_MAX_QUEUE_SIZE,
    queue_timeout: int = LOGS_STORAGE_QUEUE_TIMEOUT,
    write_interval: int = LOGS_WRITE_INTERVAL_SECONDS,
    merge_files_interval: int = LOGS_MERGE_INTERVAL_SECONDS,
) -> None:
    """Set up the log storage state and start its storage thread.

    Args:
        logs_uri: URI of the log file or folder.
        artifact_store: artifact store taken from the current step context.
        max_queue_size: maximum number of individual messages kept in the
            queue.
        queue_timeout: timeout in seconds used when a message is put into
            a full queue.
            - Negative value: block until the queue has space again, so
              no message is dropped.
            - Any other value: wait that long, then drop the message if
              the queue is still full.
        write_interval: number of seconds before the created files get
            written to the artifact store.
        merge_files_interval: number of seconds before the created files
            get merged into a single file.
    """
    # Destination of the logs
    self.artifact_store = artifact_store
    self.logs_uri = logs_uri

    # Tunables, kept as given
    self.merge_files_interval = merge_files_interval
    self.write_interval = write_interval
    self.queue_timeout = queue_timeout
    self.max_queue_size = max_queue_size

    # Events, thread handle and bounded queue used for async processing
    self.merge_event = threading.Event()
    self.shutdown_event = threading.Event()
    self.log_storage_thread: Optional[threading.Thread] = None
    self.log_queue: queue.Queue[str] = queue.Queue(maxsize=max_queue_size)

    # Reference point for the next merge on immutable filesystems
    self.last_merge_time = time.time()

    # Everything above must exist before the storage thread is started
    self._start_log_storage_thread()

Functions

merge_log_files(merge_all_files: bool = False) -> None

Merges all log files into one in the given URI.

Called on the logging context exit.

Parameters:

Name	Type	Description	Default
`merge_all_files`	`bool`	whether to merge all files or only raw files	`False`

Source code in src/zenml/logging/step_logging.py

def merge_log_files(self, merge_all_files: bool = False) -> None:
    """Combine the log files found under the logs URI into a single file.

    Nothing happens unless the artifact store is flagged as an immutable
    filesystem. Called on the logging context exit.

    Args:
        merge_all_files: when False, files whose name contains the merged
            file suffix are left out of the merge; when True, every listed
            file takes part in it.
    """
    if not self.artifact_store.config.IS_IMMUTABLE_FILESYSTEM:
        # Only immutable filesystems get merged
        return

    suffix_for_merged = "_merged"
    candidates = self.artifact_store.listdir(self.logs_uri)
    if not merge_all_files:
        # Files that are already the result of a merge are skipped
        candidates = [
            entry
            for entry in candidates
            if suffix_for_merged not in str(entry)
        ]
    merged_name = self._get_timestamped_filename(suffix=suffix_for_merged)

    if len(candidates) > 1:
        candidates.sort()
        logger.debug("Log files count: %s", len(candidates))

        vanished = set()
        merged_path = os.path.join(self.logs_uri, merged_name)
        # Concatenate the files, in sorted order, into the merged file
        # opened through the artifact store
        with self.artifact_store.open(merged_path, "w") as merged_file:
            for entry in candidates:
                try:
                    content = _load_file_from_artifact_store(
                        os.path.join(self.logs_uri, str(entry)),
                        artifact_store=self.artifact_store,
                        mode="r",
                    )
                    merged_file.write(str(content))
                except DoesNotExistException:
                    # Remember it so that no removal is attempted below
                    vanished.add(entry)

        # Remove the source files that were merged
        for entry in candidates:
            if entry in vanished:
                continue
            self.artifact_store.remove(
                os.path.join(self.logs_uri, str(entry))
            )

    # Record when this merge attempt happened
    self.last_merge_time = time.time()

send_merge_event() -> None

Send a merge event to the log storage thread.

Source code in src/zenml/logging/step_logging.py

def send_merge_event(self) -> None:
    """Set the merge event to notify the log storage thread."""
    merge_signal = self.merge_event
    merge_signal.set()

write(text: str) -> None

Main write method that sends individual messages directly to queue.

Parameters:

Name	Type	Description	Default
`text`	`str`	the incoming string.	required

Source code in src/zenml/logging/step_logging.py

def write(self, text: str) -> None:
    """Timestamp a single message and put it on the log queue.

    The message is ignored when it is a lone newline, when the caller is
    the log storage thread or the fsspec IO thread, or when shutdown has
    been signalled. Any error raised while formatting or queueing is
    swallowed.

    Args:
        text: the incoming string.
    """
    # A lone newline carries no content
    if text == "\n":
        return

    # Logs produced by the storage thread itself are ignored, otherwise
    # storing logs could generate more logs to store
    storage_thread = self.log_storage_thread
    if storage_thread and threading.current_thread() == storage_thread:
        return

    # Same for the fsspec IO thread
    if self._is_fsspec_io_thread:
        return

    try:
        stamp = utc_now().strftime("%Y-%m-%d %H:%M:%S")
        line = f"[{stamp} UTC] {remove_ansi_escape_codes(text)}".rstrip()

        if self.shutdown_event.is_set():
            return

        # Negative timeout: wait without limit for free space, which never
        # loses a log but may hang the application.
        # Otherwise: wait for the configured time, then give up.
        wait = None if self.queue_timeout < 0 else self.queue_timeout
        try:
            self.log_queue.put(line, timeout=wait)
        except queue.Full:
            # Only reachable with a bounded wait: dropping the message is
            # preferred over blocking the application
            pass

    except Exception:
        # Errors are ignored silently to prevent recursion
        pass

write_buffer(buffer_to_write: List[str]) -> None

Write the given buffer to file. This runs in the log storage thread.

Parameters:

Name	Type	Description	Default
`buffer_to_write`	`List[str]`	The buffer contents to write to file.	required

Source code in src/zenml/logging/step_logging.py

def write_buffer(self, buffer_to_write: List[str]) -> None:
    """Persist the buffered messages. This runs in the log storage thread.

    On an immutable filesystem the messages go into a new, timestamped
    file below the logs URI; otherwise they are appended to the file at
    the logs URI. Each message is written followed by a newline.

    Args:
        buffer_to_write: The buffer contents to write to file.
    """
    if not buffer_to_write:
        return

    # The configured logging handler holds a lock so that logs coming from
    # different threads do not interleave. Most artifact stores build on
    # fsspec, which runs async operations in a separate thread; if fsspec
    # logs something while the file is being written, that can end in a
    # deadlock. The lock of the first root handler is therefore taken away
    # for the duration of the write and put back afterwards.
    first_handler = None
    saved_lock = None
    try:
        root_handlers = logging.getLogger().handlers
        if root_handlers:
            first_handler = root_handlers[0]
            saved_lock = getattr(first_handler, "lock", None)
            if saved_lock:
                first_handler.lock = None

        store = self.artifact_store
        immutable = store.config.IS_IMMUTABLE_FILESYSTEM
        if immutable:
            # A new file per buffer
            target = os.path.join(
                self.logs_uri, self._get_timestamped_filename()
            )
            mode = "w"
        else:
            # Keep appending to the one existing file
            target, mode = self.logs_uri, "a"

        with store.open(target, mode) as file:
            for message in buffer_to_write:
                file.write(f"{message}\n")

        if not immutable:
            store._remove_previous_file_versions(self.logs_uri)

    finally:
        # Hand the lock back to the handler
        if first_handler and saved_lock:
            first_handler.lock = saved_lock

`PipelineLogsStorageContext(logs_uri: str, artifact_store: BaseArtifactStore, prepend_step_name: bool = True)`

Context manager which patches stdout and stderr during pipeline run execution.

Initializes and prepares a storage object.

Parameters:

Name	Type	Description	Default
`logs_uri`	`str`	the URI of the logs file.	required
`artifact_store`	`BaseArtifactStore`	Artifact Store from the current pipeline run context.	required
`prepend_step_name`	`bool`	Whether to prepend the step name to the logs.	`True`

Source code in src/zenml/logging/step_logging.py

def __init__(
    self,
    logs_uri: str,
    artifact_store: "BaseArtifactStore",
    prepend_step_name: bool = True,
) -> None:
    """Create the backing log storage and record the context options.

    Args:
        logs_uri: the URI of the logs file.
        artifact_store: Artifact Store from the current pipeline run
            context.
        prepend_step_name: Whether to prepend the step name to the logs.
    """
    # The storage is built with its default queue and interval settings
    storage = PipelineLogsStorage(
        artifact_store=artifact_store,
        logs_uri=logs_uri,
    )
    self.storage = storage
    self.prepend_step_name = prepend_step_name
    # No original methods have been saved yet
    self._original_methods_saved = False

Functions

`fetch_logs(zen_store: BaseZenStore, artifact_store_id: Union[str, UUID], logs_uri: str, offset: int = 0, length: int = 1024 * 1024 * 16, strip_timestamp: bool = False) -> str`

Fetches the logs from the artifact store.

Parameters:

Name	Type	Description	Default
`zen_store`	`BaseZenStore`	The store in which the artifact is stored.	required
`artifact_store_id`	`Union[str, UUID]`	The ID of the artifact store.	required
`logs_uri`	`str`	The URI of the artifact.	required
`offset`	`int`	The offset from which to start reading.	`0`
`length`	`int`	The maximum number of bytes to read.	`1024 * 1024 * 16`
`strip_timestamp`	`bool`	Whether to strip timestamps in logs or not	`False`

Returns:

Type	Description
`str`	The logs as a string.

Raises:

Type	Description
`DoesNotExistException`	If the artifact does not exist in the artifact store.

Source code in src/zenml/logging/step_logging.py

def fetch_logs(
    zen_store: "BaseZenStore",
    artifact_store_id: Union[str, UUID],
    logs_uri: str,
    offset: int = 0,
    length: int = 1024 * 1024 * 16,  # Default to 16MiB of data
    strip_timestamp: bool = False,
) -> str:
    """Fetches the logs from the artifact store.

    If `logs_uri` is not a directory, the requested window is read from
    that single file. If it is a directory, its files are treated as one
    continuous log in sorted name order: `offset` and `length` address
    bytes of that concatenation, and a negative `offset` counts backwards
    from its end.

    For a directory with several files, an `offset` at or past the end of
    the combined log yields an empty string, and a negative `offset` that
    reaches back further than the combined log starts reading at its
    beginning.

    Args:
        zen_store: The store in which the artifact is stored.
        artifact_store_id: The ID of the artifact store.
        logs_uri: The URI of the artifact.
        offset: The offset from which to start reading.
        length: The amount of bytes that should be read.
        strip_timestamp: Whether to strip timestamps in logs or not

    Returns:
        The logs as a string.

    Raises:
        DoesNotExistException: If the artifact does not exist in the artifact
            store, e.g. if the logs folder contains no files.
    """

    def _read_raw(
        uri: str,
        offset: int = 0,
        length: Optional[int] = None,
    ) -> Any:
        # The file is opened in binary mode, so `offset` and `length` are
        # byte counts and the result supports `len()` and `.decode()`.
        return _load_file_from_artifact_store(
            uri,
            artifact_store=artifact_store,
            mode="rb",
            offset=offset,
            length=length,
        )

    def _to_text(raw: Any, strip_timestamp: bool = False) -> str:
        file_content = str(raw.decode())
        if strip_timestamp:
            file_content = _strip_timestamp_from_multiline_string(file_content)
        return file_content

    def _read_file(
        uri: str,
        offset: int = 0,
        length: Optional[int] = None,
        strip_timestamp: bool = False,
    ) -> str:
        return _to_text(_read_raw(uri, offset, length), strip_timestamp)

    artifact_store = _load_artifact_store(artifact_store_id, zen_store)
    try:
        if not artifact_store.isdir(logs_uri):
            return _read_file(logs_uri, offset, length, strip_timestamp)

        files = artifact_store.listdir(logs_uri)
        if len(files) == 1:
            return _read_file(
                os.path.join(logs_uri, str(files[0])),
                offset,
                length,
                strip_timestamp,
            )

        if not files:
            raise DoesNotExistException(
                f"Folder '{logs_uri}' is empty in artifact store "
                f"'{artifact_store.name}'."
            )

        # For a negative offset the files are walked from the last one
        # backwards, otherwise from the first one forwards.
        is_negative_offset = offset < 0
        files.sort(reverse=is_negative_offset)

        # search for the first file we need to read; while walking, the
        # offset is made relative to the file currently looked at
        latest_file_id = 0
        for i, file in enumerate(files):
            file_size: int = artifact_store.size(
                os.path.join(logs_uri, str(file))
            )  # type: ignore[assignment]

            if is_negative_offset:
                if file_size >= -offset:
                    latest_file_id = -(i + 1)
                    break
                offset += file_size
            else:
                if file_size > offset:
                    latest_file_id = i
                    break
                offset -= file_size
        else:
            # No file contains the offset. Without this branch the leftover
            # offset would be applied to the first file, e.g. an offset
            # equal to the total size would return the whole log again.
            if not is_negative_offset:
                # Starting at or past the end: nothing left to read
                return ""
            # Reaching back further than the log: start at its beginning
            offset = 0

        # read the files according to pre-filtering
        files.sort()
        ret = []
        for file in files[latest_file_id:]:
            raw = _read_raw(
                os.path.join(logs_uri, str(file)),
                offset,
                length,
            )
            ret.append(_to_text(raw, strip_timestamp))
            # only the first file is read from within, the rest from 0
            offset = 0
            # `length` is a byte budget, so it is reduced by the bytes
            # actually read, not by the size of the decoded (and possibly
            # timestamp-stripped) text
            length -= len(raw)
            if length <= 0:
                # stop further reading, if the whole length is already read
                break

        return "".join(ret)
    finally:
        artifact_store.cleanup()

`prepare_logs_uri(artifact_store: BaseArtifactStore, step_name: Optional[str] = None, log_key: Optional[str] = None) -> str`

Generates and prepares a URI for the log file or folder for a step.

Parameters:

Name	Type	Description	Default
`artifact_store`	`BaseArtifactStore`	The artifact store on which the artifact will be stored.	required
`step_name`	`Optional[str]`	Name of the step. Skipped for global pipeline run logs.	`None`
`log_key`	`Optional[str]`	The unique identification key of the log file.	`None`

Returns:

Type	Description
`str`	The URI of the log storage (file or folder).

Source code in src/zenml/logging/step_logging.py

def prepare_logs_uri(
    artifact_store: "BaseArtifactStore",
    step_name: Optional[str] = None,
    log_key: Optional[str] = None,
) -> str:
    """Build the URI of a log file or folder and clear any previous content.

    The logs live below `<artifact store path>/<subfolder>/logs`, where the
    subfolder is the step name or, without one, the pipeline run logs
    folder. On an immutable filesystem the URI is a folder named after the
    log key, which is created here; otherwise it is a file named after the
    log key plus the logs extension. Anything already present at the URI
    is removed with a warning.

    Args:
        artifact_store: The artifact store on which the artifact will be
            stored.
        step_name: Name of the step. Skipped for global pipeline run logs.
        log_key: The unique identification key of the log file. A random
            UUID is used when omitted.

    Returns:
        The URI of the log storage (file or folder).
    """
    key = str(uuid4()) if log_key is None else log_key

    base_dir = os.path.join(
        artifact_store.path,
        step_name or PIPELINE_RUN_LOGS_FOLDER,
        "logs",
    )

    # Make sure the parent folder is there
    if not artifact_store.exists(base_dir):
        artifact_store.makedirs(base_dir)

    if artifact_store.config.IS_IMMUTABLE_FILESYSTEM:
        # Folder variant: start from a fresh, empty directory
        logs_uri = os.path.join(base_dir, key)
        if artifact_store.exists(logs_uri):
            logger.warning(
                f"Logs directory {logs_uri} already exists! Removing old log directory..."
            )
            artifact_store.rmtree(logs_uri)

        artifact_store.makedirs(logs_uri)
        return sanitize_remote_path(logs_uri)

    # File variant: only an old file has to go, nothing is created yet
    logs_uri = os.path.join(base_dir, f"{key}{LOGS_EXTENSION}")
    if artifact_store.exists(logs_uri):
        logger.warning(
            f"Logs file {logs_uri} already exists! Removing old log file..."
        )
        artifact_store.remove(logs_uri)

    return sanitize_remote_path(logs_uri)

`remove_ansi_escape_codes(text: str) -> str`

Auxiliary function to remove ANSI escape codes from a given string.

Parameters:

Name	Type	Description	Default
`text`	`str`	the input string	required

Returns:

Type	Description
`str`	the version of the input string where the escape codes are removed.

Source code in src/zenml/logging/step_logging.py

def remove_ansi_escape_codes(text: str) -> str:
    """Strip ANSI escape codes from a string.

    Args:
        text: the input string

    Returns:
        the input string with every match of the `ansi_escape` pattern
        replaced by an empty string.
    """
    cleaned = ansi_escape.sub("", text)
    return cleaned

`setup_orchestrator_logging(run_id: UUID, deployment: PipelineDeploymentResponse, logs_response: Optional[LogsResponse] = None) -> Any`

Set up logging for an orchestrator environment.

This function can be reused by different orchestrators to set up consistent logging behavior.

Parameters:

Name	Type	Description	Default
`run_id`	`UUID`	The pipeline run ID.	required
`deployment`	`PipelineDeploymentResponse`	The deployment of the pipeline run.	required
`logs_response`	`Optional[LogsResponse]`	The logs response to continue from.	`None`

Returns:

Type	Description
`Any`	The logs context (PipelineLogsStorageContext)

Source code in src/zenml/logging/step_logging.py

def setup_orchestrator_logging(
    run_id: UUID,
    deployment: "PipelineDeploymentResponse",
    logs_response: Optional[LogsResponse] = None,
) -> Any:
    """Set up logging for an orchestrator environment.

    This function can be reused by different orchestrators to set up
    consistent logging behavior. Log storage is skipped when it is
    disabled through the environment variable or through the pipeline
    configuration. Any failure during setup is logged and also leads to
    a no-op context instead of an exception.

    Args:
        run_id: The pipeline run ID.
        deployment: The deployment of the pipeline run.
        logs_response: The logs response to continue from.

    Returns:
        The logs context (PipelineLogsStorageContext), or a `nullcontext`
        when log storage is disabled or could not be set up.
    """
    try:
        # The environment switch wins over the pipeline configuration
        if handle_bool_env_var(ENV_ZENML_DISABLE_PIPELINE_LOGS_STORAGE, False):
            return nullcontext()

        # An unset (None) configuration value means "enabled"
        configured = deployment.pipeline_configuration.enable_pipeline_logs
        if configured is not None and not configured:
            return nullcontext()

        # Fetch the active stack
        client = Client()
        active_stack = client.active_stack

        if not logs_response:
            # No logs to continue from: prepare a new URI and register it
            logs_uri = prepare_logs_uri(
                artifact_store=active_stack.artifact_store,
            )
            logs_model = LogsRequest(
                uri=logs_uri,
                source="orchestrator",
                artifact_store_id=active_stack.artifact_store.id,
            )

            # Add orchestrator logs to the pipeline run
            try:
                client.zen_store.update_run(
                    run_id=run_id,
                    run_update=PipelineRunUpdate(add_logs=[logs_model]),
                )
            except Exception as e:
                logger.error(
                    f"Failed to add orchestrator logs to the run {run_id}: {e}"
                )
                # Picked up by the outer handler below
                raise
        else:
            logs_uri = logs_response.uri

        return PipelineLogsStorageContext(
            logs_uri=logs_uri,
            artifact_store=active_stack.artifact_store,
            prepend_step_name=False,
        )
    except Exception as e:
        logger.error(
            f"Failed to setup orchestrator logging for run {run_id}: {e}"
        )
        return nullcontext()