Skip to content

Io

zenml.io special

The io module handles file operations for the ZenML package.

It offers a standard interface for reading, writing and manipulating files and directories. It is heavily influenced and inspired by the io module of tfx.

fileio

Functionality for reading, writing and managing files.

convert_to_str(path)

Converts a "PathType" to a str using UTF-8.

Parameters:

Name Type Description Default
path PathType

The path to convert.

required

Returns:

Type Description
str

The path as a string.

Source code in zenml/io/fileio.py
def convert_to_str(path: "PathType") -> str:
    """Return `path` as a `str`, decoding bytes as UTF-8 when necessary.

    Args:
        path: A path given either as `str` or as UTF-8 encoded bytes.

    Returns:
        The path as a string; `str` inputs are returned unchanged.
    """
    return path if isinstance(path, str) else path.decode("utf-8")

copy(src, dst, overwrite=False)

Copy a file from the source to the destination.

Parameters:

Name Type Description Default
src PathType

The path of the file to copy.

required
dst PathType

The path to copy the source file to.

required
overwrite bool

Whether to overwrite the destination file if it exists.

False

Exceptions:

Type Description
FileExistsError

If a file already exists at the destination and overwrite is not set to True.

Source code in zenml/io/fileio.py
def copy(src: "PathType", dst: "PathType", overwrite: bool = False) -> None:
    """Copy a file from the source to the destination.

    When both paths resolve to the same filesystem, the copy is delegated to
    that filesystem's `copyfile`. Otherwise the source is read fully into
    memory and written to the destination through the respective filesystems.

    Args:
        src: The path of the file to copy.
        dst: The path to copy the source file to.
        overwrite: Whether to overwrite the destination file if it exists.

    Raises:
        FileExistsError: If a file already exists at the destination and
            `overwrite` is not set to `True`.
    """
    src_fs = _get_filesystem(src)
    dst_fs = _get_filesystem(dst)
    if src_fs is dst_fs:
        src_fs.copyfile(src, dst, overwrite=overwrite)
        return

    if not overwrite and exists(dst):
        raise FileExistsError(
            f"Destination file '{convert_to_str(dst)}' already exists "
            f"and `overwrite` is false."
        )

    # Read the source completely before opening the destination so that a
    # failed read never leaves a truncated destination file behind. Context
    # managers guarantee both handles are closed (and the write is flushed).
    with open(src, mode="rb") as src_file:
        contents = src_file.read()
    with open(dst, mode="wb") as dst_file:
        dst_file.write(contents)

create_dir_if_not_exists(dir_path)

Creates directory if it does not exist.

Parameters:

Name Type Description Default
dir_path str

Local path in filesystem.

required
Source code in zenml/io/fileio.py
def create_dir_if_not_exists(dir_path: str) -> None:
    """Create a single directory unless it is already present.

    Args:
        dir_path: Local path in filesystem.
    """
    if isdir(dir_path):
        return
    mkdir(dir_path)

create_dir_recursive_if_not_exists(dir_path)

Creates directory recursively if it does not exist.

Parameters:

Name Type Description Default
dir_path str

Local path in filesystem.

required
Source code in zenml/io/fileio.py
def create_dir_recursive_if_not_exists(dir_path: str) -> None:
    """Create a directory, including missing parents, unless it already exists.

    Args:
        dir_path: Local path in filesystem.
    """
    if isdir(dir_path):
        return
    makedirs(dir_path)

create_file_if_not_exists(file_path, file_contents='{}')

Creates file if it does not exist.

Parameters:

Name Type Description Default
file_path str

Local path in filesystem.

required
file_contents str

Contents of file.

'{}'
Source code in zenml/io/fileio.py
def create_file_if_not_exists(
    file_path: str, file_contents: str = "{}"
) -> None:
    """Write `file_contents` to `file_path` unless the path already exists.

    Missing parent directories are created first.

    Args:
        file_path: Local path in filesystem.
        file_contents: Contents of file.
    """
    target = Path(file_path)
    if exists(file_path):
        # Never touch a pre-existing file.
        return

    create_dir_recursive_if_not_exists(str(target.parent))
    with open(str(target), "w") as handle:
        handle.write(file_contents)

exists(path)

Check whether a given path exists.

Parameters:

Name Type Description Default
path PathType

The path to check.

required

Returns:

Type Description
bool

True if the given path exists, False otherwise.

Source code in zenml/io/fileio.py
def exists(path: "PathType") -> bool:
    """Report whether `path` exists on the filesystem responsible for it.

    Args:
        path: The path to check.

    Returns:
        `True` if the given path exists, `False` otherwise.
    """
    filesystem = _get_filesystem(path)
    return filesystem.exists(path)

find_files(dir_path, pattern)

Find files in a directory that match pattern.

Parameters:

Name Type Description Default
dir_path PathType

The path to directory.

required
pattern str

pattern like *.png.

required

Yields:

Type Description
Iterable[str]

All matching filenames in the directory.

Source code in zenml/io/fileio.py
def find_files(dir_path: "PathType", pattern: str) -> Iterable[str]:
    """Walk a directory tree and yield files whose name matches `pattern`.

    Args:
        dir_path: The path to directory.
        pattern: An `fnmatch`-style pattern such as `*.png`; it is matched
            against the base name of each file.

    Yields:
        The path (walked directory joined with the file name) of every
        matching file.
    """
    for current_dir, _, file_names in walk(dir_path):
        for file_name in file_names:
            name = convert_to_str(file_name)
            if not fnmatch.fnmatch(name, pattern):
                continue
            yield os.path.join(convert_to_str(current_dir), name)

get_grandparent(dir_path)

Get grandparent of dir.

Parameters:

Name Type Description Default
dir_path str

The path to directory.

required

Returns:

Type Description
str

The stem of the input path's grandparent (its parent's parent).

Source code in zenml/io/fileio.py
def get_grandparent(dir_path: str) -> str:
    """Return the stem of the directory two levels above `dir_path`.

    Args:
        dir_path: The path to directory.

    Returns:
        The stem (final component without its suffix) of the parent's parent.
    """
    parent_dir = Path(dir_path).parent
    grandparent_dir = parent_dir.parent
    return grandparent_dir.stem

get_parent(dir_path)

Get parent of dir.

Parameters:

Name Type Description Default
dir_path str

The path to directory.

required

Returns:

Type Description
str

Parent (stem) of the dir as a string.

Source code in zenml/io/fileio.py
def get_parent(dir_path: str) -> str:
    """Return the stem of the directory directly above `dir_path`.

    Args:
        dir_path: The path to directory.

    Returns:
        The stem (final component without its suffix) of the parent.
    """
    parent_dir = Path(dir_path).parent
    return parent_dir.stem

glob(pattern)

Find all files matching the given pattern.

Parameters:

Name Type Description Default
pattern PathType

The pattern to match.

required

Returns:

Type Description
List[PathType]

A list of paths matching the pattern.

Source code in zenml/io/fileio.py
def glob(pattern: "PathType") -> List["PathType"]:
    """Return the paths matching `pattern` on the pattern's filesystem.

    Args:
        pattern: The pattern to match.

    Returns:
        A list of paths matching the pattern.
    """
    filesystem = _get_filesystem(pattern)
    return filesystem.glob(pattern)

is_remote(path)

Returns True if the path is a remote path, i.e. it starts with one of the known remote filesystem prefixes.

Parameters:

Name Type Description Default
path str

Any path as a string.

required

Returns:

Type Description
bool

True if remote path, else False.

Source code in zenml/io/fileio.py
def is_remote(path: str) -> bool:
    """Tell whether `path` starts with one of the remote filesystem prefixes.

    Only the prefix is inspected; nothing is accessed remotely.

    Args:
        path: Any path as a string.

    Returns:
        True if remote path, else False.
    """
    for remote_prefix in REMOTE_FS_PREFIX:
        if path.startswith(remote_prefix):
            return True
    return False

is_root(path)

Returns true if path has no parent in local filesystem.

Parameters:

Name Type Description Default
path str

Local path in filesystem.

required

Returns:

Type Description
bool

True if root, else False.

Source code in zenml/io/fileio.py
def is_root(path: str) -> bool:
    """Tell whether `path` is its own parent, i.e. has nothing above it.

    Args:
        path: Local path in filesystem.

    Returns:
        True if root, else False.
    """
    candidate = Path(path)
    return candidate.parent == candidate

isdir(path)

Check whether the given path is a directory.

Parameters:

Name Type Description Default
path PathType

The path to check.

required

Returns:

Type Description
bool

True if the given path is a directory, False otherwise.

Source code in zenml/io/fileio.py
def isdir(path: "PathType") -> bool:
    """Report whether `path` is a directory on its filesystem.

    Args:
        path: The path to check.

    Returns:
        `True` if the given path is a directory, `False` otherwise.
    """
    filesystem = _get_filesystem(path)
    return filesystem.isdir(path)

listdir(path, only_file_names=True)

Lists all files in a directory.

Parameters:

Name Type Description Default
path str

The path to the directory.

required
only_file_names bool

If True, only return the file names, not the full path.

True

Returns:

Type Description
List[str]

A list of files in the directory.

Source code in zenml/io/fileio.py
def listdir(path: str, only_file_names: bool = True) -> List[str]:
    """Return the entries of a directory as strings.

    Args:
        path: The path to the directory.
        only_file_names: If True, only return the file names, not the full path.

    Returns:
        The directory entries, or an empty list when listing raises an
        `IOError` (which is logged at debug level).
    """
    try:
        entries = []
        for raw_entry in _get_filesystem(path).listdir(path):
            entry_name = convert_to_str(raw_entry)
            if only_file_names:
                entries.append(entry_name)
            else:
                entries.append(os.path.join(path, entry_name))
        return entries
    except IOError:
        logger.debug(f"Dir {path} not found.")
        return []

makedirs(path)

Make a directory at the given path, recursively creating parents.

Parameters:

Name Type Description Default
path PathType

The path to the directory.

required
Source code in zenml/io/fileio.py
def makedirs(path: "PathType") -> None:
    """Create the directory at `path`, recursively creating its parents.

    Args:
        path: The path to the directory.
    """
    filesystem = _get_filesystem(path)
    filesystem.makedirs(path)

mkdir(path)

Make a directory at the given path; parent directory must exist.

Parameters:

Name Type Description Default
path PathType

The path to the directory.

required
Source code in zenml/io/fileio.py
def mkdir(path: "PathType") -> None:
    """Create the directory at `path`; its parent directory must exist.

    Args:
        path: The path to the directory.
    """
    filesystem = _get_filesystem(path)
    filesystem.mkdir(path)

move(source, destination, overwrite=False)

Moves dir or file from source to destination. Can be used to rename.

Parameters:

Name Type Description Default
source str

Local path to move from.

required
destination str

Local path to move to.

required
overwrite bool

Boolean; if False, an error is raised instead of overwriting an existing destination.

False
Source code in zenml/io/fileio.py
def move(source: str, destination: str, overwrite: bool = False) -> None:
    """Move a file or directory by delegating to `rename`.

    Args:
        source: Local path to move from.
        destination: Local path to move to.
        overwrite: Forwarded unchanged to `rename` as its `overwrite` flag.
    """
    rename(src=source, dst=destination, overwrite=overwrite)

open(path, mode='r')

Opens a file.

Parameters:

Name Type Description Default
path PathType

The path to the file.

required
mode str

The mode to open the file in.

'r'

Returns:

Type Description
Any

The opened file.

Source code in zenml/io/fileio.py
def open(path: "PathType", mode: str = "r") -> Any:  # noqa
    """Open the file at `path` through the filesystem responsible for it.

    Args:
        path: The path to the file.
        mode: The mode to open the file in.

    Returns:
        The opened file.
    """
    filesystem = _get_filesystem(path)
    return filesystem.open(path, mode=mode)

remove(path)

Remove the file at the given path. Dangerous operation.

Parameters:

Name Type Description Default
path PathType

The path to the file to remove.

required

Exceptions:

Type Description
FileNotFoundError

If the file does not exist.

Source code in zenml/io/fileio.py
def remove(path: "PathType") -> None:
    """Delete the file at `path`. Dangerous operation.

    Args:
        path: The path to the file to remove.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    if exists(path):
        _get_filesystem(path).remove(path)
        return
    raise FileNotFoundError(f"{convert_to_str(path)} does not exist!")

rename(src, dst, overwrite=False)

Rename a file.

Parameters:

Name Type Description Default
src PathType

The path of the file to rename.

required
dst PathType

The path to rename the source file to.

required
overwrite bool

If a file already exists at the destination, this method will overwrite it if overwrite=True and raise a FileExistsError otherwise.

False

Exceptions:

Type Description
NotImplementedError

If the source and destination file systems are not the same.

Source code in zenml/io/fileio.py
def rename(src: "PathType", dst: "PathType", overwrite: bool = False) -> None:
    """Rename a file within a single filesystem.

    Args:
        src: The path of the file to rename.
        dst: The path to rename the source file to.
        overwrite: Passed through to the filesystem's `rename`; when a file
            already exists at the destination it is expected to be
            overwritten if `True` and a FileExistsError raised otherwise.

    Raises:
        NotImplementedError: If the source and destination file systems are not
            the same.
    """
    source_fs = _get_filesystem(src)
    target_fs = _get_filesystem(dst)
    if source_fs is not target_fs:
        raise NotImplementedError(
            f"Renaming from {convert_to_str(src)} to {convert_to_str(dst)} "
            f"using different file systems plugins is currently not supported."
        )
    source_fs.rename(src, dst, overwrite=overwrite)

resolve_relative_path(path)

Takes relative path and resolves it absolutely.

Parameters:

Name Type Description Default
path str

Local path in filesystem.

required

Returns:

Type Description
str

Resolved path.

Source code in zenml/io/fileio.py
def resolve_relative_path(path: str) -> str:
    """Resolve a local path to an absolute one; remote paths pass through.

    Args:
        path: Local path in filesystem.

    Returns:
        The unchanged path if it is remote, otherwise the resolved path.
    """
    return path if is_remote(path) else str(Path(path).resolve())

rmtree(dir_path)

Deletes a directory recursively. Dangerous operation.

Parameters:

Name Type Description Default
dir_path str

The path to the directory to delete.

required

Exceptions:

Type Description
TypeError

If the path is not pointing to a directory.

Source code in zenml/io/fileio.py
def rmtree(dir_path: str) -> None:
    """Recursively delete the directory at `dir_path`. Dangerous operation.

    Args:
        dir_path: The path to the directory to delete.

    Raises:
        TypeError: If the path is not pointing to a directory.
    """
    if isdir(dir_path):
        _get_filesystem(dir_path).rmtree(dir_path)
        return
    raise TypeError(f"Path '{dir_path}' is not a directory.")

stat(path)

Get the stat descriptor for a given file path.

Parameters:

Name Type Description Default
path PathType

The path to the file.

required

Returns:

Type Description
Any

The stat descriptor.

Source code in zenml/io/fileio.py
def stat(path: "PathType") -> Any:
    """Return the stat descriptor of `path` as reported by its filesystem.

    Args:
        path: The path to the file.

    Returns:
        The stat descriptor.
    """
    filesystem = _get_filesystem(path)
    return filesystem.stat(path)

walk(top, topdown=True, onerror=None)

Return an iterator that walks the contents of the given directory.

Parameters:

Name Type Description Default
top PathType

The path of directory to walk.

required
topdown bool

Whether to walk directories topdown or bottom-up.

True
onerror Optional[Callable[..., NoneType]]

Callable that gets called if an error occurs.

None

Returns:

Type Description
Iterable[Tuple[PathType, List[PathType], List[PathType]]]

An Iterable of Tuples, each of which contain the path of the current directory path, a list of directories inside the current directory and a list of files inside the current directory.

Source code in zenml/io/fileio.py
def walk(
    top: "PathType",
    topdown: bool = True,
    onerror: Optional[Callable[..., None]] = None,
) -> Iterable[Tuple["PathType", List["PathType"], List["PathType"]]]:
    """Walk the directory tree rooted at `top` via its filesystem.

    Args:
        top: The path of directory to walk.
        topdown: Whether to walk directories topdown or bottom-up.
        onerror: Callable that gets called if an error occurs.

    Returns:
        An Iterable of Tuples of the form `(current directory path,
        directories inside it, files inside it)`.
    """
    filesystem = _get_filesystem(top)
    return filesystem.walk(top, topdown=topdown, onerror=onerror)

filesystem

Defines the filesystem abstraction of ZenML.

BaseFilesystem (ABC)

Abstract Filesystem base class.

Design inspired by the Filesystem abstraction in TFX: https://github.com/tensorflow/tfx/blob/master/tfx/dsl/io/filesystem.py

Source code in zenml/io/filesystem.py
class BaseFilesystem(ABC):
    """Abstract Filesystem base class.

    Every operation is declared as an abstract static method without a body,
    so concrete filesystems must implement all of them.

    Design inspired by the `Filesystem` abstraction in TFX:
    https://github.com/tensorflow/tfx/blob/master/tfx/dsl/io/filesystem.py
    """

    # Schemes handled by a concrete filesystem. Declared here without a
    # value. NOTE(review): presumably each subclass is expected to assign it,
    # and entries look like "s3://" or "" for local paths — confirm.
    SUPPORTED_SCHEMES: ClassVar[Set[str]]

    @staticmethod
    @abstractmethod
    def open(name: PathType, mode: str = "r") -> Any:
        """Opens a file.

        Args:
            name: The path to the file.
            mode: The mode to open the file in. Defaults to `"r"`.

        Returns:
            The opened file.
        """

    @staticmethod
    @abstractmethod
    def copyfile(src: PathType, dst: PathType, overwrite: bool = False) -> None:
        """Copy a file from the source to the destination.

        Args:
            src: The path of the file to copy.
            dst: The path to copy the source file to.
            overwrite: Whether to overwrite the destination file if it exists.
                Defaults to `False`.

        Raises:
            FileExistsError: If a file already exists at the destination and
                `overwrite` is not set to `True`.
        """

    @staticmethod
    @abstractmethod
    def exists(path: PathType) -> bool:
        """Check whether a given path exists.

        Args:
            path: The path to check.

        Returns:
            `True` if the given path exists, `False` otherwise.
        """

    @staticmethod
    @abstractmethod
    def glob(pattern: PathType) -> List[PathType]:
        """Find all files matching the given pattern.

        Args:
            pattern: The pattern to match.

        Returns:
            A list of paths matching the pattern.
        """

    @staticmethod
    @abstractmethod
    def isdir(path: PathType) -> bool:
        """Check whether the given path is a directory.

        Args:
            path: The path to check.

        Returns:
            `True` if the given path is a directory, `False` otherwise.
        """

    @staticmethod
    @abstractmethod
    def listdir(path: PathType) -> List[PathType]:
        """Lists all files in a directory.

        Args:
            path: The path to the directory.

        Returns:
            A list of files in the directory.
        """

    @staticmethod
    @abstractmethod
    def makedirs(path: PathType) -> None:
        """Make a directory at the given path, recursively creating parents.

        Args:
            path: Path to the directory.
        """

    @staticmethod
    @abstractmethod
    def mkdir(path: PathType) -> None:
        """Make a directory at the given path; parent directory must exist.

        Args:
            path: Path to the directory.
        """

    @staticmethod
    @abstractmethod
    def remove(path: PathType) -> None:
        """Remove the file at the given path. Dangerous operation.

        Args:
            path: The path to the file to remove.

        Raises:
            FileNotFoundError: If the file does not exist.
        """

    @staticmethod
    @abstractmethod
    def rename(src: PathType, dst: PathType, overwrite: bool = False) -> None:
        """Rename a file.

        Args:
            src: The path of the file to rename.
            dst: The path to rename the source file to.
            overwrite: If a file already exists at the destination, this
                method will overwrite it if overwrite=`True` and
                raise a FileExistsError otherwise. Defaults to `False`.

        Raises:
            FileExistsError: If a file already exists at the destination
                and overwrite is not set to `True`.
        """

    @staticmethod
    @abstractmethod
    def rmtree(path: PathType) -> None:
        """Deletes a directory recursively. Dangerous operation.

        Args:
            path: The path to the directory to delete.
        """

    @staticmethod
    @abstractmethod
    def stat(path: PathType) -> Any:
        """Get the stat descriptor for a given file path.

        Args:
            path: The path to the file.

        Returns:
            The stat descriptor.
        """

    @staticmethod
    @abstractmethod
    def walk(
        top: PathType,
        topdown: bool = True,
        onerror: Optional[Callable[..., None]] = None,
    ) -> Iterable[Tuple[PathType, List[PathType], List[PathType]]]:
        """Return an iterator that walks the contents of the given directory.

        Args:
            top: The path of directory to walk.
            topdown: Whether to walk directories topdown or bottom-up.
                Defaults to `True` (topdown).
            onerror: Callable that gets called if an error occurs.

        Returns:
            An Iterable of Tuples, each of which contains the path of the
            current directory, a list of directories inside the current
            directory and a list of files inside the current directory.
        """
copyfile(src, dst, overwrite=False) staticmethod

Copy a file from the source to the destination.

Parameters:

Name Type Description Default
src Union[bytes, str]

The path of the file to copy.

required
dst Union[bytes, str]

The path to copy the source file to.

required
overwrite bool

Whether to overwrite the destination file if it exists.

False

Exceptions:

Type Description
FileExistsError

If a file already exists at the destination and overwrite is not set to True.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def copyfile(src: PathType, dst: PathType, overwrite: bool = False) -> None:
    """Copy a file from the source to the destination.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        src: The path of the file to copy.
        dst: The path to copy the source file to.
        overwrite: Whether to overwrite the destination file if it exists.
            Defaults to `False`.

    Raises:
        FileExistsError: If a file already exists at the destination and
            `overwrite` is not set to `True`.
    """
exists(path) staticmethod

Check whether a given path exists.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to check.

required

Returns:

Type Description
bool

True if the given path exists, False otherwise.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def exists(path: PathType) -> bool:
    """Check whether a given path exists.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: The path to check.

    Returns:
        `True` if the given path exists, `False` otherwise.
    """
glob(pattern) staticmethod

Find all files matching the given pattern.

Parameters:

Name Type Description Default
pattern Union[bytes, str]

The pattern to match.

required

Returns:

Type Description
List[Union[bytes, str]]

A list of paths matching the pattern.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def glob(pattern: PathType) -> List[PathType]:
    """Find all files matching the given pattern.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        pattern: The pattern to match.

    Returns:
        A list of paths matching the pattern.
    """
isdir(path) staticmethod

Check whether the given path is a directory.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to check.

required

Returns:

Type Description
bool

True if the given path is a directory, False otherwise.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def isdir(path: PathType) -> bool:
    """Check whether the given path is a directory.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: The path to check.

    Returns:
        `True` if the given path is a directory, `False` otherwise.
    """
listdir(path) staticmethod

Lists all files in a directory.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the directory.

required

Returns:

Type Description
List[Union[bytes, str]]

A list of files in the directory.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def listdir(path: PathType) -> List[PathType]:
    """Lists all files in a directory.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: The path to the directory.

    Returns:
        A list of files in the directory.
    """
makedirs(path) staticmethod

Make a directory at the given path, recursively creating parents.

Parameters:

Name Type Description Default
path Union[bytes, str]

Path to the directory.

required
Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def makedirs(path: PathType) -> None:
    """Make a directory at the given path, recursively creating parents.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: Path to the directory.
    """
mkdir(path) staticmethod

Make a directory at the given path; parent directory must exist.

Parameters:

Name Type Description Default
path Union[bytes, str]

Path to the directory.

required
Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def mkdir(path: PathType) -> None:
    """Make a directory at the given path; parent directory must exist.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: Path to the directory.
    """
open(name, mode='r') staticmethod

Opens a file.

Parameters:

Name Type Description Default
name Union[bytes, str]

The path to the file.

required
mode str

The mode to open the file in.

'r'

Returns:

Type Description
Any

The opened file.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def open(name: PathType, mode: str = "r") -> Any:
    """Opens a file.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        name: The path to the file.
        mode: The mode to open the file in. Defaults to `"r"`.

    Returns:
        The opened file.
    """
remove(path) staticmethod

Remove the file at the given path. Dangerous operation.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the file to remove.

required

Exceptions:

Type Description
FileNotFoundError

If the file does not exist.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def remove(path: PathType) -> None:
    """Remove the file at the given path. Dangerous operation.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: The path to the file to remove.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
rename(src, dst, overwrite=False) staticmethod

Rename a file.

Parameters:

Name Type Description Default
src Union[bytes, str]

The path of the file to rename.

required
dst Union[bytes, str]

The path to rename the source file to.

required
overwrite bool

If a file already exists at the destination, this method will overwrite it if overwrite=True and raise a FileExistsError otherwise.

False

Exceptions:

Type Description
FileExistsError

If a file already exists at the destination and overwrite is not set to True.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def rename(src: PathType, dst: PathType, overwrite: bool = False) -> None:
    """Rename a file.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        src: The path of the file to rename.
        dst: The path to rename the source file to.
        overwrite: If a file already exists at the destination, this
            method will overwrite it if overwrite=`True` and
            raise a FileExistsError otherwise. Defaults to `False`.

    Raises:
        FileExistsError: If a file already exists at the destination
            and overwrite is not set to `True`.
    """
rmtree(path) staticmethod

Deletes a directory recursively. Dangerous operation.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the directory to delete.

required
Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def rmtree(path: PathType) -> None:
    """Deletes a directory recursively. Dangerous operation.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: The path to the directory to delete.
    """
stat(path) staticmethod

Get the stat descriptor for a given file path.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the file.

required

Returns:

Type Description
Any

The stat descriptor.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def stat(path: PathType) -> Any:
    """Get the stat descriptor for a given file path.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        path: The path to the file.

    Returns:
        The stat descriptor.
    """
walk(top, topdown=True, onerror=None) staticmethod

Return an iterator that walks the contents of the given directory.

Parameters:

Name Type Description Default
top Union[bytes, str]

The path of directory to walk.

required
topdown bool

Whether to walk directories topdown or bottom-up.

True
onerror Optional[Callable[..., NoneType]]

Callable that gets called if an error occurs.

None

Returns:

Type Description
Iterable[Tuple[Union[bytes, str], List[Union[bytes, str]], List[Union[bytes, str]]]]

An Iterable of Tuples, each of which contain the path of the current directory path, a list of directories inside the current directory and a list of files inside the current directory.

Source code in zenml/io/filesystem.py
@staticmethod
@abstractmethod
def walk(
    top: PathType,
    topdown: bool = True,
    onerror: Optional[Callable[..., None]] = None,
) -> Iterable[Tuple[PathType, List[PathType], List[PathType]]]:
    """Return an iterator that walks the contents of the given directory.

    Abstract static method: no implementation is provided here, so each
    concrete filesystem must supply its own.

    Args:
        top: The path of directory to walk.
        topdown: Whether to walk directories topdown or bottom-up.
            Defaults to `True` (topdown).
        onerror: Callable that gets called if an error occurs.

    Returns:
        An Iterable of Tuples, each of which contains the path of the
        current directory, a list of directories inside the current
        directory and a list of files inside the current directory.
    """

filesystem_registry

Filesystem registry managing filesystem plugins.

FileIORegistry

Registry of pluggable filesystem implementations.

Source code in zenml/io/filesystem_registry.py
class FileIORegistry:
    """Registry of pluggable filesystem implementations.

    Maps scheme strings (for example `"s3://"`, or `""` for paths without a
    scheme prefix) to the filesystem class registered for them.
    """

    def __init__(self) -> None:
        """Initialize the registry."""
        # Scheme -> filesystem class currently registered for that scheme.
        self._filesystems: Dict["PathType", Type["BaseFilesystem"]] = {}
        # Serializes concurrent `register` calls.
        self._registration_lock = Lock()

    def register(self, filesystem_cls: Type["BaseFilesystem"]) -> None:
        """Register a filesystem implementation.

        The class is registered for every scheme in its `SUPPORTED_SCHEMES`;
        a previously registered class for the same scheme is replaced (and
        the replacement is logged at debug level).

        Args:
            filesystem_cls: Subclass of `zenml.io.filesystem.BaseFilesystem`.
        """
        with self._registration_lock:
            for scheme in filesystem_cls.SUPPORTED_SCHEMES:
                current_preferred = self._filesystems.get(scheme)
                if current_preferred is not None:
                    logger.debug(
                        "Overwriting previously registered filesystem for "
                        "scheme `%s`. Old class: %s, new class: %s",
                        scheme,
                        current_preferred.__name__,
                        filesystem_cls.__name__,
                    )

                self._filesystems[scheme] = filesystem_cls

    def get_filesystem_for_scheme(
        self, scheme: "PathType"
    ) -> Type["BaseFilesystem"]:
        """Get filesystem plugin for given scheme string.

        Args:
            scheme: The scheme to get the filesystem for. Bytes are decoded
                as UTF-8 before the lookup.

        Returns:
            The filesystem plugin for the given scheme.

        Raises:
            ValueError: If no filesystem plugin is registered for the given
                scheme.
        """
        if isinstance(scheme, bytes):
            scheme = scheme.decode("utf-8")
        if scheme not in self._filesystems:
            raise ValueError(
                f"No file systems were found for the scheme: "
                f"{scheme}. Please make sure that you are using "
                f"the right path and the all the necessary "
                f"integrations are properly installed."
            )
        return self._filesystems[scheme]

    def get_filesystem_for_path(
        self, path: "PathType"
    ) -> Type["BaseFilesystem"]:
        """Get filesystem plugin for given path.

        Args:
            path: The path to get the filesystem for, as `str` or `bytes`.

        Returns:
            The filesystem plugin for the given path.

        Raises:
            ValueError: If `path` is neither `str` nor `bytes`, or if no
                filesystem plugin is registered for the given path.
        """
        # Assume local path by default, but extract filesystem prefix if available.
        if isinstance(path, str):
            path_bytes = path.encode("utf-8")
        elif isinstance(path, bytes):
            path_bytes = path
        else:
            # An f-string is used instead of `"%r" % path`: with `%`, a tuple
            # argument is unpacked as the format arguments and raises
            # TypeError instead of the documented ValueError.
            raise ValueError(f"Invalid path type: {path!r}.")
        result = re.match(b"^([a-z0-9]+://)", path_bytes)
        # Paths without a `<scheme>://` prefix map to the empty scheme.
        scheme = result.group(1).decode("utf-8") if result else ""
        return self.get_filesystem_for_scheme(scheme)
__init__(self) special

Initialize the registry.

Source code in zenml/io/filesystem_registry.py
def __init__(self) -> None:
    """Set up an empty registry and the lock used during registration."""
    # Held while `register` updates the scheme mapping.
    self._registration_lock = Lock()
    # Maps each scheme to the filesystem class registered for it.
    self._filesystems: Dict["PathType", Type["BaseFilesystem"]] = {}
get_filesystem_for_path(self, path)

Get filesystem plugin for given path.

Parameters:

Name Type Description Default
path PathType

The path to get the filesystem for.

required

Returns:

Type Description
Type[BaseFilesystem]

The filesystem plugin for the given path.

Exceptions:

Type Description
ValueError

If no filesystem plugin is registered for the given path.

Source code in zenml/io/filesystem_registry.py
def get_filesystem_for_path(
    self, path: "PathType"
) -> Type["BaseFilesystem"]:
    """Get filesystem plugin for given path.

    The scheme is the leading `<lowercase letters/digits>://` prefix of the
    path; paths without such a prefix use the empty scheme.

    Args:
        path: The path to get the filesystem for.

    Returns:
        The filesystem plugin for the given path.

    Raises:
        ValueError: If the path is neither a string nor bytes, or if no
            filesystem plugin is registered for the given path.
    """
    # Assume local path by default, but extract filesystem prefix if available.
    if isinstance(path, str):
        path_bytes = path.encode("utf-8")
    elif isinstance(path, bytes):
        path_bytes = path
    else:
        # `!r` is used instead of `"%r" % path`: the latter raises a
        # TypeError (not a ValueError) when `path` is a tuple.
        raise ValueError(f"Invalid path type: {path!r}.")
    result = re.match(b"^([a-z0-9]+://)", path_bytes)
    scheme = result.group(1).decode("utf-8") if result else ""
    return self.get_filesystem_for_scheme(scheme)
get_filesystem_for_scheme(self, scheme)

Get filesystem plugin for given scheme string.

Parameters:

Name Type Description Default
scheme PathType

The scheme to get the filesystem for.

required

Returns:

Type Description
Type[BaseFilesystem]

The filesystem plugin for the given scheme.

Exceptions:

Type Description
ValueError

If no filesystem plugin is registered for the given scheme.

Source code in zenml/io/filesystem_registry.py
def get_filesystem_for_scheme(
    self, scheme: "PathType"
) -> Type["BaseFilesystem"]:
    """Get filesystem plugin for given scheme string.

    Args:
        scheme: The scheme to get the filesystem for, either as a string or
            as UTF-8 encoded bytes.

    Returns:
        The filesystem plugin for the given scheme.

    Raises:
        ValueError: If no filesystem plugin is registered for the given
            scheme.
    """
    # Normalize bytes to str before the lookup.
    if isinstance(scheme, bytes):
        scheme = scheme.decode("utf-8")
    if scheme not in self._filesystems:
        raise ValueError(
            f"No file systems were found for the scheme: "
            f"{scheme}. Please make sure that you are using "
            "the right path and that all the necessary "
            "integrations are properly installed."
        )
    return self._filesystems[scheme]
register(self, filesystem_cls)

Register a filesystem implementation.

Parameters:

Name Type Description Default
filesystem_cls Type[BaseFilesystem]

Subclass of zenml.io.filesystem.BaseFilesystem.

required
Source code in zenml/io/filesystem_registry.py
def register(self, filesystem_cls: Type["BaseFilesystem"]) -> None:
    """Make a filesystem class the handler for every scheme it supports.

    A scheme that already has a handler is reassigned to the new class, and
    the replacement is reported at debug level.

    Args:
        filesystem_cls: Subclass of `zenml.io.filesystem.BaseFilesystem`.
    """
    with self._registration_lock:
        for scheme in filesystem_cls.SUPPORTED_SCHEMES:
            previous_cls = self._filesystems.get(scheme)
            if previous_cls is None:
                # First registration for this scheme: nothing to report.
                self._filesystems[scheme] = filesystem_cls
                continue

            logger.debug(
                "Overwriting previously registered filesystem for "
                "scheme `%s`. Old class: %s, new class: %s",
                scheme,
                previous_cls.__name__,
                filesystem_cls.__name__,
            )
            self._filesystems[scheme] = filesystem_cls

local_filesystem

Local filesystem using Python's built-in modules (os, shutil, glob).

LocalFilesystem (BaseFilesystem)

Filesystem that uses local file operations.

Implementation inspired by TFX: https://github.com/tensorflow/tfx/blob/master/tfx/dsl/io/plugins/local.py

Source code in zenml/io/local_filesystem.py
class LocalFilesystem(BaseFilesystem):
    """Filesystem that uses local file operations.

    Each operation is a static method that forwards to the corresponding
    call in Python's `os`, `shutil` or `glob` modules (or the built-in
    `open`).

    Implementation inspired by TFX:
    https://github.com/tensorflow/tfx/blob/master/tfx/dsl/io/plugins/local.py
    """

    # The empty string is the only scheme this filesystem declares.
    SUPPORTED_SCHEMES: ClassVar[Set[str]] = {""}

    @staticmethod
    def open(name: PathType, mode: str = "r") -> Any:
        """Open a file at the given path.

        Args:
            name: The path to the file.
            mode: The mode to open the file, as accepted by the built-in
                `open`.

        Returns:
            Any: The file object.
        """
        return open(name, mode=mode)

    @staticmethod
    def copyfile(src: PathType, dst: PathType, overwrite: bool = False) -> None:
        """Copy a file from the source to the destination.

        Args:
            src: The source path.
            dst: The destination path.
            overwrite: Whether to overwrite the destination file if it exists.

        Raises:
            FileExistsError: If the destination file exists and overwrite is
                False.
        """
        if not overwrite and os.path.exists(dst):
            raise FileExistsError(
                f"Destination file {str(dst)} already exists and argument "
                f"`overwrite` is false."
            )
        shutil.copyfile(src, dst)

    @staticmethod
    def exists(path: PathType) -> bool:
        """Returns `True` if the given path exists.

        Args:
            path: The path to check.

        Returns:
            bool: Whether the path exists.
        """
        return os.path.exists(path)

    @staticmethod
    def glob(pattern: PathType) -> List[PathType]:
        """Return the paths that match a glob pattern.

        Args:
            pattern: The glob pattern.

        Returns:
            List[PathType]: The paths that match the glob pattern.
        """
        # Inside the method body `glob` resolves to the imported module, not
        # to this static method.
        return glob.glob(pattern)  # type: ignore[type-var]

    @staticmethod
    def isdir(path: PathType) -> bool:
        """Returns whether the given path points to a directory.

        Args:
            path: The path to check.

        Returns:
            bool: Whether the path points to a directory.
        """
        return os.path.isdir(path)

    @staticmethod
    def listdir(path: PathType) -> List[PathType]:
        """Returns a list of files under a given directory in the filesystem.

        Args:
            path: The path to the directory.

        Returns:
            List[PathType]: The list of files under the given directory.
        """
        return os.listdir(path)  # type:ignore[return-value]

    @staticmethod
    def makedirs(path: PathType) -> None:
        """Make a directory at the given path, recursively creating parents.

        An already existing directory is not treated as an error.

        Args:
            path: The path to the directory.
        """
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def mkdir(path: PathType) -> None:
        """Make a directory at the given path; parent directory must exist.

        Args:
            path: The path to the directory.
        """
        os.mkdir(path)

    @staticmethod
    def remove(path: PathType) -> None:
        """Remove the file at the given path. Dangerous operation.

        Args:
            path: The path to the file.
        """
        os.remove(path)

    @staticmethod
    def rename(src: PathType, dst: PathType, overwrite: bool = False) -> None:
        """Rename source file to destination file.

        Args:
            src: The path of the file to rename.
            dst: The path to rename the source file to.
            overwrite: If a file already exists at the destination, this
                method will overwrite it if overwrite=`True`

        Raises:
            FileExistsError: If the destination file exists and overwrite is
                False.
        """
        if overwrite:
            # `os.rename` refuses to replace an existing destination on
            # Windows; `os.replace` overwrites it on every platform.
            os.replace(src, dst)
            return
        if os.path.exists(dst):
            raise FileExistsError(
                f"Destination path {str(dst)} already exists and argument "
                f"`overwrite` is false."
            )
        os.rename(src, dst)

    @staticmethod
    def rmtree(path: PathType) -> None:
        """Deletes dir recursively. Dangerous operation.

        Args:
            path: The path to the directory.
        """
        shutil.rmtree(path)

    @staticmethod
    def stat(path: PathType) -> Any:
        """Return the stat descriptor for a given file path.

        Args:
            path: The path to the file.

        Returns:
            Any: The stat descriptor for the file.
        """
        return os.stat(path)

    @staticmethod
    def walk(
        top: PathType,
        topdown: bool = True,
        onerror: Optional[Callable[..., None]] = None,
    ) -> Iterable[Tuple[PathType, List[PathType], List[PathType]]]:
        """Return an iterator that walks the contents of the given directory.

        Args:
            top: Path of directory to walk.
            topdown: Whether to walk directories topdown or bottom-up.
            onerror: Callable that gets called if an error occurs.

        Yields:
            An Iterable of Tuples, each of which contains the path of the
            current directory path, a list of directories inside the
            current directory and a list of files inside the current
            directory.
        """
        yield from os.walk(  # type: ignore[type-var, misc]
            top, topdown=topdown, onerror=onerror
        )
copyfile(src, dst, overwrite=False) staticmethod

Copy a file from the source to the destination.

Parameters:

Name Type Description Default
src Union[bytes, str]

The source path.

required
dst Union[bytes, str]

The destination path.

required
overwrite bool

Whether to overwrite the destination file if it exists.

False

Exceptions:

Type Description
FileExistsError

If the destination file exists and overwrite is False.

Source code in zenml/io/local_filesystem.py
@staticmethod
def copyfile(src: PathType, dst: PathType, overwrite: bool = False) -> None:
    """Duplicate the file at `src` into `dst` on the local disk.

    Args:
        src: Path of the file to read from.
        dst: Path the copy is written to.
        overwrite: Set to `True` to allow replacing an existing `dst`.

    Raises:
        FileExistsError: When `dst` is already present and `overwrite` is
            not enabled.
    """
    # The existence probe only runs when overwriting is disallowed.
    if not overwrite:
        if os.path.exists(dst):
            message = (
                f"Destination file {str(dst)} already exists and argument "
                f"`overwrite` is false."
            )
            raise FileExistsError(message)
    shutil.copyfile(src, dst)
exists(path) staticmethod

Returns True if the given path exists.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to check.

required

Returns:

Type Description
bool

Whether the path exists.

Source code in zenml/io/local_filesystem.py
@staticmethod
def exists(path: PathType) -> bool:
    """Check whether something is present at `path` on the local disk.

    Args:
        path: Location to probe.

    Returns:
        bool: `True` when the path exists, `False` otherwise.
    """
    is_present = os.path.exists(path)
    return is_present
glob(pattern) staticmethod

Return the paths that match a glob pattern.

Parameters:

Name Type Description Default
pattern Union[bytes, str]

The glob pattern.

required

Returns:

Type Description
List[PathType]

The paths that match the glob pattern.

Source code in zenml/io/local_filesystem.py
@staticmethod
def glob(pattern: PathType) -> List[PathType]:
    """List the local paths selected by a glob pattern.

    Args:
        pattern: Glob expression to expand.

    Returns:
        List[PathType]: Every path the pattern matched.
    """
    matches = glob.glob(pattern)  # type: ignore[type-var]
    return matches
isdir(path) staticmethod

Returns whether the given path points to a directory.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to check.

required

Returns:

Type Description
bool

Whether the path points to a directory.

Source code in zenml/io/local_filesystem.py
@staticmethod
def isdir(path: PathType) -> bool:
    """Tell whether `path` refers to a directory on the local disk.

    Args:
        path: Location to inspect.

    Returns:
        bool: `True` when the path is a directory, `False` otherwise.
    """
    is_directory = os.path.isdir(path)
    return is_directory
listdir(path) staticmethod

Returns a list of files under a given directory in the filesystem.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the directory.

required

Returns:

Type Description
List[PathType]

The list of files under the given directory.

Source code in zenml/io/local_filesystem.py
@staticmethod
def listdir(path: PathType) -> List[PathType]:
    """Enumerate the entries found directly inside a local directory.

    Args:
        path: Directory whose contents are listed.

    Returns:
        List[PathType]: Names of the entries in the directory.
    """
    entries = os.listdir(path)
    return entries  # type:ignore[return-value]
makedirs(path) staticmethod

Make a directory at the given path, recursively creating parents.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the directory.

required
Source code in zenml/io/local_filesystem.py
@staticmethod
def makedirs(path: PathType) -> None:
    """Create a directory along with any missing parent directories.

    A directory that already exists is left untouched.

    Args:
        path: Directory to create.
    """
    os.makedirs(path, exist_ok=True)
mkdir(path) staticmethod

Make a directory at the given path; parent directory must exist.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the directory.

required
Source code in zenml/io/local_filesystem.py
@staticmethod
def mkdir(path: PathType) -> None:
    """Create a single directory; its parent must already be present.

    Args:
        path: Directory to create.
    """
    os.mkdir(path)
open(name, mode='r') staticmethod

Open a file at the given path.

Parameters:

Name Type Description Default
name Union[bytes, str]

The path to the file.

required
mode str

The mode to open the file.

'r'

Returns:

Type Description
Any

The file object.

Source code in zenml/io/local_filesystem.py
@staticmethod
def open(name: PathType, mode: str = "r") -> Any:
    """Open a local file and hand back the resulting file object.

    Args:
        name: Location of the file to open.
        mode: Mode string forwarded to the file-opening call.

    Returns:
        Any: The opened file object.
    """
    handle = open(name, mode=mode)
    return handle
remove(path) staticmethod

Remove the file at the given path. Dangerous operation.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the file.

required
Source code in zenml/io/local_filesystem.py
@staticmethod
def remove(path: PathType) -> None:
    """Delete a single file from the local disk. Dangerous operation.

    Args:
        path: Location of the file to delete.
    """
    os.remove(path)
rename(src, dst, overwrite=False) staticmethod

Rename source file to destination file.

Parameters:

Name Type Description Default
src Union[bytes, str]

The path of the file to rename.

required
dst Union[bytes, str]

The path to rename the source file to.

required
overwrite bool

If a file already exists at the destination, this method will overwrite it if overwrite=True

False

Exceptions:

Type Description
FileExistsError

If the destination file exists and overwrite is False.

Source code in zenml/io/local_filesystem.py
@staticmethod
def rename(src: PathType, dst: PathType, overwrite: bool = False) -> None:
    """Rename source file to destination file.

    Args:
        src: The path of the file to rename.
        dst: The path to rename the source file to.
        overwrite: If a file already exists at the destination, this
            method will overwrite it if overwrite=`True`

    Raises:
        FileExistsError: If the destination file exists and overwrite is
            False.
    """
    if overwrite:
        # `os.rename` refuses to replace an existing destination on
        # Windows; `os.replace` overwrites it on every platform.
        os.replace(src, dst)
        return
    if os.path.exists(dst):
        raise FileExistsError(
            f"Destination path {str(dst)} already exists and argument "
            f"`overwrite` is false."
        )
    os.rename(src, dst)
rmtree(path) staticmethod

Deletes dir recursively. Dangerous operation.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the directory.

required
Source code in zenml/io/local_filesystem.py
@staticmethod
def rmtree(path: PathType) -> None:
    """Delete a directory tree from the local disk. Dangerous operation.

    Args:
        path: Root directory of the tree to delete.
    """
    shutil.rmtree(path)
stat(path) staticmethod

Return the stat descriptor for a given file path.

Parameters:

Name Type Description Default
path Union[bytes, str]

The path to the file.

required

Returns:

Type Description
Any

The stat descriptor for the file.

Source code in zenml/io/local_filesystem.py
@staticmethod
def stat(path: PathType) -> Any:
    """Fetch the stat descriptor of a local path.

    Args:
        path: Location to query.

    Returns:
        Any: The stat descriptor reported for the path.
    """
    descriptor = os.stat(path)
    return descriptor
walk(top, topdown=True, onerror=None) staticmethod

Return an iterator that walks the contents of the given directory.

Parameters:

Name Type Description Default
top Union[bytes, str]

Path of directory to walk.

required
topdown bool

Whether to walk directories topdown or bottom-up.

True
onerror Optional[Callable[..., NoneType]]

Callable that gets called if an error occurs.

None

Yields:

Type Description
Iterable[Tuple[Union[bytes, str], List[Union[bytes, str]], List[Union[bytes, str]]]]

An Iterable of Tuples, each of which contains the path of the current directory, a list of directories inside the current directory and a list of files inside the current directory.

Source code in zenml/io/local_filesystem.py
@staticmethod
def walk(
    top: PathType,
    topdown: bool = True,
    onerror: Optional[Callable[..., None]] = None,
) -> Iterable[Tuple[PathType, List[PathType], List[PathType]]]:
    """Traverse a local directory tree, one directory at a time.

    Args:
        top: Directory at which the traversal starts.
        topdown: `True` to visit a directory before its subdirectories,
            `False` to visit it after them.
        onerror: Optional callable invoked when an error occurs.

    Yields:
        Tuples made of the path of the directory being visited, the list of
        directories it contains and the list of files it contains.
    """
    traversal = os.walk(top, topdown=topdown, onerror=onerror)
    # Each entry is passed through untouched, so the yielded lists are the
    # very objects produced by `os.walk`.
    for entry in traversal:
        yield entry  # type: ignore[misc]