Code Repositories
zenml.code_repositories
special
Initialization of the ZenML code repository base abstraction.
base_code_repository
Base class for code repositories.
BaseCodeRepository (ABC)
Base class for code repositories.
Code repositories are used to connect to a remote code repository and store information about the repository, such as the URL, the owner, the repository name, and the host. They also provide methods to download files from the repository when a pipeline is run remotely.
Source code in zenml/code_repositories/base_code_repository.py
class BaseCodeRepository(ABC):
    """Abstract interface shared by all code repository implementations.

    A code repository object represents a connection to a remote code
    repository. It keeps the ID and the raw config of the repository and
    defines the hooks that implementations provide: logging in, downloading
    the files of a commit and resolving a local checkout.
    """

    def __init__(
        self,
        id: UUID,
        config: Dict[str, Any],
    ) -> None:
        """Stores the ID and config, then logs into the code repository.

        Args:
            id: The ID of the code repository.
            config: The config of the code repository.
        """
        self._id = id
        self._config = config
        # The state is assigned first so it is available when `login` runs.
        self.login()

    @property
    def config(self) -> "BaseCodeRepositoryConfig":
        """Config object built from the stored config dictionary.

        Returns:
            A `BaseCodeRepositoryConfig` created from the raw config values.
        """
        raw_config = self._config
        return BaseCodeRepositoryConfig(**raw_config)

    @classmethod
    def from_model(
        cls, model: CodeRepositoryResponse
    ) -> "BaseCodeRepository":
        """Instantiates the code repository described by a model.

        The implementation class is loaded from `model.source` through
        `source_utils.load_and_validate_class` with `BaseCodeRepository`
        as the expected class.

        Args:
            model: The `CodeRepositoryResponse` to load from.

        Returns:
            The loaded code repository object.
        """
        repository_class: Type[BaseCodeRepository] = (
            source_utils.load_and_validate_class(
                source=model.source, expected_class=BaseCodeRepository
            )
        )
        return repository_class(id=model.id, config=model.config)

    @property
    def id(self) -> UUID:
        """ID of the code repository.

        Returns:
            The ID that was passed to the constructor.
        """
        return self._id

    @property
    def requirements(self) -> Set[str]:
        """PyPI requirements needed by this code repository.

        Returns:
            The requirements of the module that defines this class, as a set.
        """
        from zenml.integrations.utils import get_requirements_for_module

        module_requirements = get_requirements_for_module(self.__module__)
        return set(module_requirements)

    @abstractmethod
    def login(self) -> None:
        """Authenticates with the code repository.

        The constructor calls this method once the ID and config have
        been stored.

        Raises:
            RuntimeError: If the login fails.
        """

    @abstractmethod
    def download_files(
        self,
        commit: str,
        directory: str,
        repo_sub_directory: Optional[str],
    ) -> None:
        """Fetches the repository files of a commit into a local directory.

        Args:
            commit: Hash of the commit whose files should be downloaded.
            directory: Local directory that receives the downloaded files.
            repo_sub_directory: The subdirectory in the repository to
                download files from.

        Raises:
            RuntimeError: If the download fails.
        """

    @abstractmethod
    def get_local_context(
        self, path: str
    ) -> Optional["LocalRepositoryContext"]:
        """Resolves the local repository context for a path.

        Args:
            path: The path to the local repository.

        Returns:
            The local repository context object.
        """
config: BaseCodeRepositoryConfig
property
readonly
Config class for Code Repository.
Returns:
Type | Description |
---|---|
BaseCodeRepositoryConfig |
The config class. |
id: UUID
property
readonly
ID of the code repository.
Returns:
Type | Description |
---|---|
UUID |
The ID of the code repository. |
requirements: Set[str]
property
readonly
Set of PyPI requirements for the repository.
Returns:
Type | Description |
---|---|
Set[str] |
A set of PyPI requirements for the repository. |
__init__(self, id, config)
special
Initializes a code repository.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
id |
UUID |
The ID of the code repository. |
required |
config |
Dict[str, Any] |
The config of the code repository. |
required |
Source code in zenml/code_repositories/base_code_repository.py
def __init__(
    self,
    id: UUID,
    config: Dict[str, Any],
) -> None:
    """Stores the ID and config, then logs into the code repository.

    Args:
        id: The ID of the code repository.
        config: The config of the code repository.
    """
    # Both attributes are assigned before `login` runs so that they are
    # already available to it.
    self._config = config
    self._id = id
    self.login()
download_files(self, commit, directory, repo_sub_directory)
Downloads files from the code repository to a local directory.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
commit |
str |
The commit hash to download files from. |
required |
directory |
str |
The directory to download files to. |
required |
repo_sub_directory |
Optional[str] |
The subdirectory in the repository to download files from. |
required |
Exceptions:
Type | Description |
---|---|
RuntimeError |
If the download fails. |
Source code in zenml/code_repositories/base_code_repository.py
@abstractmethod
def download_files(
    self,
    commit: str,
    directory: str,
    repo_sub_directory: Optional[str],
) -> None:
    """Fetches the repository files of a commit into a local directory.

    Args:
        commit: Hash of the commit whose files should be downloaded.
        directory: Local directory that receives the downloaded files.
        repo_sub_directory: The subdirectory in the repository to
            download files from.

    Raises:
        RuntimeError: If the download fails.
    """
from_model(model)
classmethod
Loads a code repository from a model.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
model |
CodeRepositoryResponse |
The CodeRepositoryResponse model to load from. |
required |
Returns:
Type | Description |
---|---|
BaseCodeRepository |
The loaded code repository object. |
Source code in zenml/code_repositories/base_code_repository.py
@classmethod
def from_model(
    cls, model: CodeRepositoryResponse
) -> "BaseCodeRepository":
    """Instantiates the code repository described by a model.

    The implementation class is loaded from `model.source` through
    `source_utils.load_and_validate_class` with `BaseCodeRepository`
    as the expected class.

    Args:
        model: The `CodeRepositoryResponse` to load from.

    Returns:
        The loaded code repository object.
    """
    repository_class: Type[BaseCodeRepository] = (
        source_utils.load_and_validate_class(
            source=model.source, expected_class=BaseCodeRepository
        )
    )
    return repository_class(id=model.id, config=model.config)
get_local_context(self, path)
Gets a local repository context from a path.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
The path to the local repository. |
required |
Returns:
Type | Description |
---|---|
Optional[LocalRepositoryContext] |
The local repository context object. |
Source code in zenml/code_repositories/base_code_repository.py
@abstractmethod
def get_local_context(self, path: str) -> Optional["LocalRepositoryContext"]:
    """Resolves the local repository context for a path.

    Args:
        path: The path to the local repository.

    Returns:
        The local repository context object.
    """
login(self)
Logs into the code repository.
This method is called when the code repository is initialized. It should be used to authenticate with the code repository.
Exceptions:
Type | Description |
---|---|
RuntimeError |
If the login fails. |
Source code in zenml/code_repositories/base_code_repository.py
@abstractmethod
def login(self) -> None:
    """Authenticates with the code repository.

    This hook is invoked when the code repository is initialized and is
    the place for implementations to set up their authentication.

    Raises:
        RuntimeError: If the login fails.
    """
BaseCodeRepositoryConfig (SecretReferenceMixin, ABC)
Base config for code repositories.
Source code in zenml/code_repositories/base_code_repository.py
class BaseCodeRepositoryConfig(SecretReferenceMixin, ABC):
    """Base config for code repositories.

    Combines `SecretReferenceMixin` with `ABC` and declares no fields of
    its own.
    """
git
special
Initialization of the local git repository context.
local_git_repository_context
Implementation of the Local git repository context.
LocalGitRepositoryContext (LocalRepositoryContext)
Local git repository context.
Source code in zenml/code_repositories/git/local_git_repository_context.py
class LocalGitRepositoryContext(LocalRepositoryContext):
    """Local repository context backed by a git checkout.

    Wraps a git repo object together with one of its remotes and derives
    the repository state (root, dirtiness, unpushed commits and current
    commit) from them.
    """

    def __init__(
        self, code_repository_id: UUID, git_repo: "Repo", remote_name: str
    ):
        """Initializes a local git repository context.

        Args:
            code_repository_id: The ID of the code repository.
            git_repo: The git repo.
            remote_name: Name of the remote.
        """
        super().__init__(code_repository_id=code_repository_id)
        self._git_repo = git_repo
        self._remote = git_repo.remote(name=remote_name)

    @classmethod
    def at(
        cls,
        path: str,
        code_repository_id: UUID,
        remote_url_validation_callback: Callable[[str], bool],
    ) -> Optional["LocalGitRepositoryContext"]:
        """Returns a local git repository at the given path.

        Args:
            path: The path to the local git repository.
            code_repository_id: The ID of the code repository.
            remote_url_validation_callback: A callback that validates the
                remote URL of the git repository.

        Returns:
            A local git repository if the path is a valid git repository
            and the remote URL is valid, otherwise None. None is also
            returned when the git library cannot be imported or the path
            does not exist.
        """
        try:
            # These imports fail when git is not installed on the machine
            from git.exc import InvalidGitRepositoryError, NoSuchPathError
            from git.repo.base import Repo
        except ImportError:
            return None

        try:
            git_repo = Repo(path=path, search_parent_directories=True)
        except (InvalidGitRepositoryError, NoSuchPathError):
            # Neither a non-git directory nor a missing path is a usable
            # repository, so both map to the documented `None` result.
            return None

        # Use the first remote whose URL the callback accepts.
        remote_name = None
        for remote in git_repo.remotes:
            if remote_url_validation_callback(remote.url):
                remote_name = remote.name
                break

        if not remote_name:
            return None

        # Instantiate through `cls` so subclasses get an instance of
        # their own type from this alternate constructor.
        return cls(
            code_repository_id=code_repository_id,
            git_repo=git_repo,
            remote_name=remote_name,
        )

    @property
    def git_repo(self) -> "Repo":
        """The git repo.

        Returns:
            The git repo object of the local git repository.
        """
        return self._git_repo

    @property
    def remote(self) -> "Remote":
        """The git remote.

        Returns:
            The remote of the git repo object of the local git repository.
        """
        return self._remote

    @property
    def root(self) -> str:
        """The root of the git repo.

        Returns:
            The working directory of the git repo, as a string.
        """
        # Guard against a repo object without a working directory.
        assert self.git_repo.working_dir
        return str(self.git_repo.working_dir)

    @property
    def is_dirty(self) -> bool:
        """Whether the git repo is dirty.

        A repository counts as dirty if it has any untracked or uncommitted
        changes.

        Returns:
            True if the git repo is dirty, False otherwise.
        """
        return self.git_repo.is_dirty(untracked_files=True)

    @property
    def has_local_changes(self) -> bool:
        """Whether the git repo has local changes.

        A repository has local changes if it is dirty or there are some commits
        which have not been pushed yet.

        Returns:
            True if the git repo has local changes, False otherwise.

        Raises:
            RuntimeError: If the git repo is in a detached head state.
        """
        if self.is_dirty:
            return True

        # Resolve the active branch before contacting the remote so that a
        # detached head fails fast instead of after a pointless fetch.
        try:
            active_branch = self.git_repo.active_branch
        except TypeError as e:
            raise RuntimeError(
                "Git repo in detached head state is not allowed."
            ) from e

        # Refresh the remote refs so the comparison below uses the latest
        # state of the remote branch.
        self.remote.fetch()
        local_commit_object = self.git_repo.head.commit

        try:
            remote_commit_object = self.remote.refs[active_branch.name].commit
        except IndexError:
            # Branch doesn't exist on remote
            return True

        return cast("Commit", remote_commit_object) != local_commit_object

    @property
    def current_commit(self) -> str:
        """The current commit.

        Returns:
            The current commit sha.
        """
        return cast(str, self.git_repo.head.object.hexsha)
current_commit: str
property
readonly
The current commit.
Returns:
Type | Description |
---|---|
str |
The current commit sha. |
git_repo: Repo
property
readonly
The git repo.
Returns:
Type | Description |
---|---|
Repo |
The git repo object of the local git repository. |
has_local_changes: bool
property
readonly
Whether the git repo has local changes.
A repository has local changes if it is dirty or there are some commits which have not been pushed yet.
Returns:
Type | Description |
---|---|
bool |
True if the git repo has local changes, False otherwise. |
Exceptions:
Type | Description |
---|---|
RuntimeError |
If the git repo is in a detached head state. |
is_dirty: bool
property
readonly
Whether the git repo is dirty.
A repository counts as dirty if it has any untracked or uncommitted changes.
Returns:
Type | Description |
---|---|
bool |
True if the git repo is dirty, False otherwise. |
remote: Remote
property
readonly
The git remote.
Returns:
Type | Description |
---|---|
Remote |
The remote of the git repo object of the local git repository. |
root: str
property
readonly
The root of the git repo.
Returns:
Type | Description |
---|---|
str |
The root of the git repo. |
__init__(self, code_repository_id, git_repo, remote_name)
special
Initializes a local git repository context.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
code_repository_id |
UUID |
The ID of the code repository. |
required |
git_repo |
Repo |
The git repo. |
required |
remote_name |
str |
Name of the remote. |
required |
Source code in zenml/code_repositories/git/local_git_repository_context.py
def __init__(
    self,
    code_repository_id: UUID,
    git_repo: "Repo",
    remote_name: str,
) -> None:
    """Sets up the context for a git repo and one of its remotes.

    Args:
        code_repository_id: The ID of the code repository.
        git_repo: The git repo.
        remote_name: Name of the remote.
    """
    super().__init__(code_repository_id=code_repository_id)
    # Keep the repo object and look up the remote object by its name.
    self._git_repo = git_repo
    self._remote = git_repo.remote(name=remote_name)
at(path, code_repository_id, remote_url_validation_callback)
classmethod
Returns a local git repository at the given path.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
The path to the local git repository. |
required |
code_repository_id |
UUID |
The ID of the code repository. |
required |
remote_url_validation_callback |
Callable[[str], bool] |
A callback that validates the remote URL of the git repository. |
required |
Returns:
Type | Description |
---|---|
Optional[LocalGitRepositoryContext] |
A local git repository if the path is a valid git repository and the remote URL is valid, otherwise None. |
Source code in zenml/code_repositories/git/local_git_repository_context.py
@classmethod
def at(
    cls,
    path: str,
    code_repository_id: UUID,
    remote_url_validation_callback: Callable[[str], bool],
) -> Optional["LocalGitRepositoryContext"]:
    """Returns a local git repository at the given path.

    Args:
        path: The path to the local git repository.
        code_repository_id: The ID of the code repository.
        remote_url_validation_callback: A callback that validates the
            remote URL of the git repository.

    Returns:
        A local git repository if the path is a valid git repository
        and the remote URL is valid, otherwise None. None is also
        returned when the git library cannot be imported or the path
        does not exist.
    """
    try:
        # These imports fail when git is not installed on the machine
        from git.exc import InvalidGitRepositoryError, NoSuchPathError
        from git.repo.base import Repo
    except ImportError:
        return None

    try:
        git_repo = Repo(path=path, search_parent_directories=True)
    except (InvalidGitRepositoryError, NoSuchPathError):
        # Neither a non-git directory nor a missing path is a usable
        # repository, so both map to the documented `None` result.
        return None

    # Use the first remote whose URL the callback accepts.
    remote_name = None
    for remote in git_repo.remotes:
        if remote_url_validation_callback(remote.url):
            remote_name = remote.name
            break

    if not remote_name:
        return None

    # Instantiate through `cls` so subclasses get an instance of their
    # own type from this alternate constructor.
    return cls(
        code_repository_id=code_repository_id,
        git_repo=git_repo,
        remote_name=remote_name,
    )
local_repository_context
Base class for local code repository contexts.
LocalRepositoryContext (ABC)
Base class for local repository contexts.
This class is used to represent a local repository. It is used to track the current state of the repository and to provide information about the repository, such as the root path, the current commit, and whether the repository is dirty.
Source code in zenml/code_repositories/local_repository_context.py
class LocalRepositoryContext(ABC):
    """Abstract description of a repository checked out locally.

    Implementations report the current state of the local repository:
    its root path, the current commit, whether it is dirty and whether
    it has local changes. The context also keeps the ID of the code
    repository it belongs to.
    """

    def __init__(self, code_repository_id: UUID) -> None:
        """Stores the ID of the code repository.

        Args:
            code_repository_id: The ID of the code repository.
        """
        self._code_repository_id = code_repository_id

    @property
    def code_repository_id(self) -> UUID:
        """ID of the code repository.

        Returns:
            The ID that was passed to the constructor.
        """
        return self._code_repository_id

    @property
    @abstractmethod
    def root(self) -> str:
        """Root path of the local repository.

        Returns:
            The root path of the local repository.
        """

    @property
    @abstractmethod
    def is_dirty(self) -> bool:
        """Whether the local repository is dirty.

        A repository counts as dirty if it has any untracked or uncommitted
        changes.

        Returns:
            Whether the local repository is dirty.
        """

    @property
    @abstractmethod
    def has_local_changes(self) -> bool:
        """Whether the local repository has local changes.

        A repository has local changes if it is dirty or there are some
        commits which have not been pushed yet.

        Returns:
            Whether the local repository has local changes.
        """

    @property
    @abstractmethod
    def current_commit(self) -> str:
        """Current commit of the local repository.

        Returns:
            The current commit of the local repository.
        """
code_repository_id: UUID
property
readonly
Returns the ID of the code repository.
Returns:
Type | Description |
---|---|
UUID |
The ID of the code repository. |
current_commit: str
property
readonly
Returns the current commit of the local repository.
Returns:
Type | Description |
---|---|
str |
The current commit of the local repository. |
has_local_changes: bool
property
readonly
Returns whether the local repository has local changes.
A repository has local changes if it is dirty or there are some commits which have not been pushed yet.
Returns:
Type | Description |
---|---|
bool |
Whether the local repository has local changes. |
is_dirty: bool
property
readonly
Returns whether the local repository is dirty.
A repository counts as dirty if it has any untracked or uncommitted changes.
Returns:
Type | Description |
---|---|
bool |
Whether the local repository is dirty. |
root: str
property
readonly
Returns the root path of the local repository.
Returns:
Type | Description |
---|---|
str |
The root path of the local repository. |
__init__(self, code_repository_id)
special
Initializes a local repository context.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
code_repository_id |
UUID |
The ID of the code repository. |
required |
Source code in zenml/code_repositories/local_repository_context.py
def __init__(self, code_repository_id: UUID) -> None:
"""Initializes a local repository context.
Args:
code_repository_id: The ID of the code repository.
"""
self._code_repository_id = code_repository_id