
LightGBM

zenml.integrations.lightgbm special

Initialization of the LightGBM integration.

LightGBMIntegration (Integration)

Definition of lightgbm integration for ZenML.

Source code in zenml/integrations/lightgbm/__init__.py
class LightGBMIntegration(Integration):
    """Definition of lightgbm integration for ZenML."""

    NAME = LIGHTGBM
    REQUIREMENTS = ["lightgbm>=1.0.0"]
    APT_PACKAGES = ["libgomp1"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration."""
        from zenml.integrations.lightgbm import materializers  # noqa

activate() classmethod

Activates the integration.

Source code in zenml/integrations/lightgbm/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration."""
    from zenml.integrations.lightgbm import materializers  # noqa
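
In normal use activation happens implicitly once the integration's requirements are installed (for example via `zenml integration install lightgbm`). The sketch below calls it explicitly to show the effect: importing the materializers module is all that registration requires.

from zenml.integrations.lightgbm import LightGBMIntegration

# Importing zenml.integrations.lightgbm.materializers (which is all that
# activate() does) registers LightGBMBoosterMaterializer and
# LightGBMDatasetMaterializer, so steps can return lgb.Booster and
# lgb.Dataset objects without writing any serialization code.
LightGBMIntegration.activate()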

materializers special

Initialization of the LightGBM materializers.

lightgbm_booster_materializer

Implementation of the LightGBM booster materializer.

LightGBMBoosterMaterializer (BaseMaterializer)

Materializer to read data to and from lightgbm.Booster.

Source code in zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
class LightGBMBoosterMaterializer(BaseMaterializer):
    """Materializer to read data to and from lightgbm.Booster."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (lgb.Booster,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL

    def load(self, data_type: Type[Any]) -> lgb.Booster:
        """Reads a lightgbm Booster model from a serialized JSON file.

        Args:
            data_type: A lightgbm Booster type.

        Returns:
            A lightgbm Booster object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        booster = lgb.Booster(model_file=temp_file)

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return booster

    def save(self, booster: lgb.Booster) -> None:
        """Creates a JSON serialization for a lightgbm Booster model.

        Args:
            booster: A lightgbm Booster model.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, "model.txt")
            booster.save_model(tmp_path)
            fileio.copy(tmp_path, filepath)
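
The materializer is picked up automatically whenever a step output is annotated as lgb.Booster. A minimal sketch of such a step is shown below; it assumes a recent ZenML release where `step` is importable from the package root, and the step name and training parameters are illustrative only.

import lightgbm as lgb
from zenml import step

@step
def train_booster(train_data: lgb.Dataset) -> lgb.Booster:
    """Trains a model; the returned Booster is persisted by
    LightGBMBoosterMaterializer.save(), so the step needs no file handling."""
    params = {"objective": "binary", "verbose": -1}
    return lgb.train(params, train_data, num_boost_round=10)
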
load(self, data_type)

Reads a lightgbm Booster model from a serialized model file.

Parameters:

Name       Type       Description               Default
data_type  Type[Any]  A lightgbm Booster type.  required

Returns:

Type              Description
lightgbm.Booster  A lightgbm Booster object.

Source code in zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
def load(self, data_type: Type[Any]) -> lgb.Booster:
    """Reads a lightgbm Booster model from a serialized JSON file.

    Args:
        data_type: A lightgbm Booster type.

    Returns:
        A lightgbm Booster object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)
    booster = lgb.Booster(model_file=temp_file)

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return booster
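
The load path is exercised whenever a downstream step declares an lgb.Booster input: ZenML copies the stored file out of the artifact store and reconstructs the model before the step body runs. A hedged sketch follows (step name and signature are illustrative, assuming `step` is importable from the zenml package root):

import lightgbm as lgb
import numpy as np
from zenml import step

@step
def predict(booster: lgb.Booster, features: np.ndarray) -> np.ndarray:
    """Receives a fully reconstructed Booster -- the materializer's load()
    has already run -- and simply calls predict() on it."""
    return booster.predict(features)
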
save(self, booster)

Creates a file-based serialization for a lightgbm Booster model.

Parameters:

Name     Type              Description                Default
booster  lightgbm.Booster  A lightgbm Booster model.  required

Source code in zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
def save(self, booster: lgb.Booster) -> None:
    """Creates a JSON serialization for a lightgbm Booster model.

    Args:
        booster: A lightgbm Booster model.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "model.txt")
        booster.save_model(tmp_path)
        fileio.copy(tmp_path, filepath)

lightgbm_dataset_materializer

Implementation of the LightGBM materializer.

LightGBMDatasetMaterializer (BaseMaterializer)

Materializer to read data to and from lightgbm.Dataset.

Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
class LightGBMDatasetMaterializer(BaseMaterializer):
    """Materializer to read data to and from lightgbm.Dataset."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (lgb.Dataset,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> lgb.Dataset:
        """Reads a lightgbm.Dataset binary file and loads it.

        Args:
            data_type: A lightgbm.Dataset type.

        Returns:
            A lightgbm.Dataset object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        matrix = lgb.Dataset(temp_file, free_raw_data=False)

        # No clean up this time because matrix is lazy loaded
        return matrix

    def save(self, matrix: lgb.Dataset) -> None:
        """Creates a binary serialization for a lightgbm.Dataset object.

        Args:
            matrix: A lightgbm.Dataset object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Make a temporary phantom artifact
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
        matrix.save_binary(temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
        fileio.rmtree(temp_dir)

    def extract_metadata(
        self, matrix: lgb.Dataset
    ) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given `Dataset` object.

        Args:
            matrix: The `Dataset` object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        return {"shape": (matrix.num_data(), matrix.num_feature())}
extract_metadata(self, matrix)

Extract metadata from the given Dataset object.

Parameters:

Name    Type              Description                                   Default
matrix  lightgbm.Dataset  The Dataset object to extract metadata from.  required

Returns:

Type                     Description
Dict[str, MetadataType]  The extracted metadata as a dictionary.

Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
def extract_metadata(
    self, matrix: lgb.Dataset
) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given `Dataset` object.

    Args:
        matrix: The `Dataset` object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    return {"shape": (matrix.num_data(), matrix.num_feature())}
load(self, data_type)

Reads a lightgbm.Dataset binary file and loads it.

Parameters:

Name       Type       Description               Default
data_type  Type[Any]  A lightgbm.Dataset type.  required

Returns:

Type              Description
lightgbm.Dataset  A lightgbm.Dataset object.

Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
def load(self, data_type: Type[Any]) -> lgb.Dataset:
    """Reads a lightgbm.Dataset binary file and loads it.

    Args:
        data_type: A lightgbm.Dataset type.

    Returns:
        A lightgbm.Dataset object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)
    matrix = lgb.Dataset(temp_file, free_raw_data=False)

    # No clean up this time because matrix is lazy loaded
    return matrix
save(self, matrix)

Creates a binary serialization for a lightgbm.Dataset object.

Parameters:

Name    Type              Description                 Default
matrix  lightgbm.Dataset  A lightgbm.Dataset object.  required

Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
def save(self, matrix: lgb.Dataset) -> None:
    """Creates a binary serialization for a lightgbm.Dataset object.

    Args:
        matrix: A lightgbm.Dataset object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Make a temporary phantom artifact
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
    matrix.save_binary(temp_file)

    # Copy it into artifact store
    fileio.copy(temp_file, filepath)
    fileio.rmtree(temp_dir)
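
Putting the two materializers together, here is a hedged end-to-end sketch of a pipeline that hands a lgb.Dataset from one step to a training step and stores the resulting Booster. It assumes a recent ZenML release with `pipeline` and `step` at the package root; data, parameters, and step names are illustrative.

import lightgbm as lgb
import numpy as np
from zenml import pipeline, step

@step
def load_data() -> lgb.Dataset:
    X = np.random.rand(200, 5)            # placeholder data
    y = np.random.randint(0, 2, size=200)
    # free_raw_data=False matches how LightGBMDatasetMaterializer reloads it.
    return lgb.Dataset(X, label=y, free_raw_data=False)

@step
def train(dataset: lgb.Dataset) -> lgb.Booster:
    return lgb.train({"objective": "binary", "verbose": -1}, dataset, num_boost_round=10)

@pipeline
def lightgbm_pipeline():
    train(load_data())

if __name__ == "__main__":
    lightgbm_pipeline()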