LightGBM
zenml.integrations.lightgbm
special
Initialization of the LightGBM integration.
LightGBMIntegration (Integration)
Definition of lightgbm integration for ZenML.
Source code in zenml/integrations/lightgbm/__init__.py
```python
class LightGBMIntegration(Integration):
    """Definition of lightgbm integration for ZenML."""

    NAME = LIGHTGBM
    REQUIREMENTS = ["lightgbm>=1.0.0"]
    APT_PACKAGES = ["libgomp1"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration."""
        from zenml.integrations.lightgbm import materializers  # noqa
```
activate()
classmethod
Activates the integration.
Source code in zenml/integrations/lightgbm/__init__.py
```python
@classmethod
def activate(cls) -> None:
    """Activates the integration."""
    from zenml.integrations.lightgbm import materializers  # noqa
```
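In practice the integration is installed with the ZenML CLI (`zenml integration install lightgbm`) and activated automatically at runtime. A minimal sketch of doing the same by hand, assuming `check_installation` behaves as in other ZenML integrations:

```python
from zenml.integrations.lightgbm import LightGBMIntegration

# Only activate if the `lightgbm` package (and other requirements)
# are actually importable in the current environment.
if LightGBMIntegration.check_installation():
    LightGBMIntegration.activate()  # registers the materializers below
```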
materializers
special
Initialization of the LightGBM materializers.
lightgbm_booster_materializer
Implementation of the LightGBM booster materializer.
LightGBMBoosterMaterializer (BaseMaterializer)
Materializer to read data to and from lightgbm.Booster.
Source code in zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
```python
class LightGBMBoosterMaterializer(BaseMaterializer):
    """Materializer to read data to and from lightgbm.Booster."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (lgb.Booster,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL

    def load(self, data_type: Type[Any]) -> lgb.Booster:
        """Reads a lightgbm Booster model from a serialized text file.

        Args:
            data_type: A lightgbm Booster type.

        Returns:
            A lightgbm Booster object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        booster = lgb.Booster(model_file=temp_file)

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return booster

    def save(self, booster: lgb.Booster) -> None:
        """Creates a text serialization for a lightgbm Booster model.

        Args:
            booster: A lightgbm Booster model.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, "model.txt")
            booster.save_model(tmp_path)
            fileio.copy(tmp_path, filepath)
```
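Because `lgb.Booster` is listed in `ASSOCIATED_TYPES`, any step output of that type is persisted through this materializer automatically. A minimal sketch, assuming a ZenML version where `step` is importable from the top-level package (older releases expose it under `zenml.steps`):

```python
import lightgbm as lgb
import numpy as np
from zenml import step


@step
def train_booster() -> lgb.Booster:
    # ZenML stores the returned Booster with LightGBMBoosterMaterializer;
    # no materializer needs to be referenced explicitly.
    X = np.random.rand(100, 4)
    y = np.random.randint(0, 2, size=100)
    train_set = lgb.Dataset(X, label=y)
    return lgb.train({"objective": "binary"}, train_set, num_boost_round=5)
```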
load(self, data_type)
Reads a lightgbm Booster model from a serialized text file.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
`data_type` | `Type[Any]` | A lightgbm Booster type. | *required* |

Returns:

Type | Description |
---|---|
`lightgbm.Booster` | A lightgbm Booster object. |
Source code in zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
```python
def load(self, data_type: Type[Any]) -> lgb.Booster:
    """Reads a lightgbm Booster model from a serialized text file.

    Args:
        data_type: A lightgbm Booster type.

    Returns:
        A lightgbm Booster object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)
    booster = lgb.Booster(model_file=temp_file)

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return booster
```
save(self, booster)
Creates a text serialization for a lightgbm Booster model.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
`booster` | `lightgbm.Booster` | A lightgbm Booster model. | *required* |
Source code in zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py
```python
def save(self, booster: lgb.Booster) -> None:
    """Creates a text serialization for a lightgbm Booster model.

    Args:
        booster: A lightgbm Booster model.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "model.txt")
        booster.save_model(tmp_path)
        fileio.copy(tmp_path, filepath)
```
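The two methods are symmetric, so they can be exercised together outside a pipeline, which is handy for testing. A minimal sketch against a local directory, assuming the materializer can be constructed from a URI alone (as it can for local artifact stores):

```python
import tempfile

import lightgbm as lgb
import numpy as np

from zenml.integrations.lightgbm.materializers.lightgbm_booster_materializer import (
    LightGBMBoosterMaterializer,
)

# Train a throwaway model.
train_set = lgb.Dataset(np.random.rand(100, 3), label=np.random.randint(0, 2, 100))
booster = lgb.train({"objective": "binary"}, train_set, num_boost_round=2)

# Round-trip: save() writes DEFAULT_FILENAME under the URI, load() reads it back.
artifact_uri = tempfile.mkdtemp(prefix="zenml-booster-demo-")
materializer = LightGBMBoosterMaterializer(uri=artifact_uri)
materializer.save(booster)
restored = materializer.load(lgb.Booster)
assert restored.num_trees() == booster.num_trees()
```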
lightgbm_dataset_materializer
Implementation of the LightGBM materializer.
LightGBMDatasetMaterializer (BaseMaterializer)
Materializer to read data to and from lightgbm.Dataset.
Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
```python
class LightGBMDatasetMaterializer(BaseMaterializer):
    """Materializer to read data to and from lightgbm.Dataset."""

    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (lgb.Dataset,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> lgb.Dataset:
        """Reads a lightgbm.Dataset binary file and loads it.

        Args:
            data_type: A lightgbm.Dataset type.

        Returns:
            A lightgbm.Dataset object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        matrix = lgb.Dataset(temp_file, free_raw_data=False)

        # No cleanup here: the Dataset reads the file lazily,
        # so the temporary copy must outlive this call.
        return matrix

    def save(self, matrix: lgb.Dataset) -> None:
        """Creates a binary serialization for a lightgbm.Dataset object.

        Args:
            matrix: A lightgbm.Dataset object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Serialize to a temporary file first
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
        matrix.save_binary(temp_file)

        # Copy it into the artifact store
        fileio.copy(temp_file, filepath)
        fileio.rmtree(temp_dir)

    def extract_metadata(
        self, matrix: lgb.Dataset
    ) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given `Dataset` object.

        Args:
            matrix: The `Dataset` object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        return {"shape": (matrix.num_data(), matrix.num_feature())}
```
extract_metadata(self, matrix)
Extract metadata from the given `Dataset` object.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
`matrix` | `lightgbm.Dataset` | The `Dataset` object to extract metadata from. | *required* |

Returns:

Type | Description |
---|---|
`Dict[str, MetadataType]` | The extracted metadata as a dictionary. |
Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
```python
def extract_metadata(
    self, matrix: lgb.Dataset
) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given `Dataset` object.

    Args:
        matrix: The `Dataset` object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    return {"shape": (matrix.num_data(), matrix.num_feature())}
```
load(self, data_type)
Reads a lightgbm.Dataset binary file and loads it.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
`data_type` | `Type[Any]` | A lightgbm.Dataset type. | *required* |

Returns:

Type | Description |
---|---|
`lightgbm.Dataset` | A lightgbm.Dataset object. |
Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
```python
def load(self, data_type: Type[Any]) -> lgb.Dataset:
    """Reads a lightgbm.Dataset binary file and loads it.

    Args:
        data_type: A lightgbm.Dataset type.

    Returns:
        A lightgbm.Dataset object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)
    matrix = lgb.Dataset(temp_file, free_raw_data=False)

    # No cleanup here: the Dataset reads the file lazily,
    # so the temporary copy must outlive this call.
    return matrix
```
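The missing cleanup in `load` is deliberate: a `Dataset` created from a file path performs no I/O until it is constructed, so the temporary copy must still exist at construction time. A minimal sketch of that behavior (the `data.bin` path is hypothetical):

```python
import lightgbm as lgb

matrix = lgb.Dataset("data.bin", free_raw_data=False)  # no file access yet
matrix.construct()  # the binary file is only read here (or inside lgb.train)
```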
save(self, matrix)
Creates a binary serialization for a lightgbm.Dataset object.
Parameters:

Name | Type | Description | Default |
---|---|---|---|
`matrix` | `lightgbm.Dataset` | A lightgbm.Dataset object. | *required* |
Source code in zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py
```python
def save(self, matrix: lgb.Dataset) -> None:
    """Creates a binary serialization for a lightgbm.Dataset object.

    Args:
        matrix: A lightgbm.Dataset object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Serialize to a temporary file first
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
    matrix.save_binary(temp_file)

    # Copy it into the artifact store
    fileio.copy(temp_file, filepath)
    fileio.rmtree(temp_dir)
```