Xgboost
zenml.integrations.xgboost
special
Initialization of the XGBoost integration.
XgboostIntegration (Integration)
Definition of xgboost integration for ZenML.
Source code in zenml/integrations/xgboost/__init__.py
class XgboostIntegration(Integration):
    """Definition of xgboost integration for ZenML."""

    # Name of this integration; XGBOOST is a constant defined outside this
    # listing.
    NAME = XGBOOST
    # Requirement specifiers for the packages this integration needs.
    REQUIREMENTS = ["xgboost>=1.0.0"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        The materializers subpackage is imported and the imported name is
        never used, so the import is presumably wanted only for its
        import-time side effects (hence the ``noqa`` marker) -- confirm
        against the materializers package.
        """
        from zenml.integrations.xgboost import materializers # noqa
activate()
classmethod
Activates the integration.
Source code in zenml/integrations/xgboost/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration.

    The materializers subpackage is imported and the imported name is never
    used, so the import is presumably wanted only for its import-time side
    effects (hence the ``noqa`` marker) -- confirm against the materializers
    package.
    """
    from zenml.integrations.xgboost import materializers # noqa
materializers
special
Initialization of the XGBoost materializers.
xgboost_booster_materializer
Implementation of an XGBoost booster materializer.
XgboostBoosterMaterializer (BaseMaterializer)
Materializer to read data to and from xgboost.Booster.
Source code in zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
class XgboostBoosterMaterializer(BaseMaterializer):
    """Materializer to read data to and from xgboost.Booster."""

    # Types this materializer handles, and the artifact type it produces.
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (xgb.Booster,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL

    def load(self, data_type: Type[Any]) -> xgb.Booster:
        """Reads a xgboost Booster model from a serialized JSON file.

        The file is first copied from the artifact store to a local temporary
        directory and loaded from there.

        Args:
            data_type: A xgboost Booster type.

        Returns:
            A xgboost Booster object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            booster = xgb.Booster()
            booster.load_model(temp_file)
        finally:
            # Always clean up, even when the copy or the load fails, so
            # that no temporary directory is left behind.
            fileio.rmtree(temp_dir)
        return booster

    def save(self, booster: xgb.Booster) -> None:
        """Creates a JSON serialization for a xgboost Booster model.

        The model is written to a local temporary file which is then copied
        into the artifact store.

        Args:
            booster: A xgboost Booster model.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Reserve a temporary file name. The handle is closed right away
        # (leaving the `with` block) because the model is written by path,
        # and a file that is still open cannot be reopened on every platform.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            temp_path = f.name

        try:
            booster.save_model(temp_path)
            # Copy it into artifact store
            fileio.copy(temp_path, filepath)
        finally:
            # `delete=False` means the file is ours to remove, including
            # when saving or copying fails.
            fileio.remove(temp_path)
load(self, data_type)
Reads a xgboost Booster model from a serialized JSON file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
A xgboost Booster type. |
required |
Returns:
Type | Description |
---|---|
xgboost.Booster |
A xgboost Booster object. |
Source code in zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
def load(self, data_type: Type[Any]) -> xgb.Booster:
    """Reads a xgboost Booster model from a serialized JSON file.

    The file is first copied from the artifact store to a local temporary
    directory and loaded from there.

    Args:
        data_type: A xgboost Booster type.

    Returns:
        A xgboost Booster object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        booster = xgb.Booster()
        booster.load_model(temp_file)
    finally:
        # Always clean up, even when the copy or the load fails, so that no
        # temporary directory is left behind.
        fileio.rmtree(temp_dir)
    return booster
save(self, booster)
Creates a JSON serialization for a xgboost Booster model.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
booster |
xgboost.Booster |
A xgboost Booster model. |
required |
Source code in zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py
def save(self, booster: xgb.Booster) -> None:
    """Creates a JSON serialization for a xgboost Booster model.

    The model is written to a local temporary file which is then copied into
    the artifact store.

    Args:
        booster: A xgboost Booster model.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Reserve a temporary file name. The handle is closed right away (leaving
    # the `with` block) because the model is written by path, and a file that
    # is still open cannot be reopened on every platform.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        temp_path = f.name

    try:
        booster.save_model(temp_path)
        # Copy it into artifact store
        fileio.copy(temp_path, filepath)
    finally:
        # `delete=False` means the file is ours to remove, including when
        # saving or copying fails.
        fileio.remove(temp_path)
xgboost_dmatrix_materializer
Implementation of the XGBoost dmatrix materializer.
XgboostDMatrixMaterializer (BaseMaterializer)
Materializer to read data to and from xgboost.DMatrix.
Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
class XgboostDMatrixMaterializer(BaseMaterializer):
    """Materializer to read data to and from xgboost.DMatrix."""

    # Types this materializer handles, and the artifact type it produces.
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (xgb.DMatrix,)
    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA

    def load(self, data_type: Type[Any]) -> xgb.DMatrix:
        """Reads a xgboost.DMatrix binary file and loads it.

        The file is first copied from the artifact store to a local temporary
        directory and loaded from there.

        Args:
            data_type: The datatype which should be read.

        Returns:
            Materialized xgboost matrix.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)
            matrix = xgb.DMatrix(temp_file)
        finally:
            # Always clean up, even when the copy or the load fails, so
            # that no temporary directory is left behind.
            fileio.rmtree(temp_dir)
        return matrix

    def save(self, matrix: xgb.DMatrix) -> None:
        """Creates a binary serialization for a xgboost.DMatrix object.

        The matrix is written to a local temporary file which is then copied
        into the artifact store.

        Args:
            matrix: A xgboost.DMatrix object.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Reserve a temporary file name. The handle is closed right away
        # (leaving the `with` block) because the matrix is written by path,
        # and a file that is still open cannot be reopened on every platform.
        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
            temp_path = f.name

        try:
            matrix.save_binary(temp_path)
            # Copy it into artifact store
            fileio.copy(temp_path, filepath)
        finally:
            # `delete=False` means the file is ours to remove, including
            # when saving or copying fails.
            fileio.remove(temp_path)

    def extract_metadata(
        self, dataset: xgb.DMatrix
    ) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given `xgboost.DMatrix` object.

        Args:
            dataset: The `xgboost.DMatrix` object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary, holding the matrix
            shape as a `(rows, columns)` tuple under the `"shape"` key.
        """
        return {"shape": (dataset.num_row(), dataset.num_col())}
extract_metadata(self, dataset)
Extract metadata from the given xgboost.DMatrix object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
xgboost.DMatrix |
The xgboost.DMatrix object to extract metadata from. |
required |
Returns:
Type | Description |
---|---|
Dict[str, MetadataType] |
The extracted metadata as a dictionary. |
Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
def extract_metadata(
    self, dataset: xgb.DMatrix
) -> Dict[str, "MetadataType"]:
    """Collect metadata describing the given `xgboost.DMatrix`.

    Args:
        dataset: The `xgboost.DMatrix` object to extract metadata from.

    Returns:
        A dictionary with a single `"shape"` entry holding the matrix
        dimensions as a `(rows, columns)` tuple.
    """
    row_count = dataset.num_row()
    column_count = dataset.num_col()
    metadata = {"shape": (row_count, column_count)}
    return metadata
load(self, data_type)
Reads a xgboost.DMatrix binary file and loads it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[Any] |
The datatype which should be read. |
required |
Returns:
Type | Description |
---|---|
xgboost.DMatrix |
Materialized xgboost matrix. |
Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
def load(self, data_type: Type[Any]) -> xgb.DMatrix:
    """Reads a xgboost.DMatrix binary file and loads it.

    The file is first copied from the artifact store to a local temporary
    directory and loaded from there.

    Args:
        data_type: The datatype which should be read.

    Returns:
        Materialized xgboost matrix.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)
        matrix = xgb.DMatrix(temp_file)
    finally:
        # Always clean up, even when the copy or the load fails, so that no
        # temporary directory is left behind.
        fileio.rmtree(temp_dir)
    return matrix
save(self, matrix)
Creates a binary serialization for a xgboost.DMatrix object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
matrix |
xgboost.DMatrix |
A xgboost.DMatrix object. |
required |
Source code in zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py
def save(self, matrix: xgb.DMatrix) -> None:
    """Creates a binary serialization for a xgboost.DMatrix object.

    The matrix is written to a local temporary file which is then copied into
    the artifact store.

    Args:
        matrix: A xgboost.DMatrix object.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Reserve a temporary file name. The handle is closed right away (leaving
    # the `with` block) because the matrix is written by path, and a file that
    # is still open cannot be reopened on every platform.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f:
        temp_path = f.name

    try:
        matrix.save_binary(temp_path)
        # Copy it into artifact store
        fileio.copy(temp_path, filepath)
    finally:
        # `delete=False` means the file is ours to remove, including when
        # saving or copying fails.
        fileio.remove(temp_path)