# Llama Index

## zenml.integrations.llama_index special

Initialization of the Llama Index integration.

### LlamaIndexIntegration (Integration)

Definition of Llama Index integration for ZenML.

Source code in `zenml/integrations/llama_index/__init__.py`

```python
class LlamaIndexIntegration(Integration):
    """Definition of Llama Index integration for ZenML."""

    NAME = LLAMA_INDEX
    REQUIREMENTS = ["llama_index>=0.4.28,<0.6.0"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration."""
        from zenml.integrations.llama_index import materializers  # noqa
```
#### activate() classmethod

Activates the integration.

Source code in `zenml/integrations/llama_index/__init__.py`

```python
@classmethod
def activate(cls) -> None:
    """Activates the integration."""
    from zenml.integrations.llama_index import materializers  # noqa
```
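In practice the integration is usually enabled via the ZenML CLI (`zenml integration install llama_index`), which installs the pinned requirements; ZenML then calls `activate()` itself. Below is a minimal sketch of performing the same check and activation by hand, assuming the standard `Integration` helpers:

```python
# Sketch: manually checking and activating the integration. ZenML normally
# does this automatically once the integration's requirements are installed.
from zenml.integrations.llama_index import LlamaIndexIntegration

# check_installation() returns True if the pinned requirements are importable
if LlamaIndexIntegration.check_installation():
    # activate() imports the materializers module, which registers the
    # document and index materializers with ZenML.
    LlamaIndexIntegration.activate()
```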
### materializers special

Initialization of the Llama Index materializers.

#### document_materializer

Implementation of the llama-index document materializer.

##### LlamaIndexDocumentMaterializer (LangchainDocumentMaterializer)

Handle serialization and deserialization of llama-index documents.

Source code in `zenml/integrations/llama_index/materializers/document_materializer.py`

```python
class LlamaIndexDocumentMaterializer(LangchainDocumentMaterializer):
    """Handle serialization and deserialization of llama-index documents."""

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.DATA
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (Document,)

    def load(self, data_type: Type[Any]) -> Any:
        """Reads a llama-index document from JSON.

        Args:
            data_type: The type of the data to read.

        Returns:
            The data read.
        """
        return Document.from_langchain_format(super().load(LCDocument))

    def save(self, data: Any) -> None:
        """Serialize a llama-index document as a Langchain document.

        Args:
            data: The data to store.
        """
        super().save(data.to_langchain_format())

    def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given Llama Index document.

        Args:
            data: The BaseModel object to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        return super().extract_metadata(data.to_langchain_format())
```
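This materializer converts between the llama-index and Langchain document formats and delegates the actual I/O to `LangchainDocumentMaterializer`. A minimal sketch of a step that relies on it (the `@step` decorator path and the `Document` import path are assumed for the ZenML and llama_index versions of this era):

```python
# Sketch: a step returning a llama-index Document. ZenML selects
# LlamaIndexDocumentMaterializer automatically via ASSOCIATED_TYPES.
from llama_index import Document  # import path in llama_index < 0.6
from zenml.steps import step


@step
def make_document() -> Document:
    # Stored as JSON by converting to Langchain format under the hood
    return Document(text="ZenML integrates with llama_index.")
```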
###### extract_metadata(self, data)

Extract metadata from the given Llama Index document.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `data` | `Any` | The BaseModel object to extract metadata from. | *required* |

Returns:

| Type | Description |
|---|---|
| `Dict[str, MetadataType]` | The extracted metadata as a dictionary. |

Source code in `zenml/integrations/llama_index/materializers/document_materializer.py`

```python
def extract_metadata(self, data: Any) -> Dict[str, "MetadataType"]:
    """Extract metadata from the given Llama Index document.

    Args:
        data: The BaseModel object to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    return super().extract_metadata(data.to_langchain_format())
```
###### load(self, data_type)

Reads a llama-index document from JSON.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `data_type` | `Type[Any]` | The type of the data to read. | *required* |

Returns:

| Type | Description |
|---|---|
| `Any` | The data read. |

Source code in `zenml/integrations/llama_index/materializers/document_materializer.py`

```python
def load(self, data_type: Type[Any]) -> Any:
    """Reads a llama-index document from JSON.

    Args:
        data_type: The type of the data to read.

    Returns:
        The data read.
    """
    return Document.from_langchain_format(super().load(LCDocument))
```
###### save(self, data)

Serialize a llama-index document as a Langchain document.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `data` | `Any` | The data to store. | *required* |

Source code in `zenml/integrations/llama_index/materializers/document_materializer.py`

```python
def save(self, data: Any) -> None:
    """Serialize a llama-index document as a Langchain document.

    Args:
        data: The data to store.
    """
    super().save(data.to_langchain_format())
```
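Taken together, `save()` and `load()` round-trip a document through the Langchain representation. A standalone sketch of that conversion (import paths assumed for langchain and llama_index versions compatible with this integration):

```python
# Round-trip sketch of what save()/load() do under the hood.
from langchain.docstore.document import Document as LCDocument  # assumed path
from llama_index import Document

doc = Document(text="hello")
lc_doc = doc.to_langchain_format()  # save() path: llama-index -> Langchain
restored = Document.from_langchain_format(lc_doc)  # load() path: back again
assert restored.text == doc.text
```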
#### gpt_index_materializer

Implementation of the llama-index GPT index materializer.

##### LlamaIndexGPTFaissIndexMaterializer (BaseMaterializer)

Materializer for llama_index GPT faiss indices.

Source code in `zenml/integrations/llama_index/materializers/gpt_index_materializer.py`

```python
class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer):
    """Materializer for llama_index GPT faiss indices."""

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (GPTFaissIndex,)

    def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
        """Load a llama-index GPT faiss index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)

        index = data_type.load_from_disk(
            save_path=filepath, faiss_index_save_path=faiss_filepath
        )

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return cast(GPTFaissIndex, index)

    def save(self, index: GPTFaissIndex) -> None:
        """Save a llama-index GPT faiss index to disk.

        Args:
            index: The index to save.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(
                save_path=f.name, faiss_index_save_path=faiss_filepath
            )

            # Copy it into artifact store
            fileio.copy(f.name, filepath)

        # Close and remove the temporary file
        f.close()
        fileio.remove(f.name)
```
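A sketch of a step that produces such an index, so this materializer handles the step's output (the `from_documents` constructor and `faiss_index` keyword are assumed from the llama_index 0.5.x API; an OpenAI key and faiss must be available, and the directory path is hypothetical):

```python
# Sketch: building a GPT faiss index inside a step. Assumes faiss is
# installed, OPENAI_API_KEY is set, and the llama_index 0.5.x API.
import faiss
from llama_index import GPTFaissIndex, SimpleDirectoryReader
from zenml.steps import step


@step
def build_faiss_index() -> GPTFaissIndex:
    documents = SimpleDirectoryReader("docs/").load_data()  # hypothetical path
    faiss_index = faiss.IndexFlatL2(1536)  # ada-002 embedding dimension
    return GPTFaissIndex.from_documents(documents, faiss_index=faiss_index)
```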
###### load(self, data_type)

Load a llama-index GPT faiss index from disk.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `data_type` | `Type[llama_index.indices.vector_store.vector_indices.GPTFaissIndex]` | The type of the index. | *required* |

Returns:

| Type | Description |
|---|---|
| `GPTFaissIndex` | The index. |

Source code in `zenml/integrations/llama_index/materializers/gpt_index_materializer.py`

```python
def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
    """Load a llama-index GPT faiss index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)

    index = data_type.load_from_disk(
        save_path=filepath, faiss_index_save_path=faiss_filepath
    )

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return cast(GPTFaissIndex, index)
```
###### save(self, index)

Save a llama-index GPT faiss index to disk.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `index` | `GPTFaissIndex` | The index to save. | *required* |

Source code in `zenml/integrations/llama_index/materializers/gpt_index_materializer.py`

```python
def save(self, index: GPTFaissIndex) -> None:
    """Save a llama-index GPT faiss index to disk.

    Args:
        index: The index to save.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        index.save_to_disk(
            save_path=f.name, faiss_index_save_path=faiss_filepath
        )

        # Copy it into artifact store
        fileio.copy(f.name, filepath)

    # Close and remove the temporary file
    f.close()
    fileio.remove(f.name)
```
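On the consuming side, a downstream step can simply take the index as an input; the materializer's `load()` reconstructs it from the artifact store. A sketch using the pre-0.6 `index.query()` API:

```python
# Sketch: consuming the stored index in a downstream step.
from llama_index import GPTFaissIndex
from zenml.steps import step


@step
def query_index(index: GPTFaissIndex) -> str:
    # index.query() is the llama_index < 0.6 query API
    return str(index.query("What does this project do?"))
```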
##### LlamaIndexGPTIndexMaterializer (Generic, BaseMaterializer)

Materializer for llama_index GPT indices.

Source code in `zenml/integrations/llama_index/materializers/gpt_index_materializer.py`

```python
class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer):
    """Materializer for llama_index GPT indices."""

    ASSOCIATED_ARTIFACT_TYPE: ClassVar[ArtifactType] = ArtifactType.MODEL
    ASSOCIATED_TYPES: ClassVar[Tuple[Type[Any], ...]] = (BaseGPTIndex,)

    def load(self, data_type: Type[T]) -> T:
        """Loads a llama-index GPT index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)

        index = data_type.load_from_disk(save_path=filepath)
        assert isinstance(index, data_type)

        # Cleanup and return
        fileio.rmtree(temp_dir)
        return index

    def save(self, index: T) -> None:
        """Save a llama-index GPT index to disk.

        Args:
            index: The index to save.
        """
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False
        ) as f:
            index.save_to_disk(save_path=f.name)

            # Copy it into artifact store
            fileio.copy(f.name, filepath)

        # Close and remove the temporary file
        f.close()
        fileio.remove(f.name)
```
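Since `ASSOCIATED_TYPES` lists the `BaseGPTIndex` base class, this materializer covers any concrete GPT index that lacks a dedicated materializer of its own. A sketch with `GPTSimpleVectorIndex` (available in llama_index < 0.6; `from_documents` assumed from the 0.5.x API):

```python
# Sketch: a step returning a plain vector index, handled by the generic
# materializer. Assumes the llama_index 0.5.x API and an OpenAI key.
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
from zenml.steps import step


@step
def build_vector_index() -> GPTSimpleVectorIndex:
    documents = SimpleDirectoryReader("docs/").load_data()  # hypothetical path
    return GPTSimpleVectorIndex.from_documents(documents)
```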
###### load(self, data_type)

Loads a llama-index GPT index from disk.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `data_type` | `Type[~T]` | The type of the index. | *required* |

Returns:

| Type | Description |
|---|---|
| `~T` | The index. |

Source code in `zenml/integrations/llama_index/materializers/gpt_index_materializer.py`

```python
def load(self, data_type: Type[T]) -> T:
    """Loads a llama-index GPT index from disk.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

    # Copy from artifact store to temporary file
    fileio.copy(filepath, temp_file)

    index = data_type.load_from_disk(save_path=filepath)
    assert isinstance(index, data_type)

    # Cleanup and return
    fileio.rmtree(temp_dir)
    return index
```
###### save(self, index)

Save a llama-index GPT index to disk.

Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `index` | `~T` | The index to save. | *required* |

Source code in `zenml/integrations/llama_index/materializers/gpt_index_materializer.py`

```python
def save(self, index: T) -> None:
    """Save a llama-index GPT index to disk.

    Args:
        index: The index to save.
    """
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False
    ) as f:
        index.save_to_disk(save_path=f.name)

        # Copy it into artifact store
        fileio.copy(f.name, filepath)

    # Close and remove the temporary file
    f.close()
    fileio.remove(f.name)
```
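Finally, a sketch of wiring the hypothetical steps above into a pipeline, using the `@pipeline` decorator from `zenml.pipelines` (the pipeline API of this ZenML era):

```python
# Sketch: connecting the hypothetical steps above. The index artifact is
# passed between steps via the materializers documented on this page.
from zenml.pipelines import pipeline


@pipeline
def index_pipeline(build_step, query_step):
    index = build_step()
    query_step(index=index)


# index_pipeline(build_step=build_faiss_index(), query_step=query_index()).run()
```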