Llama Index
zenml.integrations.llama_index
special
Initialization of the Llama Index integration.
LlamaIndexIntegration (Integration)
Definition of Llama Index integration for ZenML.
Source code in zenml/integrations/llama_index/__init__.py
class LlamaIndexIntegration(Integration):
    """Definition of Llama Index integration for ZenML.

    Declares the integration name and the pip requirement needed to use
    Llama Index with ZenML.
    """

    # Identifier of this integration.
    NAME = LLAMA_INDEX
    # Packages that must be installed for the integration to work.
    REQUIREMENTS = ["llama_index>=0.4.28"]

    @classmethod
    def activate(cls) -> None:
        """Activates the integration.

        Imports the ``materializers`` subpackage. The imported name is not
        used afterwards, so the import happens only for its side effects.
        """
        from zenml.integrations.llama_index import materializers # noqa
activate()
classmethod
Activates the integration.
Source code in zenml/integrations/llama_index/__init__.py
@classmethod
def activate(cls) -> None:
    """Activates the integration.

    Imports the ``materializers`` subpackage. The imported name is not
    used afterwards, so the import happens only for its side effects.
    """
    from zenml.integrations.llama_index import materializers # noqa
materializers
special
Initialization of the Llama Index materializers.
document_materializer
Implementation of the llama-index document materializer.
LlamaIndexDocumentMaterializer (BaseMaterializer)
Handle serialization and deserialization of llama-index documents.
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
class LlamaIndexDocumentMaterializer(BaseMaterializer):
    """Handle serialization and deserialization of llama-index documents.

    Documents are converted to their Langchain representation and
    persisted through a wrapped ``LangchainDocumentMaterializer``.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.DATA
    ASSOCIATED_TYPES = (Document,)

    def __init__(self, **kwargs: Any) -> None:
        """Initializes the llama-index document materializer.

        Args:
            **kwargs: Keyword arguments, forwarded both to the base
                materializer and to the wrapped Langchain materializer.
        """
        super().__init__(**kwargs)
        self._langchain_materializer = LangchainDocumentMaterializer(**kwargs)

    def load(self, data_type: Type[Document]) -> Document:
        """Reads a llama-index document from JSON.

        Args:
            data_type: The type of the data to read.

        Returns:
            The data read.
        """
        # The base-class result is not needed; the call is kept only for
        # whatever the parent implementation does.
        super().load(data_type)
        data_path = os.path.join(self.uri, DEFAULT_FILENAME)
        contents = yaml_utils.read_json(data_path)
        # The stored payload is a Langchain document; convert it back.
        langchain_document = LCDocument.parse_raw(contents)
        return Document.from_langchain_format(langchain_document)

    def save(self, data: Document) -> None:
        """Serialize a llama-index document as a Langchain document.

        Args:
            data: The data to store.
        """
        super().save(data)
        lc_doc = data.to_langchain_format()
        self._langchain_materializer.save(lc_doc)

    def extract_metadata(self, data: Document) -> Dict[str, "MetadataType"]:
        """Extract metadata from the given Llama Index document.

        Args:
            data: The Llama Index document to extract metadata from.

        Returns:
            The extracted metadata as a dictionary.
        """
        return self._langchain_materializer.extract_metadata(
            data.to_langchain_format()
        )
__init__(self, **kwargs)
special
Initializes the llama-index document materializer.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
**kwargs |
Any |
Keyword arguments. |
{} |
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def __init__(self, **kwargs: Any) -> None:
    """Set up the materializer and its wrapped Langchain materializer.

    Args:
        **kwargs: Keyword arguments, passed unchanged to the parent
            initializer and to the ``LangchainDocumentMaterializer``.
    """
    super().__init__(**kwargs)
    # The wrapped materializer receives the very same keyword arguments.
    delegate = LangchainDocumentMaterializer(**kwargs)
    self._langchain_materializer = delegate
extract_metadata(self, data)
Extract metadata from the given Llama Index document.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Document |
The Llama Index document to extract metadata from. |
required |
Returns:
Type | Description |
---|---|
Dict[str, MetadataType] |
The extracted metadata as a dictionary. |
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def extract_metadata(self, data: Document) -> Dict[str, "MetadataType"]:
    """Collect metadata for a Llama Index document.

    The document is converted to its Langchain form and the metadata
    extraction is delegated to the wrapped Langchain materializer.

    Args:
        data: The Llama Index document to extract metadata from.

    Returns:
        The extracted metadata as a dictionary.
    """
    delegate = self._langchain_materializer
    langchain_document = data.to_langchain_format()
    return delegate.extract_metadata(langchain_document)
load(self, data_type)
Reads a llama-index document from JSON.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[llama_index.readers.schema.base.Document] |
The type of the data to read. |
required |
Returns:
Type | Description |
---|---|
Document |
The data read. |
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def load(self, data_type: Type[Document]) -> Document:
    """Reads a llama-index document from JSON.

    Args:
        data_type: The type of the data to read.

    Returns:
        The data read.
    """
    # The base-class result is not needed; the call is kept only for
    # whatever the parent implementation does.
    super().load(data_type)
    data_path = os.path.join(self.uri, DEFAULT_FILENAME)
    contents = yaml_utils.read_json(data_path)
    # The stored payload is a Langchain document; convert it back.
    langchain_document = LCDocument.parse_raw(contents)
    return Document.from_langchain_format(langchain_document)
save(self, data)
Serialize a llama-index document as a Langchain document.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Document |
The data to store. |
required |
Source code in zenml/integrations/llama_index/materializers/document_materializer.py
def save(self, data: Document) -> None:
    """Persist a llama-index document in its Langchain representation.

    Args:
        data: The data to store.
    """
    super().save(data)
    # Convert to the Langchain format and let the wrapped materializer
    # do the actual writing.
    delegate = self._langchain_materializer
    delegate.save(data.to_langchain_format())
gpt_index_materializer
Implementation of the llama-index GPT index materializer.
LlamaIndexGPTFaissIndexMaterializer (BaseMaterializer)
Materializer for llama_index GPT faiss indices.
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTFaissIndexMaterializer(BaseMaterializer):
    """Materializer for llama_index GPT faiss indices.

    The index is stored as two files inside the artifact URI: the index
    itself (``DEFAULT_FILENAME``) and the faiss data
    (``DEFAULT_FAISS_FILENAME``). Both are staged through a temporary
    directory so that llama-index only ever reads and writes the
    temporary copies.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL
    ASSOCIATED_TYPES = (GPTFaissIndex,)

    def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
        """Load a llama-index GPT faiss index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
            temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)

            # Copy from artifact store to the temporary folder
            fileio.copy(filepath, temp_file)
            fileio.copy(faiss_filepath, temp_faiss_file)

            # Load from the temporary copies rather than from the artifact
            # URI, so the copies made above are the files actually read.
            index = data_type.load_from_disk(
                save_path=temp_file, faiss_index_save_path=temp_faiss_file
            )
        finally:
            # Cleanup even when copying or loading fails
            fileio.rmtree(temp_dir)
        return cast(GPTFaissIndex, index)

    def save(self, index: GPTFaissIndex) -> None:
        """Save a llama-index GPT faiss index to disk.

        Args:
            index: The index to save.
        """
        super().save(index)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)
        faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

        # Write into a temporary folder first; no file handle is kept open
        # while llama-index writes the files.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
            temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)
            index.save_to_disk(
                save_path=temp_file, faiss_index_save_path=temp_faiss_file
            )

            # Copy both files into artifact store
            fileio.copy(temp_file, filepath)
            # The faiss file used to be written in place; overwrite=True
            # keeps a pre-existing destination from turning into an error.
            fileio.copy(temp_faiss_file, faiss_filepath, overwrite=True)
        finally:
            # Remove the temporary files even when saving fails
            fileio.rmtree(temp_dir)
load(self, data_type)
Load a llama-index GPT faiss index from disk.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[GPTFaissIndex] |
The type of the index. |
required |
Returns:
Type | Description |
---|---|
GPTFaissIndex |
The index. |
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[GPTFaissIndex]) -> GPTFaissIndex:
    """Load a llama-index GPT faiss index from disk.

    The index file and the faiss file are copied from the artifact URI
    into a temporary folder and loaded from there.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
        temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)

        # Copy from artifact store to the temporary folder
        fileio.copy(filepath, temp_file)
        fileio.copy(faiss_filepath, temp_faiss_file)

        # Load from the temporary copies rather than from the artifact
        # URI, so the copies made above are the files actually read.
        index = data_type.load_from_disk(
            save_path=temp_file, faiss_index_save_path=temp_faiss_file
        )
    finally:
        # Cleanup even when copying or loading fails
        fileio.rmtree(temp_dir)
    return cast(GPTFaissIndex, index)
save(self, index)
Save a llama-index GPT faiss index to disk.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
index |
GPTFaissIndex |
The index to save. |
required |
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: GPTFaissIndex) -> None:
    """Save a llama-index GPT faiss index to disk.

    The index file and the faiss file are written into a temporary
    folder and then copied into the artifact URI.

    Args:
        index: The index to save.
    """
    super().save(index)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)
    faiss_filepath = os.path.join(self.uri, DEFAULT_FAISS_FILENAME)

    # Write into a temporary folder first; no file handle is kept open
    # while llama-index writes the files.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
        temp_faiss_file = os.path.join(temp_dir, DEFAULT_FAISS_FILENAME)
        index.save_to_disk(
            save_path=temp_file, faiss_index_save_path=temp_faiss_file
        )

        # Copy both files into artifact store
        fileio.copy(temp_file, filepath)
        # The faiss file used to be written in place; overwrite=True
        # keeps a pre-existing destination from turning into an error.
        fileio.copy(temp_faiss_file, faiss_filepath, overwrite=True)
    finally:
        # Remove the temporary files even when saving fails
        fileio.rmtree(temp_dir)
LlamaIndexGPTIndexMaterializer (Generic, BaseMaterializer)
Materializer for llama_index GPT indices.
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
class LlamaIndexGPTIndexMaterializer(Generic[T], BaseMaterializer):
    """Materializer for llama_index GPT indices.

    The index is stored as a single file (``DEFAULT_FILENAME``) inside the
    artifact URI and is staged through a temporary directory so that
    llama-index only ever reads and writes the temporary copy.
    """

    ASSOCIATED_ARTIFACT_TYPE = ArtifactType.MODEL
    ASSOCIATED_TYPES = (BaseGPTIndex,)

    def load(self, data_type: Type[T]) -> T:
        """Loads a llama-index GPT index from disk.

        Args:
            data_type: The type of the index.

        Returns:
            The index.
        """
        super().load(data_type)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Create a temporary folder
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

            # Copy from artifact store to temporary file
            fileio.copy(filepath, temp_file)

            # Load from the temporary copy rather than from the artifact
            # URI, so the copy made above is the file actually read.
            index = data_type.load_from_disk(save_path=temp_file)
            assert isinstance(index, data_type)
        finally:
            # Cleanup even when copying or loading fails
            fileio.rmtree(temp_dir)
        return index

    def save(self, index: T) -> None:
        """Save a llama-index GPT index to disk.

        Args:
            index: The index to save.
        """
        super().save(index)
        filepath = os.path.join(self.uri, DEFAULT_FILENAME)

        # Write into a temporary folder first; no file handle is kept open
        # while llama-index writes the file.
        temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
        try:
            temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
            index.save_to_disk(save_path=temp_file)

            # Copy it into artifact store
            fileio.copy(temp_file, filepath)
        finally:
            # Remove the temporary file even when saving fails
            fileio.rmtree(temp_dir)
load(self, data_type)
Loads a llama-index GPT index from disk.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data_type |
Type[T] |
The type of the index. |
required |
Returns:
Type | Description |
---|---|
T |
The index. |
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def load(self, data_type: Type[T]) -> T:
    """Loads a llama-index GPT index from disk.

    The index file is copied from the artifact URI into a temporary
    folder and loaded from there.

    Args:
        data_type: The type of the index.

    Returns:
        The index.
    """
    super().load(data_type)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Create a temporary folder
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)

        # Copy from artifact store to temporary file
        fileio.copy(filepath, temp_file)

        # Load from the temporary copy rather than from the artifact
        # URI, so the copy made above is the file actually read.
        index = data_type.load_from_disk(save_path=temp_file)
        assert isinstance(index, data_type)
    finally:
        # Cleanup even when copying or loading fails
        fileio.rmtree(temp_dir)
    return index
save(self, index)
Save a llama-index GPT index to disk.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
index |
T |
The index to save. |
required |
Source code in zenml/integrations/llama_index/materializers/gpt_index_materializer.py
def save(self, index: T) -> None:
    """Save a llama-index GPT index to disk.

    The index is written into a temporary folder and then copied into
    the artifact URI.

    Args:
        index: The index to save.
    """
    super().save(index)
    filepath = os.path.join(self.uri, DEFAULT_FILENAME)

    # Write into a temporary folder first; no file handle is kept open
    # while llama-index writes the file.
    temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
    try:
        temp_file = os.path.join(temp_dir, DEFAULT_FILENAME)
        index.save_to_disk(save_path=temp_file)

        # Copy it into artifact store
        fileio.copy(temp_file, filepath)
    finally:
        # Remove the temporary file even when saving fails
        fileio.rmtree(temp_dir)