The `haystack` adapter provides native Haystack components.
Installation
Copy
pip install memvid-sdk haystack-ai
Quick Start
Copy
from memvid_sdk import use
# Open the memory file through the Haystack adapter
mem = use('haystack', 'knowledge.mv2')
# Get a retriever component that can be added to a Haystack pipeline
# (top_k=5 presumably caps the number of documents returned per query)
retriever = mem.as_retriever(top_k=5)
Basic Pipeline
Copy
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from memvid_sdk import use

# Initialize with haystack adapter
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Get retriever component
retriever = mem.as_retriever(top_k=5)

# Render the retrieved documents into a text prompt. The generator's
# `prompt` input is a string, so the retriever's document list cannot be
# connected to it directly; a prompt builder has to sit in between.
prompt_template = """
Context:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}
Question: {{ question }}
"""
prompt_builder = PromptBuilder(template=prompt_template)

# Create generator
generator = OpenAIGenerator(model="gpt-4o")

# Build pipeline: retriever -> prompt_builder -> generator
pipeline = Pipeline()
pipeline.add_component("retriever", retriever)
pipeline.add_component("prompt_builder", prompt_builder)
pipeline.add_component("generator", generator)
pipeline.connect("retriever.documents", "prompt_builder.documents")
pipeline.connect("prompt_builder", "generator")

# Run query (the same text drives retrieval and fills the prompt)
question = "What is the architecture?"
result = pipeline.run({
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
})
print(result["generator"]["replies"][0])
RAG Pipeline
Copy
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from memvid_sdk import use
# Open the memory read-only; this pipeline only retrieves from it
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Template rendered by the prompt builder: lists every retrieved document's
# content, then the user's question
prompt_template = """
Answer the question based on the following context:
Context:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}
Question: {{ question }}
Answer:
"""

# Instantiate the three components up front
retriever = mem.as_retriever(top_k=5)
prompt_builder = PromptBuilder(template=prompt_template)
generator = OpenAIGenerator(model="gpt-4o")

# Assemble: retriever -> prompt_builder -> generator
rag_pipeline = Pipeline()
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("generator", generator)
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")

# The retriever gets a short search query; the prompt gets the full question
inputs = {
    "retriever": {"query": "deployment"},
    "prompt_builder": {"question": "How do I deploy to production?"},
}
result = rag_pipeline.run(inputs)
print(result["generator"]["replies"][0])
Custom Retriever Component
Copy
from haystack import component, Document
from memvid_sdk import use
from typing import List
@component
class MemvidRetriever:
    """Haystack retriever component backed by a Memvid memory file.

    The memory is opened lazily: nothing is loaded until ``warm_up`` runs,
    either explicitly or on the first call to ``run``.

    Args:
        memory_path: Path of the memory file passed to ``use``.
        top_k: Number of results requested from ``find`` for each query.
        mode: Search mode forwarded to ``find``.
    """

    def __init__(self, memory_path: str, top_k: int = 5, mode: str = 'auto'):
        self.memory_path = memory_path
        self.top_k = top_k
        self.mode = mode
        # Set by warm_up(); stays None until the memory has been opened.
        self._mem = None

    def warm_up(self):
        """Open the memory read-only, unless it is already open.

        Calling this more than once is a no-op after the first call, so a
        pipeline that warms its components up repeatedly does not reopen
        the file and drop the previous handle.
        """
        if self._mem is None:
            self._mem = use('haystack', self.memory_path, read_only=True)

    @component.output_types(documents=List[Document])
    def run(self, query: str) -> dict:
        """Search the memory and wrap each hit in a Haystack ``Document``.

        Args:
            query: Search text forwarded to ``find``.

        Returns:
            A dict with a single ``"documents"`` key holding the list of
            documents built from the search results.
        """
        # Lazily open the memory if the caller never warmed the component up.
        self.warm_up()
        # NOTE(review): assumes find() returns an iterable of hit objects
        # exposing snippet/frame_id/title/score/uri attributes -- confirm
        # against the memvid_sdk version in use.
        results = self._mem.find(query, k=self.top_k, mode=self.mode)
        documents = [
            Document(
                content=r.snippet,
                meta={
                    'frame_id': r.frame_id,
                    'title': r.title,
                    'score': r.score,
                    'uri': r.uri,
                },
            )
            for r in results
        ]
        return {"documents": documents}
# Usage
retriever = MemvidRetriever(memory_path='knowledge.mv2', top_k=10)
Hybrid Search Pipeline
Copy
from haystack import Pipeline
from haystack.components.joiners import DocumentJoiner
from memvid_sdk import use
# Open the memory read-only for retrieval
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Two retrievers over the same memory (lexical and semantic modes) plus a
# joiner that fuses their result lists
hybrid_pipeline = Pipeline()
hybrid_pipeline.add_component("lexical", mem.as_retriever(top_k=10, mode='lex'))
hybrid_pipeline.add_component("semantic", mem.as_retriever(top_k=10, mode='sem'))
hybrid_pipeline.add_component(
    "joiner", DocumentJoiner(join_mode="reciprocal_rank_fusion")
)

# Both retrievers feed the same joiner input
hybrid_pipeline.connect("lexical.documents", "joiner.documents")
hybrid_pipeline.connect("semantic.documents", "joiner.documents")

# Send the same query down both branches
query = "authentication methods"
result = hybrid_pipeline.run({
    "lexical": {"query": query},
    "semantic": {"query": query},
})
print(f"Found {len(result['joiner']['documents'])} documents")
Document Store
Copy
from memvid_sdk import use
# Open the memory without read_only=True; documents are written below
mem = use('haystack', 'knowledge.mv2')

# Document store view over the memory
doc_store = mem.as_document_store()

# Documents to add: content plus a title in the metadata
new_documents = [
    {"content": "Document 1 content", "meta": {"title": "Doc 1"}},
    {"content": "Document 2 content", "meta": {"title": "Doc 2"}},
]
doc_store.write_documents(new_documents)

# Report how many documents the store holds
count = doc_store.count_documents()
print(f"Total documents: {count}")

# Select documents whose metadata title equals "Doc 1"
title_filter = {"field": "meta.title", "operator": "==", "value": "Doc 1"}
filtered = doc_store.filter_documents(filters=title_filter)
Best Practices
- Use read-only mode for retrieval pipelines
- Warm up components before running pipelines
- Use hybrid search for best results
- Close the memory when done
Copy
from haystack import Pipeline
from memvid_sdk import use

# Read-only is enough for a retrieval pipeline
mem = use('haystack', 'knowledge.mv2', read_only=True)
try:
    pipeline = Pipeline()
    pipeline.add_component("retriever", mem.as_retriever(top_k=10))
    # ... build and run pipeline
finally:
    # Runs even if building or running the pipeline raises
    mem.seal()