Skip to main content
Integrate Memvid with Haystack to build powerful search and RAG pipelines. The haystack adapter provides native Haystack components.

Installation

pip install memvid-sdk haystack-ai

Quick Start

from memvid_sdk import use

# Open the memory file through the Haystack adapter
MEMORY_PATH = 'knowledge.mv2'
mem = use('haystack', MEMORY_PATH)

# Ask the adapter for a Haystack retriever component (top_k=5)
retriever = mem.as_retriever(top_k=5)

Basic Pipeline

from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from memvid_sdk import use

# Initialize with haystack adapter
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Get retriever component
retriever = mem.as_retriever(top_k=5)

# Render the retrieved documents into a text prompt. The generator's
# "prompt" input is a string, so the retriever's document list cannot be
# connected to it directly.
prompt_builder = PromptBuilder(template="""
Answer the question based on the following context:

Context:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}

Question: {{ query }}
Answer:
""")

# Create generator (OpenAIGenerator reads the OPENAI_API_KEY environment
# variable by default)
generator = OpenAIGenerator(model="gpt-4o")

# Build pipeline: retriever -> prompt_builder -> generator
pipeline = Pipeline()
pipeline.add_component("retriever", retriever)
pipeline.add_component("prompt_builder", prompt_builder)
pipeline.add_component("generator", generator)
pipeline.connect("retriever.documents", "prompt_builder.documents")
pipeline.connect("prompt_builder", "generator")

# Run query: the same text drives retrieval and is rendered into the prompt
query = "What is the architecture?"
result = pipeline.run({
    "retriever": {"query": query},
    "prompt_builder": {"query": query},
})
print(result["generator"]["replies"][0])

RAG Pipeline

from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from memvid_sdk import use

# Open the memory read-only; this pipeline only retrieves from it
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Jinja template: lists each retrieved document, then the question
prompt_template = """
Answer the question based on the following context:

Context:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}

Question: {{ question }}
Answer:
"""

# Instantiate the three components up front, keyed by their pipeline names
components = {
    "retriever": mem.as_retriever(top_k=5),
    "prompt_builder": PromptBuilder(template=prompt_template),
    "generator": OpenAIGenerator(model="gpt-4o"),
}

# Register the components, then wire retriever -> prompt_builder -> generator
rag_pipeline = Pipeline()
for component_name, instance in components.items():
    rag_pipeline.add_component(component_name, instance)

rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")

# Run: the retriever gets a search query, the prompt gets the full question
pipeline_inputs = {
    "retriever": {"query": "deployment"},
    "prompt_builder": {"question": "How do I deploy to production?"}
}
result = rag_pipeline.run(pipeline_inputs)
first_reply = result["generator"]["replies"][0]
print(first_reply)

Custom Retriever Component

from haystack import component, Document
from memvid_sdk import use
from typing import List

@component
class MemvidRetriever:
    """Haystack component that searches a Memvid memory file.

    The memory is opened lazily, in read-only mode, on the first call to
    ``warm_up()`` or ``run()``; constructing the component does not touch
    the file.

    Args:
        memory_path: Path to the memory file passed to ``use()``.
        top_k: Value passed as ``k`` to ``find()``.
        mode: Search mode passed to ``find()``.
    """

    def __init__(self, memory_path: str, top_k: int = 5, mode: str = 'auto'):
        self.memory_path = memory_path
        self.top_k = top_k
        self.mode = mode
        self._mem = None  # opened on demand by warm_up()

    def warm_up(self) -> None:
        """Open the memory connection if it is not already open.

        Safe to call repeatedly: a pipeline may invoke ``warm_up()`` more than
        once, and re-opening the file each time would discard the previous
        handle without releasing it.
        """
        if self._mem is None:
            self._mem = use('haystack', self.memory_path, read_only=True)

    @component.output_types(documents=List[Document])
    def run(self, query: str) -> dict:
        """Search the memory and return the hits as Haystack documents.

        Args:
            query: Search text passed to ``find()``.

        Returns:
            A dict with one key, ``"documents"``, holding one ``Document``
            per search result. Each document's content is the result's
            snippet; frame id, title, score and URI are stored in ``meta``.
        """
        # No-op when the pipeline has already warmed the component up.
        self.warm_up()

        results = self._mem.find(query, k=self.top_k, mode=self.mode)

        documents = [
            Document(
                content=r.snippet,
                meta={
                    'frame_id': r.frame_id,
                    'title': r.title,
                    'score': r.score,
                    'uri': r.uri,
                },
            )
            for r in results
        ]

        return {"documents": documents}

# Usage: create the component for 'knowledge.mv2' with top_k=10. The memory
# file is not opened here; that happens on warm_up() or the first run().
retriever = MemvidRetriever(memory_path='knowledge.mv2', top_k=10)

Hybrid Search Pipeline

from haystack import Pipeline
from haystack.components.joiners import DocumentJoiner
from memvid_sdk import use

mem = use('haystack', 'knowledge.mv2', read_only=True)

# Two retrievers over the same memory, one per search mode
lexical_retriever = mem.as_retriever(top_k=10, mode='lex')
semantic_retriever = mem.as_retriever(top_k=10, mode='sem')

# Merges both result lists using reciprocal rank fusion
joiner = DocumentJoiner(join_mode="reciprocal_rank_fusion")

# Register all components on the hybrid pipeline
hybrid_pipeline = Pipeline()
for component_name, instance in (
    ("lexical", lexical_retriever),
    ("semantic", semantic_retriever),
    ("joiner", joiner),
):
    hybrid_pipeline.add_component(component_name, instance)

# Feed both retrievers' documents into the joiner
for source in ("lexical", "semantic"):
    hybrid_pipeline.connect(f"{source}.documents", "joiner.documents")

# Run: both retrievers receive the same query text
search_text = "authentication methods"
result = hybrid_pipeline.run(
    {source: {"query": search_text} for source in ("lexical", "semantic")}
)
joined_documents = result['joiner']['documents']
print(f"Found {len(joined_documents)} documents")

Document Store

from memvid_sdk import use

# Open the memory (no read_only flag here, since this example writes to it)
mem = use('haystack', 'knowledge.mv2')

# Obtain the document store interface from the adapter
doc_store = mem.as_document_store()

# Write two documents, each with content and a title in its metadata
new_documents = [
    {"content": "Document 1 content", "meta": {"title": "Doc 1"}},
    {"content": "Document 2 content", "meta": {"title": "Doc 2"}}
]
doc_store.write_documents(new_documents)

# Report how many documents the store holds
count = doc_store.count_documents()
print(f"Total documents: {count}")

# Fetch only the documents whose meta.title equals "Doc 1"
title_filter = {"field": "meta.title", "operator": "==", "value": "Doc 1"}
filtered = doc_store.filter_documents(filters=title_filter)

Best Practices

  1. Use read-only mode for retrieval pipelines
  2. Warm up components before running pipelines
  3. Use hybrid search for best results
  4. Close the memory when done. Call `seal()` in a `finally` block so it runs even if the pipeline fails, as in this example:
mem = use('haystack', 'knowledge.mv2', read_only=True)
try:
    # Create the retriever first, then register it on a fresh pipeline
    retriever = mem.as_retriever(top_k=10)
    pipeline = Pipeline()
    pipeline.add_component("retriever", retriever)
    # ... build and run pipeline
finally:
    # Runs whether or not the pipeline code above raised
    mem.seal()

Next Steps