Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.memvid.com/llms.txt

Use this file to discover all available pages before exploring further.

Integrate Memvid with Haystack to build powerful search and RAG pipelines. The haystack adapter provides native Haystack components.

Installation

pip install memvid-sdk haystack-ai

Quick Start

from memvid_sdk import use

# Open with Haystack adapter. This snippet only retrieves, so the memory is
# opened read-only, matching the other retrieval examples on this page.
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Access Haystack components: a retriever that can be added to a Pipeline
retriever = mem.as_retriever(top_k=5)

Basic Pipeline

from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from memvid_sdk import use

# Initialize with haystack adapter
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Get retriever component
retriever = mem.as_retriever(top_k=5)

# The generator's `prompt` input is a string, while the retriever emits a list
# of Documents. A PromptBuilder renders the documents into a prompt string so
# the two components can be connected.
prompt_builder = PromptBuilder(template="""
Context:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}

Question: {{ query }}
Answer:
""")

# Create generator
generator = OpenAIGenerator(model="gpt-4o")

# Build pipeline
pipeline = Pipeline()
pipeline.add_component("retriever", retriever)
pipeline.add_component("prompt_builder", prompt_builder)
pipeline.add_component("generator", generator)
pipeline.connect("retriever.documents", "prompt_builder.documents")
pipeline.connect("prompt_builder.prompt", "generator.prompt")

# Run query: the same text drives retrieval and is rendered into the prompt
query = "What is the architecture?"
result = pipeline.run({
    "retriever": {"query": query},
    "prompt_builder": {"query": query},
})
print(result["generator"]["replies"][0])

RAG Pipeline

from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from memvid_sdk import use

# Jinja template rendered by the PromptBuilder: it lists the content of every
# retrieved document, then appends the user's question.
prompt_template = """
Answer the question based on the following context:

Context:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}

Question: {{ question }}
Answer:
"""

# Open the memory read-only through the Haystack adapter
mem = use('haystack', 'knowledge.mv2', read_only=True)

# Register the three stages in order: retrieve -> build prompt -> generate
rag_pipeline = Pipeline()
stages = (
    ("retriever", mem.as_retriever(top_k=5)),
    ("prompt_builder", PromptBuilder(template=prompt_template)),
    ("generator", OpenAIGenerator(model="gpt-4o")),
)
for stage_name, stage in stages:
    rag_pipeline.add_component(stage_name, stage)

# Retrieved documents feed the template; the rendered prompt feeds the LLM
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")

# The retrieval query and the question shown to the LLM are supplied separately
run_inputs = {
    "retriever": {"query": "deployment"},
    "prompt_builder": {"question": "How do I deploy to production?"},
}
result = rag_pipeline.run(run_inputs)
first_reply = result["generator"]["replies"][0]
print(first_reply)

Custom Retriever Component

from haystack import component, Document
from memvid_sdk import use
from typing import List

@component
class MemvidRetriever:
    """Haystack component that retrieves documents from a Memvid memory file.

    The memory is opened lazily, on the first ``warm_up()`` or ``run()`` call,
    and the same handle is reused afterwards.

    Args:
        memory_path: Path of the memory file passed to ``use``.
        top_k: Forwarded to ``find`` as ``k``.
        mode: Forwarded to ``find`` as ``mode``.
    """

    def __init__(self, memory_path: str, top_k: int = 5, mode: str = 'auto'):
        self.memory_path = memory_path
        self.top_k = top_k
        self.mode = mode
        self._mem = None  # opened on first warm_up()/run()

    def warm_up(self) -> None:
        """Open the memory read-only; calling this again is a no-op."""
        # warm_up() can be invoked more than once (explicitly, by a pipeline,
        # and from run()). Re-opening would replace the existing handle
        # without releasing it, so only open when nothing is open yet.
        if self._mem is None:
            self._mem = use('haystack', self.memory_path, read_only=True)

    @component.output_types(documents=List[Document])
    def run(self, query: str) -> dict:
        """Search the memory and return the hits as Haystack documents.

        Args:
            query: Search text forwarded to ``find``.

        Returns:
            ``{"documents": [...]}`` with one ``Document`` per result.
        """
        self.warm_up()

        # NOTE(review): assumes find() returns an iterable of hit objects
        # exposing .snippet, .frame_id, .title, .score and .uri — confirm
        # against the SDK's return type.
        results = self._mem.find(query, k=self.top_k, mode=self.mode)

        documents = [
            Document(
                content=r.snippet,
                meta={
                    'frame_id': r.frame_id,
                    'title': r.title,
                    'score': r.score,
                    'uri': r.uri,
                },
            )
            for r in results
        ]

        return {"documents": documents}

# Usage
retriever = MemvidRetriever(memory_path='knowledge.mv2', top_k=10)

Hybrid Search Pipeline

from haystack import Pipeline
from haystack.components.joiners import DocumentJoiner
from memvid_sdk import use

mem = use('haystack', 'knowledge.mv2', read_only=True)

# Two retrievers over the same memory, differing only in search mode
lexical_retriever = mem.as_retriever(top_k=10, mode='lex')
semantic_retriever = mem.as_retriever(top_k=10, mode='sem')

# Merges both result lists using reciprocal rank fusion
joiner = DocumentJoiner(join_mode="reciprocal_rank_fusion")

# Register the components, then route each retriever's output into the joiner
hybrid_pipeline = Pipeline()
branches = (
    ("lexical", lexical_retriever),
    ("semantic", semantic_retriever),
)
for branch_name, branch in branches:
    hybrid_pipeline.add_component(branch_name, branch)
hybrid_pipeline.add_component("joiner", joiner)

for branch_name, _ in branches:
    hybrid_pipeline.connect(f"{branch_name}.documents", "joiner.documents")

# Run: both branches receive the same query text
search_text = "authentication methods"
result = hybrid_pipeline.run(
    {branch_name: {"query": search_text} for branch_name, _ in branches}
)
print(f"Found {len(result['joiner']['documents'])} documents")

Document Store

from memvid_sdk import use

# Use as a document store
# Opened without read_only=True, unlike the retrieval examples — presumably
# required because documents are written below; confirm against the SDK.
mem = use('haystack', 'knowledge.mv2')

# Get document store interface
doc_store = mem.as_document_store()

# Write documents
# NOTE(review): these are plain dicts. Haystack 2.x document stores are typed
# to receive haystack.Document objects — confirm this adapter accepts dicts.
doc_store.write_documents([
    {"content": "Document 1 content", "meta": {"title": "Doc 1"}},
    {"content": "Document 2 content", "meta": {"title": "Doc 2"}}
])

# Count documents
count = doc_store.count_documents()
print(f"Total documents: {count}")

# Filter documents: exact match on the "title" metadata field
filtered = doc_store.filter_documents(
    filters={"field": "meta.title", "operator": "==", "value": "Doc 1"}
)

Best Practices

  1. Use read-only mode for retrieval pipelines
  2. Warm up components before running pipelines
  3. Use hybrid search for best results
  4. Close the memory when done, even if the pipeline raises:
from haystack import Pipeline
from memvid_sdk import use

mem = use('haystack', 'knowledge.mv2', read_only=True)
try:
    pipeline = Pipeline()
    pipeline.add_component("retriever", mem.as_retriever(top_k=10))
    # ... build and run pipeline
finally:
    # Runs whether or not the pipeline code above raised
    mem.seal()

Next Steps

Semantic Kernel

Semantic Kernel integration

Python SDK

Full Python SDK documentation