What you’ll build: A production-ready knowledge base that lets users search company docs, wikis, and FAQs using natural language.
Time: 25 minutes | Difficulty: Intermediate
Overview
Build a searchable knowledge base that:
- 📚 Ingests documents from multiple sources
- 🔍 Supports natural language search
- 💬 Answers questions with AI
- 🔐 Respects access controls
- 📊 Tracks popular queries
Architecture
Quick Start
- Node.js
- Python
Copy
import { use } from '@memvid/sdk';
// Open the knowledge base file in 'create' mode.
const kb = await use('basic', 'company-kb.mv2', { mode: 'create' });

// Describe the document first, then hand it to the knowledge base.
const handbook = {
  title: 'Employee Handbook',
  label: 'hr',
  file: './docs/handbook.pdf',
  tags: ['hr', 'policies', 'onboarding'],
};
await kb.put(handbook);

// Natural-language search with k set to 5.
const results = await kb.find('vacation policy', { k: 5 });

// Ask a question and print the `answer` field of the response.
const answer = await kb.ask('How many vacation days do employees get?');
const { answer: answerText } = answer;
console.log(answerText);
Copy
from memvid_sdk import use
import os
# Open the knowledge base file in 'create' mode.
kb = use('basic', 'company-kb.mv2', mode='create')

# Documents to ingest, in the order they are added.
documents = [
    {
        "title": "Employee Handbook",
        "label": "hr",
        "file": "./docs/handbook.pdf",
        "tags": ["hr", "policies", "onboarding"],
    },
    {
        "title": "API Documentation",
        "label": "engineering",
        "file": "./docs/api-guide.md",
        "tags": ["api", "development", "integration"],
    },
]
for document in documents:
    kb.put(document)

# Natural-language search with k set to 5.
results = kb.find("vacation policy", k=5)

# Ask a question and print the "answer" field of the response.
answer = kb.ask("How many vacation days do employees get?")
print(answer["answer"])
Full Implementation
Knowledge Base Class
Copy
from memvid_sdk import use
from pathlib import Path
from typing import List, Dict, Optional
from datetime import datetime
import json
class KnowledgeBase:
    """Company knowledge base with Memvid.

    Wraps the handle returned by ``use('basic', ...)`` and offers helpers for
    adding text and files, searching, and question answering.
    """

    # Lower-case file extensions that add_folder() ingests.
    SUPPORTED_SUFFIXES = ('.pdf', '.md', '.txt', '.docx')

    def __init__(self, memory_path: str = "knowledge.mv2"):
        """Open the memory file at *memory_path* in ``'auto'`` mode."""
        self.mem = use('basic', memory_path, mode='auto')
        # Not read by any method of this class; kept for existing callers.
        self.sources = {}

    @staticmethod
    def _scope(category: Optional[str]) -> Optional[str]:
        """Return the ``label:<category>`` scope string, or None if unset."""
        return f"label:{category}" if category else None

    def add_document(
        self,
        title: str,
        content: str,
        category: str,
        tags: Optional[List[str]] = None,
        source: Optional[str] = None
    ) -> int:
        """Add a text document to the knowledge base.

        Args:
            title: Document title.
            content: Document body, stored under the ``text`` key.
            category: Stored as the document ``label``.
            tags: Optional tags; an empty list is stored when omitted.
            source: Optional origin recorded in the metadata.

        Returns:
            Whatever ``mem.put`` returns for the new document.
        """
        frame_id = self.mem.put({
            "title": title,
            "label": category,
            "text": content,
            "tags": tags or [],
            "metadata": {
                "source": source,
                "added_at": datetime.now().isoformat()
            }
        })
        return frame_id

    def add_file(
        self,
        filepath: str,
        category: str,
        tags: Optional[List[str]] = None
    ) -> int:
        """Add a file to the knowledge base.

        The file is passed to ``mem.put`` by absolute path; the path exactly
        as given is kept in the metadata as ``original_path``.
        """
        path = Path(filepath)
        return self.mem.put({
            "title": path.name,
            "label": category,
            "file": str(path.absolute()),
            "tags": tags or [],
            "metadata": {
                "source": "file",
                "original_path": str(path)
            }
        })

    def add_folder(
        self,
        folder_path: str,
        category: str,
        tags: Optional[List[str]] = None
    ) -> int:
        """Recursively add every supported file under *folder_path*.

        Extensions are compared case-insensitively, so ``REPORT.PDF`` is
        ingested just like ``report.pdf``.

        Args:
            folder_path: Root folder to walk.
            category: Label applied to every added file.
            tags: Optional tags applied to every added file.

        Returns:
            Number of files added.
        """
        count = 0
        for path in Path(folder_path).rglob("*"):
            if path.is_file() and path.suffix.lower() in self.SUPPORTED_SUFFIXES:
                self.add_file(str(path), category, tags)
                count += 1
        return count

    def search(self, query: str, category: Optional[str] = None, k: int = 10) -> List[Dict]:
        """Search the knowledge base.

        Args:
            query: Natural-language search text.
            category: When given, the search is scoped to ``label:<category>``.
            k: Passed through to ``mem.find``.

        Returns:
            One dict per hit with ``title``, ``snippet``, ``score`` and
            ``category`` keys.
        """
        results = self.mem.find(query, k=k, scope=self._scope(category))
        return [
            {
                "title": hit.title,
                "snippet": hit.snippet,
                "score": hit.score,
                "category": hit.label
            }
            for hit in results.hits
        ]

    def ask(self, question: str, category: Optional[str] = None) -> Dict:
        """Ask a question.

        Args:
            question: The question text.
            category: When given, the lookup is scoped to ``label:<category>``.

        Returns:
            Dict with ``answer``, ``sources`` (list of source titles) and
            ``confidence`` (None when the response carries none).
        """
        answer = self.mem.ask(question, k=5, scope=self._scope(category))
        # The response is read with .get() like a mapping, so look confidence
        # up the same way first; getattr() on a plain dict never finds a key.
        confidence = answer.get("confidence")
        if confidence is None:
            confidence = getattr(answer, 'confidence', None)
        return {
            "answer": answer.get("answer"),
            "sources": [s.get("title") for s in (answer.get("sources") or [])],
            "confidence": confidence
        }

    def get_categories(self) -> List[str]:
        """Get all categories, sorted, without duplicates.

        This is a simplified version: it only inspects the entries returned
        by ``timeline(limit=1000)``. Entries without a label are skipped.
        """
        entries = self.mem.timeline(limit=1000).entries
        return sorted({entry.label for entry in entries if entry.label})
Web API with FastAPI
Copy
import os
from typing import Optional, List

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
app = FastAPI(title="Knowledge Base API")

# Take the memory file location from MEMVID_FILE when it is set (the Docker
# Compose deployment points it at the mounted data volume); otherwise keep
# the previous hard-coded file name.
kb = KnowledgeBase(os.environ.get("MEMVID_FILE", "company-kb.mv2"))
class SearchRequest(BaseModel):
    # Request body for POST /search.

    # Natural-language search text.
    query: str
    # Optional category forwarded to the knowledge-base search.
    category: Optional[str] = None
    # Forwarded to the knowledge-base search as its result limit.
    limit: int = 10
class AskRequest(BaseModel):
    # Request body for POST /ask.

    # The question to answer.
    question: str
    # Optional category forwarded to the knowledge-base lookup.
    category: Optional[str] = None
class AddDocumentRequest(BaseModel):
    # Request body for POST /documents.

    # Title of the new document.
    title: str
    # Document text.
    content: str
    # Category the document is filed under.
    category: str
    # Optional tags; omitted means None.
    tags: Optional[List[str]] = None
@app.post("/search")
async def search(request: SearchRequest):
    # Run the search and report the hits together with their count.
    hits = kb.search(
        query=request.query,
        category=request.category,
        k=request.limit,
    )
    return {"results": hits, "total": len(hits)}
@app.post("/ask")
async def ask(request: AskRequest):
    # Hand the question to the knowledge base and return its reply unchanged.
    reply = kb.ask(question=request.question, category=request.category)
    return reply
@app.post("/documents")
async def add_document(request: AddDocumentRequest):
    # Store the submitted document and echo back the id it was given.
    new_frame = kb.add_document(
        title=request.title,
        content=request.content,
        category=request.category,
        tags=request.tags,
    )
    return {"frame_id": new_frame, "status": "added"}
@app.get("/categories")
async def get_categories():
    # List the categories currently known to the knowledge base.
    categories = kb.get_categories()
    return {"categories": categories}
React Frontend Component
Copy
import { useState } from 'react';
export function KnowledgeSearch() {
const [query, setQuery] = useState('');
const [results, setResults] = useState([]);
const [answer, setAnswer] = useState('');
const handleSearch = async () => {
const res = await fetch('/api/search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query, limit: 10 }),
});
const data = await res.json();
setResults(data.results);
};
const handleAsk = async () => {
const res = await fetch('/api/ask', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ question: query }),
});
const data = await res.json();
setAnswer(data.answer);
};
return (
<div className="knowledge-search">
<input
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
placeholder="Search or ask a question..."
/>
<button onClick={handleSearch}>Search</button>
<button onClick={handleAsk}>Ask AI</button>
{answer && (
<div className="answer-box">
<h3>Answer</h3>
<p>{answer}</p>
</div>
)}
<div className="results">
{results.map((r, i) => (
<div key={i} className="result-card">
<h4>{r.title}</h4>
<p>{r.snippet}</p>
<span className="category">{r.category}</span>
</div>
))}
</div>
</div>
);
}
Integrations
Sync from Notion
Copy
from notion_client import Client
# This snippet reads the token from the environment, so it needs `os` in scope.
import os

# Notion client authenticated with the NOTION_TOKEN environment variable.
notion = Client(auth=os.environ["NOTION_TOKEN"])
def sync_notion_pages(database_id: str, kb: KnowledgeBase):
    """Sync pages from a Notion database.

    Every page returned by the database query is added to *kb* as one
    document in the ``notion`` category, tagged ``notion`` and ``synced``.

    Args:
        database_id: ID of the Notion database to read.
        kb: Knowledge base that receives the documents.
    """
    cursor = None
    while True:
        # Query results are paginated: keep following next_cursor while
        # has_more is set so databases larger than one page sync completely.
        query_args = {"database_id": database_id}
        if cursor:
            query_args["start_cursor"] = cursor
        response = notion.databases.query(**query_args)

        for page in response["results"]:
            # The title is a list of rich-text fragments. Join all of them
            # and tolerate untitled pages, where the list is empty.
            # NOTE(review): assumes the title property is called "Name".
            fragments = page["properties"]["Name"]["title"]
            title = "".join(part["plain_text"] for part in fragments) or "Untitled"

            # NOTE(review): a single children.list call is made per page, so
            # long pages may need the same cursor handling — confirm.
            blocks = notion.blocks.children.list(page["id"])
            content = extract_text_from_blocks(blocks)
            kb.add_document(
                title=title,
                content=content,
                category="notion",
                tags=["notion", "synced"]
            )

        cursor = response.get("next_cursor") if response.get("has_more") else None
        if not cursor:
            break
Sync from Google Drive
Copy
from googleapiclient.discovery import build
def sync_google_drive(folder_id: str, kb: KnowledgeBase, creds):
    """Sync documents from Google Drive.

    Every non-trashed file directly inside the folder is downloaded and added
    to *kb* in the ``google-drive`` category. Sub-folders are skipped.

    Args:
        folder_id: ID of the Drive folder to read.
        kb: Knowledge base that receives the documents.
        creds: Credentials passed to the Drive client.
    """
    folder_mime_type = "application/vnd.google-apps.folder"
    service = build('drive', 'v3', credentials=creds)

    page_token = None
    while True:
        # files.list is paginated: request nextPageToken and loop until it is
        # absent so folders with more than one page of files sync completely.
        response = service.files().list(
            q=f"'{folder_id}' in parents and trashed = false",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token
        ).execute()

        for item in response.get('files', []):
            # A folder has no content of its own to download.
            if item.get('mimeType') == folder_mime_type:
                continue
            # Download and add to KB
            content = download_file(service, item['id'])
            kb.add_document(
                title=item['name'],
                content=content,
                category="google-drive"
            )

        page_token = response.get('nextPageToken')
        if not page_token:
            break
Deployment
Docker Compose
Copy
version: '3.8'

services:
  # API service, built from the current directory.
  api:
    build: .
    ports:
      - "8000:8000"
    volumes:
      # Host ./data is mounted at /app/data inside the container.
      - ./data:/app/data
    environment:
      # Location of the memory file, on the mounted volume.
      - MEMVID_FILE=/app/data/knowledge.mv2

  # Web frontend, built from ./frontend; depends on the api service.
  web:
    build: ./frontend
    ports:
      - "3000:3000"
    depends_on:
      - api