Skip to main content
What you’ll build: A production-ready knowledge base that lets users search company docs, wikis, and FAQs using natural language. Time: 25 minutes | Difficulty: Intermediate

Overview

Build a searchable knowledge base that:
  • 📚 Ingests documents from multiple sources
  • 🔍 Supports natural language search
  • 💬 Answers questions with AI
  • 🔐 Respects access controls
  • 📊 Tracks popular queries

Architecture


Quick Start

import { use } from '@memvid/sdk';

// Open the knowledge base file in 'create' mode.
const kb = await use('basic', 'company-kb.mv2', { mode: 'create' });

// Describe the document first, then ingest it in a single call.
const handbook = {
  title: 'Employee Handbook',
  label: 'hr',
  file: './docs/handbook.pdf',
  tags: ['hr', 'policies', 'onboarding'],
};
await kb.put(handbook);

// Natural-language search, passing k = 5.
const searchOptions = { k: 5 };
const results = await kb.find('vacation policy', searchOptions);

// Question answering: print the `answer` field of the response.
const question = 'How many vacation days do employees get?';
const answer = await kb.ask(question);
console.log(answer.answer);

Full Implementation

Knowledge Base Class

from memvid_sdk import use
from pathlib import Path
from typing import List, Dict, Optional
from datetime import datetime
import json

class KnowledgeBase:
    """Company knowledge base stored in a single Memvid memory file.

    Thin wrapper around the handle returned by ``use('basic', ...)``:
    documents are written with ``put`` and read back with ``find``, ``ask``
    and ``timeline``. The ``category`` arguments used throughout are stored
    as the ``label`` of each document and turned into a ``label:<category>``
    scope when querying.
    """

    # Lower-case file extensions that ``add_folder`` ingests.
    SUPPORTED_SUFFIXES = ('.pdf', '.md', '.txt', '.docx')

    def __init__(self, memory_path: str = "knowledge.mv2"):
        """Open the memory file at ``memory_path`` with ``mode='auto'``."""
        self.mem = use('basic', memory_path, mode='auto')
        # Not read by any method of this class; kept so existing callers
        # that touch the attribute keep working.
        self.sources = {}

    @staticmethod
    def _field(obj, name, default=None):
        """Read ``name`` from a mapping-style or attribute-style result.

        Results are tried as a mapping first (``obj.get(name)``) and then as
        a plain object (``getattr``), so the wrapper works with either shape.
        """
        getter = getattr(obj, "get", None)
        if callable(getter):
            value = getter(name)
            if value is not None:
                return value
        return getattr(obj, name, default)

    def add_document(
        self,
        title: str,
        content: str,
        category: str,
        tags: Optional[List[str]] = None,
        source: Optional[str] = None
    ) -> int:
        """Add an in-memory text document to the knowledge base.

        Args:
            title: Document title.
            content: Full text of the document.
            category: Stored as the document's ``label``.
            tags: Optional tags; an empty list is stored when omitted.
            source: Optional origin of the document, stored in the metadata.

        Returns:
            Whatever ``put`` returns for the stored document.
        """
        frame_id = self.mem.put({
            "title": title,
            "label": category,
            "text": content,
            "tags": tags or [],
            "metadata": {
                "source": source,
                "added_at": datetime.now().isoformat()
            }
        })
        return frame_id

    def add_file(
        self,
        filepath: str,
        category: str,
        tags: Optional[List[str]] = None
    ) -> int:
        """Add a file from disk to the knowledge base.

        The file name becomes the title, the absolute path is handed to
        ``put`` as ``file``, and the path as given is kept in the metadata.

        Args:
            filepath: Path of the file to ingest.
            category: Stored as the document's ``label``.
            tags: Optional tags; an empty list is stored when omitted.

        Returns:
            Whatever ``put`` returns for the stored document.
        """
        path = Path(filepath)
        return self.mem.put({
            "title": path.name,
            "label": category,
            "file": str(path.absolute()),
            "tags": tags or [],
            "metadata": {
                "source": "file",
                "original_path": str(path)
            }
        })

    def add_folder(
        self,
        folder_path: str,
        category: str,
        tags: Optional[List[str]] = None
    ) -> int:
        """Recursively add every supported document under a folder.

        Args:
            folder_path: Root folder to walk.
            category: Stored as the ``label`` of every ingested file.
            tags: Optional tags applied to every ingested file.

        Returns:
            The number of files that were added.
        """
        count = 0
        for path in Path(folder_path).rglob("*"):
            # Compare the extension case-insensitively so files such as
            # "REPORT.PDF" are not silently skipped.
            if path.is_file() and path.suffix.lower() in self.SUPPORTED_SUFFIXES:
                self.add_file(str(path), category, tags)
                count += 1
        return count

    def search(self, query: str, category: Optional[str] = None, k: int = 10) -> List[Dict]:
        """Search the knowledge base.

        Args:
            query: Natural-language query passed to ``find``.
            category: When given, restricts the search to ``label:<category>``.
            k: Passed through to ``find`` as ``k``.

        Returns:
            One dict per hit with ``title``, ``snippet``, ``score`` and
            ``category`` keys. A result without hits yields an empty list.
        """
        scope = f"label:{category}" if category else None
        results = self.mem.find(query, k=k, scope=scope)
        hits = self._field(results, "hits") or []
        return [
            {
                "title": self._field(hit, "title"),
                "snippet": self._field(hit, "snippet"),
                "score": self._field(hit, "score"),
                "category": self._field(hit, "label")
            }
            for hit in hits
        ]

    def ask(self, question: str, category: Optional[str] = None) -> Dict:
        """Ask a question and return the answer with its source titles.

        Args:
            question: Natural-language question passed to ``ask`` with ``k=5``.
            category: When given, restricts the context to ``label:<category>``.

        Returns:
            A dict with ``answer``, ``sources`` (list of source titles) and
            ``confidence`` (``None`` when the result does not carry one).
        """
        scope = f"label:{category}" if category else None
        answer = self.mem.ask(question, k=5, scope=scope)
        sources = self._field(answer, "sources") or []
        return {
            "answer": self._field(answer, "answer"),
            "sources": [self._field(s, "title") for s in sources],
            # Read through the shared accessor: a plain getattr() on a
            # mapping-style result would always yield None.
            "confidence": self._field(answer, "confidence")
        }

    def get_categories(self) -> List[str]:
        """Return the distinct categories (labels), sorted alphabetically.

        This is a simplified version: only the first 1000 timeline entries
        are inspected. Entries without a label are ignored.
        """
        timeline = self.mem.timeline(limit=1000)
        entries = self._field(timeline, "entries") or []
        labels = {self._field(entry, "label") for entry in entries}
        labels.discard(None)
        # Sorted so the result is stable from call to call.
        return sorted(labels)

Web API with FastAPI

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, List

import os

app = FastAPI(title="Knowledge Base API")
# Take the memory file location from the MEMVID_FILE environment variable
# (the variable set in the Docker Compose example) so the store can live on
# a mounted volume; fall back to the local default file name when unset.
kb = KnowledgeBase(os.environ.get("MEMVID_FILE", "company-kb.mv2"))


class SearchRequest(BaseModel):
    """Request body for ``POST /search``."""

    # Natural-language search query.
    query: str
    # Optional category to restrict the search to; None searches everything.
    category: Optional[str] = None
    # Forwarded to KnowledgeBase.search as its third argument (k).
    limit: int = 10


class AskRequest(BaseModel):
    """Request body for ``POST /ask``."""

    # Natural-language question to answer.
    question: str
    # Optional category to restrict the question to; None uses everything.
    category: Optional[str] = None


class AddDocumentRequest(BaseModel):
    """Request body for ``POST /documents``."""

    # Document title.
    title: str
    # Full text of the document.
    content: str
    # Category the document is filed under.
    category: str
    # Optional list of tags; None when the client sends none.
    tags: Optional[List[str]] = None


@app.post("/search")
async def search(request: SearchRequest):
    """Search the knowledge base and return the hits plus their count."""
    hits = kb.search(
        query=request.query,
        category=request.category,
        k=request.limit,
    )
    hit_count = len(hits)
    return {"results": hits, "total": hit_count}


@app.post("/ask")
async def ask(request: AskRequest):
    """Answer a question from the knowledge base, optionally per category."""
    reply = kb.ask(question=request.question, category=request.category)
    return reply


@app.post("/documents")
async def add_document(request: AddDocumentRequest):
    """Store a new text document and report the value returned for it."""
    new_frame_id = kb.add_document(
        title=request.title,
        content=request.content,
        category=request.category,
        tags=request.tags,
    )
    return {"frame_id": new_frame_id, "status": "added"}


@app.get("/categories")
async def get_categories():
    """List the categories currently present in the knowledge base."""
    known_categories = kb.get_categories()
    return {"categories": known_categories}

React Frontend Component

import { useState } from 'react';

export function KnowledgeSearch() {
  const [query, setQuery] = useState('');
  const [results, setResults] = useState([]);
  const [answer, setAnswer] = useState('');

  const handleSearch = async () => {
    const res = await fetch('/api/search', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query, limit: 10 }),
    });
    const data = await res.json();
    setResults(data.results);
  };

  const handleAsk = async () => {
    const res = await fetch('/api/ask', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ question: query }),
    });
    const data = await res.json();
    setAnswer(data.answer);
  };

  return (
    <div className="knowledge-search">
      <input
        type="text"
        value={query}
        onChange={(e) => setQuery(e.target.value)}
        placeholder="Search or ask a question..."
      />
      <button onClick={handleSearch}>Search</button>
      <button onClick={handleAsk}>Ask AI</button>

      {answer && (
        <div className="answer-box">
          <h3>Answer</h3>
          <p>{answer}</p>
        </div>
      )}

      <div className="results">
        {results.map((r, i) => (
          <div key={i} className="result-card">
            <h4>{r.title}</h4>
            <p>{r.snippet}</p>
            <span className="category">{r.category}</span>
          </div>
        ))}
      </div>
    </div>
  );
}

Integrations

Sync from Notion

from notion_client import Client

import os  # needed for os.environ below; this snippet did not import it

# Notion API client, authenticated with the NOTION_TOKEN environment variable.
# Raises KeyError at import time when the variable is not set.
notion = Client(auth=os.environ["NOTION_TOKEN"])

def sync_notion_pages(database_id: str, kb: KnowledgeBase):
    """Sync every page of a Notion database into the knowledge base.

    Each page is stored under the "notion" category with the tags
    ``["notion", "synced"]``. The page title is read from the ``Name``
    property; pages with an empty title are stored as "Untitled".

    Args:
        database_id: ID of the Notion database to query.
        kb: Knowledge base that receives one document per page.
    """
    cursor = None
    while True:
        # Only send start_cursor once there is one, so the first request is
        # the same as an unpaginated query.
        query_args = {"database_id": database_id}
        if cursor is not None:
            query_args["start_cursor"] = cursor
        pages = notion.databases.query(**query_args)

        for page in pages["results"]:
            # A title is a list of text fragments: join them all rather than
            # taking only the first, and tolerate an empty list.
            title_parts = page["properties"]["Name"]["title"]
            title = "".join(part["plain_text"] for part in title_parts) or "Untitled"
            blocks = notion.blocks.children.list(page["id"])
            content = extract_text_from_blocks(blocks)

            kb.add_document(
                title=title,
                content=content,
                category="notion",
                tags=["notion", "synced"]
            )

        # Follow the result cursor until the database is exhausted.
        cursor = pages.get("next_cursor")
        if not pages.get("has_more") or cursor is None:
            break

Sync from Google Drive

from googleapiclient.discovery import build

def sync_google_drive(folder_id: str, kb: KnowledgeBase, creds):
    """Sync the files directly inside a Google Drive folder.

    Every non-trashed, non-folder file whose parent is ``folder_id`` is
    downloaded and stored under the "google-drive" category, using the file
    name as the title. Sub-folders are not descended into.

    Args:
        folder_id: ID of the Drive folder to list.
        kb: Knowledge base that receives one document per file.
        creds: Credentials object passed to the Drive client.
    """
    service = build('drive', 'v3', credentials=creds)
    files_api = service.files()

    # nextPageToken must be requested explicitly, otherwise list_next()
    # cannot tell that more result pages exist.
    request = files_api.list(
        q=f"'{folder_id}' in parents and trashed = false",
        fields="nextPageToken, files(id, name, mimeType)"
    )

    while request is not None:
        response = request.execute()

        for file in response.get('files', []):
            # Folders have no content of their own to download.
            if file.get('mimeType') == 'application/vnd.google-apps.folder':
                continue
            # Download and add to KB
            content = download_file(service, file['id'])
            kb.add_document(
                title=file['name'],
                content=content,
                category="google-drive"
            )

        # Returns None once the last page has been processed.
        request = files_api.list_next(request, response)

Deployment

Docker Compose

# Two services: the knowledge base API and the web frontend.
version: '3.8'
services:
  # API service, built from the Dockerfile in the current directory.
  api:
    build: .
    ports:
      - "8000:8000"
    volumes:
      # Host ./data is mounted at /app/data inside the container.
      - ./data:/app/data
    environment:
      # Points at a file inside the mounted volume.
      # NOTE(review): only takes effect if the API code reads MEMVID_FILE — confirm.
      - MEMVID_FILE=/app/data/knowledge.mv2

  # Frontend service, built from ./frontend.
  web:
    build: ./frontend
    ports:
      - "3000:3000"
    # Started after the api service.
    depends_on:
      - api

Next Steps