Cache and retrieve structured LLM extractions with CortexDB and Instructor.

Instructor Integration

CortexDB integrates with Instructor to cache structured extraction results and provide few-shot examples from past extractions. Instructor patches LLM clients to return validated Pydantic models — CortexDB persists those models for reuse and semantic retrieval.

Installation

pip install cortexdb-instructor

Quick Start

# Quick start: persist one structured extraction and read it back.
from pydantic import BaseModel

from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache

# Connect to a local CortexDB instance and scope the cache to one tenant.
cortex_client = Cortex(base_url="http://localhost:3141")
cache = CortexDBCache(client=cortex_client, tenant_id="my-app")

# The Pydantic model an Instructor extraction would return.
class UserInfo(BaseModel):
    name: str
    age: int
    email: str

# Persist an extraction result keyed by its originating prompt.
extracted = UserInfo(name="Alice", age=30, email="[email protected]")
cache.set("Extract user info from: Alice is 30, [email protected]", extracted)

# Subsequent identical prompts are answered from the cache, not the LLM.
hit = cache.get("Extract user info from: Alice is 30, [email protected]", UserInfo)
if hit is not None:
    print(f"Cache hit: {hit.name}")

Caching Structured Outputs

The CortexDBCache stores extraction results keyed by a deterministic hash of the prompt and model class. This means identical extraction requests are served from the cache without a repeat LLM call.

import instructor
from openai import OpenAI
from pydantic import BaseModel
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache

# Target schema for the extraction; the model class is part of the cache key.
class UserInfo(BaseModel):
    name: str
    age: int
    email: str

openai_client = instructor.from_openai(OpenAI())
cortex = Cortex(base_url="http://localhost:3141")
cache = CortexDBCache(client=cortex, tenant_id="extractions")

prompt = "Extract: John Smith, age 42, works at Acme Corp"

# Check cache first — compare against None so a miss is unambiguous.
cached = cache.get(prompt, UserInfo)
if cached is not None:
    user = cached
else:
    # Cache miss: run the Instructor-patched extraction, then store it.
    user = openai_client.chat.completions.create(
        model="gpt-4o",
        response_model=UserInfo,
        messages=[{"role": "user", "content": prompt}],
    )
    cache.set(prompt, user)

Few-Shot Examples from Past Extractions

Use remember_extraction and recall_extractions to build a library of past extractions that can be injected as few-shot examples into new prompts.

from pydantic import BaseModel  # required for the OrderDetails schema below

from cortexdb import Cortex
from cortexdb_instructor import remember_extraction, recall_extractions

cortex = Cortex(base_url="http://localhost:3141")

# Schema that past and future extractions share.
class OrderDetails(BaseModel):
    order_id: str
    total: float
    items: list[str]

# Store extractions as they happen
order = OrderDetails(order_id="ORD-123", total=99.50, items=["Widget", "Gadget"])
remember_extraction(
    cortex,
    prompt="Extract order: Order #ORD-123, total $99.50 for Widget and Gadget",
    result=order,
    tenant_id="my-app",
)

# Later, recall similar extractions for few-shot prompting
examples = recall_extractions(
    cortex,
    query="order extraction examples",
    model_class=OrderDetails,
    tenant_id="my-app",
    top_k=3,
)

# Serialize each recalled extraction as an assistant turn for few-shot prompting.
few_shot_messages = [
    {"role": "assistant", "content": ex.model_dump_json()}
    for ex in examples
]

Configuration

| Parameter | Default | Description |
|---|---|---|
| client | Required | Initialised Cortex client instance |
| tenant_id | "default" | Tenant identifier for multi-tenant isolation |
| namespace | None | Optional namespace to scope cached extractions |

Helper Function Parameters

| Parameter | Default | Description |
|---|---|---|
| client | Required | Initialised Cortex client instance |
| prompt | Required | The prompt text used for extraction |
| result | Required | The Pydantic model returned by Instructor |
| tenant_id | "default" | Tenant identifier |
| namespace | None | Optional namespace scope |
| top_k | 5 | Max results for recall_extractions |
| metadata | None | Optional additional metadata dict |

Complete Example

import instructor
from openai import OpenAI
from pydantic import BaseModel
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache, remember_extraction, recall_extractions

# Wire up the Instructor-patched OpenAI client and the CortexDB-backed cache.
openai_client = instructor.from_openai(OpenAI())
cortex = Cortex(base_url="http://localhost:3141")
cache = CortexDBCache(client=cortex, tenant_id="support-tickets")

# Schema the LLM output is validated against.
class TicketClassification(BaseModel):
    category: str
    priority: str
    summary: str

def classify_ticket(text: str) -> TicketClassification:
    """Classify a support ticket with caching and few-shot examples."""
    # Exact-match ticket text seen before? Serve it straight from CortexDB.
    hit = cache.get(text, TicketClassification)
    if hit is not None:
        return hit

    # Pull semantically similar past classifications as few-shot context.
    prior = recall_extractions(
        cortex,
        query=text,
        model_class=TicketClassification,
        tenant_id="support-tickets",
        top_k=3,
    )

    # System instruction, then one assistant turn per example, then the ticket.
    messages = [{"role": "system", "content": "Classify the support ticket."}]
    messages.extend(
        {"role": "assistant", "content": f"Example: {ex.model_dump_json()}"}
        for ex in prior
    )
    messages.append({"role": "user", "content": text})

    # Fresh extraction via Instructor, validated against TicketClassification.
    classification = openai_client.chat.completions.create(
        model="gpt-4o",
        response_model=TicketClassification,
        messages=messages,
    )

    # Persist for exact-match reuse (cache) and future few-shot recall (memory).
    cache.set(text, classification)
    remember_extraction(cortex, prompt=text, result=classification, tenant_id="support-tickets")

    return classification