Cache and retrieve structured LLM extractions with CortexDB and Instructor.

Instructor Integration

CortexDB integrates with Instructor to cache structured extraction results and provide few-shot examples from past extractions. Instructor patches LLM clients to return validated Pydantic models — CortexDB persists those models for reuse and semantic retrieval.

Installation

pip install cortexdbai[instructor]

Quick Start

from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache

# Connect to CortexDB and scope the cache to a single tenant.
client = Cortex(base_url="https://api.cortexdb.ai")
cache = CortexDBCache(client=client, tenant_id="my-app")

# After extracting a structured result with Instructor
from pydantic import BaseModel

class UserInfo(BaseModel):
    # Target schema for the structured extraction.
    name: str
    age: int
    email: str

# Cache the extraction result
user = UserInfo(name="Alice", age=30, email="[email protected]")
cache.set("Extract user info from: Alice is 30, [email protected]", user)

# On subsequent calls, check the cache first
cached = cache.get("Extract user info from: Alice is 30, [email protected]", UserInfo)
if cached is not None:
    print(f"Cache hit: {cached.name}")

Caching Structured Outputs

The CortexDBCache stores extraction results keyed by a deterministic hash of the prompt and model class. This means identical extraction requests are served from the cache without an LLM call.

import instructor
from openai import OpenAI
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache

# Patch the OpenAI client so completions return validated Pydantic models.
openai_client = instructor.from_openai(OpenAI())
cortex = Cortex(base_url="https://api.cortexdb.ai")
cache = CortexDBCache(client=cortex, tenant_id="extractions")

prompt = "Extract: John Smith, age 42, works at Acme Corp"

# Check cache first. Compare against None rather than relying on truthiness,
# so a valid-but-falsy cached model is still treated as a hit (and the check
# matches the Quick Start example).
cached = cache.get(prompt, UserInfo)
if cached is not None:
    user = cached
else:
    # Cache miss: run the extraction once, then store it for next time.
    user = openai_client.chat.completions.create(
        model="gpt-4o",
        response_model=UserInfo,
        messages=[{"role": "user", "content": prompt}],
    )
    cache.set(prompt, user)

Few-Shot Examples from Past Extractions

Use remember_extraction and recall_extraction to build a library of past extractions that can be injected as few-shot examples into new prompts.

from pydantic import BaseModel

from cortexdb import Cortex
from cortexdb_instructor import remember_extraction, recall_extraction

cortex = Cortex(base_url="https://api.cortexdb.ai")

class OrderDetails(BaseModel):
    # Target schema for order extractions.
    order_id: str
    total: float
    items: list[str]

# Store extractions as they happen
order = OrderDetails(order_id="ORD-123", total=99.50, items=["Widget", "Gadget"])
remember_extraction(
    cortex,
    prompt="Extract order: Order #ORD-123, total $99.50 for Widget and Gadget",
    result=order,
    tenant_id="my-app",
)

# Later, recall a similar extraction for few-shot prompting
result = recall_extraction(
    cortex,
    query="order extraction examples",
    model_class=OrderDetails,
    tenant_id="my-app",
)

# Use the result to build a few-shot prompt; recall may return no context,
# so guard before constructing the example message.
if result.context:
    few_shot_messages = [{
        "role": "assistant",
        "content": result.context,
    }]

Configuration

| Parameter | Default | Description |
|---|---|---|
| client | Required | Initialised Cortex client instance |
| tenant_id | "default" | Tenant identifier for multi-tenant isolation |

Helper Function Parameters

| Parameter | Default | Description |
|---|---|---|
| client | Required | Initialised Cortex client instance |
| prompt | Required | The prompt text used for extraction |
| result | Required | The Pydantic model returned by Instructor |
| tenant_id | "default" | Tenant identifier |

Under the Hood

The integration wrapper maps to CortexDB's REST API:

# cache.set(prompt, user) / remember_extraction(...)
# Persists the prompt plus a serialised form of the extracted model as one event.
curl -X POST https://api.cortexdb.ai/v1/remember \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "content": "Extract: John Smith, age 42 -> {\"name\": \"John Smith\", \"age\": 42, ...}",
    "tenant_id": "my-app"
  }'
# Returns: { "event_id": "019d6359-d3cc-7671-9e4c-9151011fa016" }

# recall_extraction(cortex, query="order extraction", ...)
# Semantic retrieval over previously remembered extractions for this tenant.
curl -X POST https://api.cortexdb.ai/v1/recall \
  -H "Authorization: Bearer your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "order extraction",
    "tenant_id": "my-app"
  }'
# Returns: { "context": "...", "confidence": 0.88, "latency_ms": 11 }

Complete Example

import instructor
from openai import OpenAI
from pydantic import BaseModel
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache, remember_extraction, recall_extraction

# Setup: Instructor-patched OpenAI client, CortexDB client, tenant-scoped cache.
openai_client = instructor.from_openai(OpenAI())
cortex = Cortex(base_url="https://api.cortexdb.ai")
cache = CortexDBCache(client=cortex, tenant_id="support-tickets")

class TicketClassification(BaseModel):
    category: str
    priority: str
    summary: str

def classify_ticket(text: str) -> TicketClassification:
    """Classify a support ticket, reusing cached results and past examples."""

    # Identical requests are served straight from the cache — no LLM call.
    hit = cache.get(text, TicketClassification)
    if hit is not None:
        return hit

    # Pull a semantically similar past classification for few-shot context.
    prior = recall_extraction(
        cortex,
        query=text,
        model_class=TicketClassification,
        tenant_id="support-tickets",
    )

    messages = [{"role": "system", "content": "Classify the support ticket."}]
    if prior.context:
        messages.append(
            {"role": "assistant", "content": f"Example: {prior.context}"}
        )
    messages.append({"role": "user", "content": text})

    # Extract a validated TicketClassification with Instructor.
    classification = openai_client.chat.completions.create(
        model="gpt-4o",
        response_model=TicketClassification,
        messages=messages,
    )

    # Persist for future cache hits and few-shot recall.
    cache.set(text, classification)
    remember_extraction(cortex, prompt=text, result=classification, tenant_id="support-tickets")

    return classification