Cache and retrieve structured LLM extractions with CortexDB and Instructor.
Instructor Integration
CortexDB integrates with Instructor to cache structured extraction results and provide few-shot examples from past extractions. Instructor patches LLM clients to return validated Pydantic models — CortexDB persists those models for reuse and semantic retrieval.
Installation
pip install cortexdb-instructor
Quick Start
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache
from pydantic import BaseModel

# Connect to a local CortexDB instance and scope the cache to one tenant.
client = Cortex(base_url="http://localhost:3141")
cache = CortexDBCache(client=client, tenant_id="my-app")


class UserInfo(BaseModel):
    name: str
    age: int
    email: str


# Cache an extraction result produced by Instructor.
user = UserInfo(name="Alice", age=30, email="[email protected]")
cache.set("Extract user info from: Alice is 30, [email protected]", user)

# On subsequent calls, check the cache first.
cached = cache.get("Extract user info from: Alice is 30, [email protected]", UserInfo)
if cached is not None:
    print(f"Cache hit: {cached.name}")
Caching Structured Outputs
The CortexDBCache stores extraction results keyed by a deterministic hash of the prompt and model class. This means identical extraction requests are served from the cache without an LLM call.
import instructor
from openai import OpenAI
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache

# NOTE: UserInfo is the Pydantic model defined in the Quick Start above.
openai_client = instructor.from_openai(OpenAI())
cortex = Cortex(base_url="http://localhost:3141")
cache = CortexDBCache(client=cortex, tenant_id="extractions")

prompt = "Extract: John Smith, age 42, works at Acme Corp"

# Check the cache first; only call the LLM on a miss.
# Use an explicit None check — Pydantic model instances are always truthy,
# so `is not None` is the correct (and consistent) cache-hit test.
cached = cache.get(prompt, UserInfo)
if cached is not None:
    user = cached
else:
    user = openai_client.chat.completions.create(
        model="gpt-4o",
        response_model=UserInfo,
        messages=[{"role": "user", "content": prompt}],
    )
    # Persist the fresh extraction so the next identical prompt is a hit.
    cache.set(prompt, user)
Few-Shot Examples from Past Extractions
Use remember_extraction and recall_extractions to build a library of past extractions that can be injected as few-shot examples into new prompts.
from pydantic import BaseModel
from cortexdb import Cortex
from cortexdb_instructor import remember_extraction, recall_extractions

cortex = Cortex(base_url="http://localhost:3141")


class OrderDetails(BaseModel):
    order_id: str
    total: float
    items: list[str]


# Store extractions as they happen.
order = OrderDetails(order_id="ORD-123", total=99.50, items=["Widget", "Gadget"])
remember_extraction(
    cortex,
    prompt="Extract order: Order #ORD-123, total $99.50 for Widget and Gadget",
    result=order,
    tenant_id="my-app",
)

# Later, recall similar extractions for few-shot prompting.
examples = recall_extractions(
    cortex,
    query="order extraction examples",
    model_class=OrderDetails,
    tenant_id="my-app",
    top_k=3,
)

# Serialize each recalled model as an assistant turn for the few-shot prompt.
few_shot_messages = [
    {"role": "assistant", "content": ex.model_dump_json()}
    for ex in examples
]
Configuration
| Parameter | Default | Description |
|---|---|---|
| client | Required | Initialised Cortex client instance |
| tenant_id | "default" | Tenant identifier for multi-tenant isolation |
| namespace | None | Optional namespace to scope cached extractions |
Helper Function Parameters
| Parameter | Default | Description |
|---|---|---|
| client | Required | Initialised Cortex client instance |
| prompt | Required | The prompt text used for extraction |
| result | Required | The Pydantic model returned by Instructor |
| tenant_id | "default" | Tenant identifier |
| namespace | None | Optional namespace scope |
| top_k | 5 | Max results for recall_extractions |
| metadata | None | Optional additional metadata dict |
Complete Example
import instructor
from openai import OpenAI
from pydantic import BaseModel
from cortexdb import Cortex
from cortexdb_instructor import CortexDBCache, remember_extraction, recall_extractions

# Setup: Instructor-patched OpenAI client plus a tenant-scoped CortexDB cache.
openai_client = instructor.from_openai(OpenAI())
cortex = Cortex(base_url="http://localhost:3141")
cache = CortexDBCache(client=cortex, tenant_id="support-tickets")


class TicketClassification(BaseModel):
    category: str
    priority: str
    summary: str


def classify_ticket(text: str) -> TicketClassification:
    """Classify a support ticket with caching and few-shot examples.

    Returns the cached classification when the same ticket text was seen
    before; otherwise extracts a fresh one via Instructor, enriching the
    prompt with up to three similar past classifications.
    """
    # 1. Check the cache — identical ticket text skips the LLM entirely.
    cached = cache.get(text, TicketClassification)
    if cached is not None:
        return cached

    # 2. Recall similar past classifications for few-shot context.
    examples = recall_extractions(
        cortex,
        query=text,
        model_class=TicketClassification,
        tenant_id="support-tickets",
        top_k=3,
    )
    messages = [{"role": "system", "content": "Classify the support ticket."}]
    for ex in examples:
        messages.append({
            "role": "assistant",
            "content": f"Example: {ex.model_dump_json()}",
        })
    messages.append({"role": "user", "content": text})

    # 3. Extract with Instructor (validated against TicketClassification).
    result = openai_client.chat.completions.create(
        model="gpt-4o",
        response_model=TicketClassification,
        messages=messages,
    )

    # 4. Cache the result and persist it as a future few-shot example.
    cache.set(text, result)
    remember_extraction(cortex, prompt=text, result=result, tenant_id="support-tickets")
    return result