Introduction
Without memory, every agent interaction starts from scratch. The agent forgets what it just did, what the user prefers, and what it learned. Memory systems transform stateless agents into learning, contextual assistants.
In this section, we'll implement practical memory systems for your agent: working memory for current tasks, conversation memory for session context, and semantic memory for long-term knowledge.
Core Insight: Memory is about relevance, not storage. The challenge isn't remembering everything—it's remembering what matters for the current task.
Memory Types for Agents
Different memory types serve different purposes:
| Type | Duration | Content | Use Case |
|---|---|---|---|
| Working | Current task | Intermediate results, current state | Multi-step reasoning |
| Conversation | Current session | Recent exchanges, context | Coherent dialogue |
| Semantic | Long-term | Facts, learned preferences | Personalization |
| Episodic | Long-term | Past experiences, outcomes | Learning from history |
Memory Architecture
1from abc import ABC, abstractmethod
2from dataclasses import dataclass, field
3from typing import Any, Optional
4from datetime import datetime
5
@dataclass
class MemoryItem:
    """A single memory entry.

    Carries the content plus the bookkeeping fields (importance, access
    statistics) that stores use to rank and prune memories.
    """
    id: str
    content: str
    # Arbitrary store-specific annotations (e.g. source, tags).
    metadata: dict = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.now)
    # Relevance weight on a 0-1 scale; stores may prune low-importance items.
    importance: float = 0.5
    access_count: int = 0
    # None until the memory is first retrieved via access().
    # Fixed annotation: was `datetime = None`, which is not an optional type.
    last_accessed: Optional[datetime] = None

    def access(self) -> None:
        """Record memory access (for recency/frequency-based ranking)."""
        self.access_count += 1
        self.last_accessed = datetime.now()
21
22class MemoryStore(ABC):
23 """Abstract base for memory stores."""
24
25 @abstractmethod
26 async def add(self, item: MemoryItem) -> None:
27 """Add a memory."""
28 pass
29
30 @abstractmethod
31 async def get(self, id: str) -> Optional[MemoryItem]:
32 """Get a specific memory."""
33 pass
34
35 @abstractmethod
36 async def search(self, query: str, limit: int = 5) -> list[MemoryItem]:
37 """Search memories."""
38 pass
39
40 @abstractmethod
41 async def delete(self, id: str) -> bool:
42 """Delete a memory."""
43 passWorking Memory Implementation
Working memory holds information needed for the current task:
1from dataclasses import dataclass, field
2from typing import Any, Optional
3import uuid
4
5@dataclass
6class WorkingMemory:
7 """
8 Working memory for current task execution.
9
10 Holds intermediate results, current goals,
11 and task-relevant context.
12 """
13
14 # Current task
15 task: str = ""
16 task_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
17
18 # Intermediate results
19 results: dict[str, Any] = field(default_factory=dict)
20
21 # Current focus/goals
22 current_goal: str = ""
23 subgoals: list[str] = field(default_factory=list)
24
25 # Scratchpad for reasoning
26 scratchpad: list[str] = field(default_factory=list)
27
28 # Variables and state
29 variables: dict[str, Any] = field(default_factory=dict)
30
31 def set_task(self, task: str) -> None:
32 """Initialize for a new task."""
33 self.task = task
34 self.task_id = str(uuid.uuid4())[:8]
35 self.results = {}
36 self.current_goal = task
37 self.subgoals = []
38 self.scratchpad = []
39
40 def add_result(self, key: str, value: Any) -> None:
41 """Store an intermediate result."""
42 self.results[key] = value
43
44 def get_result(self, key: str) -> Any:
45 """Retrieve a stored result."""
46 return self.results.get(key)
47
48 def add_thought(self, thought: str) -> None:
49 """Add to scratchpad."""
50 self.scratchpad.append(thought)
51
52 def set_variable(self, name: str, value: Any) -> None:
53 """Set a working variable."""
54 self.variables[name] = value
55
56 def get_variable(self, name: str, default: Any = None) -> Any:
57 """Get a working variable."""
58 return self.variables.get(name, default)
59
60 def push_subgoal(self, goal: str) -> None:
61 """Push a subgoal onto the stack."""
62 self.subgoals.append(goal)
63
64 def pop_subgoal(self) -> Optional[str]:
65 """Pop the current subgoal."""
66 return self.subgoals.pop() if self.subgoals else None
67
68 def get_context_string(self) -> str:
69 """Get working memory as context for LLM."""
70 lines = [
71 f"Current Task: {self.task}",
72 f"Current Goal: {self.current_goal}",
73 ]
74
75 if self.subgoals:
76 lines.append(f"Subgoals: {' > '.join(self.subgoals)}")
77
78 if self.results:
79 lines.append("\nIntermediate Results:")
80 for key, value in self.results.items():
81 lines.append(f" {key}: {str(value)[:200]}")
82
83 if self.scratchpad:
84 lines.append("\nRecent Thoughts:")
85 for thought in self.scratchpad[-5:]:
86 lines.append(f" - {thought}")
87
88 return "\n".join(lines)
89
90 def clear(self) -> None:
91 """Clear working memory."""
92 self.task = ""
93 self.results = {}
94 self.current_goal = ""
95 self.subgoals = []
96 self.scratchpad = []
97 self.variables = {}Conversation Memory
Conversation memory maintains dialogue context within a session:
1from dataclasses import dataclass, field
2from typing import Literal, Optional
3from datetime import datetime
4import json
5
6@dataclass
7class Message:
8 """A conversation message."""
9 role: Literal["user", "assistant", "system"]
10 content: str
11 timestamp: datetime = field(default_factory=datetime.now)
12 metadata: dict = field(default_factory=dict)
13
14class ConversationMemory:
15 """
16 Manages conversation history with summarization.
17 """
18
19 def __init__(
20 self,
21 max_messages: int = 50,
22 max_tokens: int = 10000,
23 summarize_threshold: int = 30
24 ):
25 self.messages: list[Message] = []
26 self.max_messages = max_messages
27 self.max_tokens = max_tokens
28 self.summarize_threshold = summarize_threshold
29 self.summary: Optional[str] = None
30 self.client = None # Set to LLM client for summarization
31
32 def add_message(self, role: str, content: str, **metadata) -> None:
33 """Add a message to the conversation."""
34 self.messages.append(Message(
35 role=role,
36 content=content,
37 metadata=metadata
38 ))
39
40 # Check if we need to summarize
41 if len(self.messages) > self.summarize_threshold:
42 self._summarize_old_messages()
43
44 def _summarize_old_messages(self) -> None:
45 """Summarize older messages to save context."""
46 if len(self.messages) <= self.summarize_threshold:
47 return
48
49 # Keep recent messages, summarize older ones
50 old_messages = self.messages[:-10]
51 recent_messages = self.messages[-10:]
52
53 # Create summary (simplified - use LLM in production)
54 old_content = "\n".join(
55 f"{m.role}: {m.content[:100]}"
56 for m in old_messages
57 )
58
59 self.summary = f"Previous conversation summary:\n{old_content[:1000]}"
60 self.messages = recent_messages
61
62 def get_messages_for_llm(self) -> list[dict]:
63 """Get messages formatted for LLM API."""
64 result = []
65
66 # Add summary if exists
67 if self.summary:
68 result.append({
69 "role": "user",
70 "content": f"[Context from earlier conversation]\n{self.summary}"
71 })
72 result.append({
73 "role": "assistant",
74 "content": "I understand the context. Please continue."
75 })
76
77 # Add recent messages
78 for msg in self.messages:
79 result.append({
80 "role": msg.role,
81 "content": msg.content
82 })
83
84 return result
85
86 def search(self, query: str, limit: int = 5) -> list[Message]:
87 """Simple keyword search through messages."""
88 query_lower = query.lower()
89 matching = [
90 msg for msg in self.messages
91 if query_lower in msg.content.lower()
92 ]
93 return matching[:limit]
94
95 def get_last_n(self, n: int) -> list[Message]:
96 """Get last n messages."""
97 return self.messages[-n:]
98
99 def clear(self) -> None:
100 """Clear conversation history."""
101 self.messages = []
102 self.summary = None
103
104 def export(self) -> dict:
105 """Export conversation to dict."""
106 return {
107 "messages": [
108 {
109 "role": m.role,
110 "content": m.content,
111 "timestamp": m.timestamp.isoformat()
112 }
113 for m in self.messages
114 ],
115 "summary": self.summary
116 }
117
118 def import_from(self, data: dict) -> None:
119 """Import conversation from dict."""
120 self.summary = data.get("summary")
121 self.messages = [
122 Message(
123 role=m["role"],
124 content=m["content"],
125 timestamp=datetime.fromisoformat(m["timestamp"])
126 )
127 for m in data.get("messages", [])
128 ]Semantic Memory with Vectors
Semantic memory enables retrieval by meaning, not just keywords:
import uuid
from typing import Callable, Optional

import numpy as np
4
5class SemanticMemory:
6 """
7 Long-term semantic memory using embeddings.
8 """
9
10 def __init__(
11 self,
12 embedding_fn: callable = None,
13 similarity_threshold: float = 0.7
14 ):
15 self.memories: dict[str, MemoryItem] = {}
16 self.embeddings: dict[str, np.ndarray] = {}
17 self.embedding_fn = embedding_fn
18 self.similarity_threshold = similarity_threshold
19
20 async def add(
21 self,
22 content: str,
23 importance: float = 0.5,
24 metadata: dict = None
25 ) -> str:
26 """Add a memory with its embedding."""
27 memory_id = str(uuid.uuid4())[:8]
28
29 # Create memory item
30 item = MemoryItem(
31 id=memory_id,
32 content=content,
33 importance=importance,
34 metadata=metadata or {}
35 )
36 self.memories[memory_id] = item
37
38 # Generate embedding
39 if self.embedding_fn:
40 embedding = await self.embedding_fn(content)
41 self.embeddings[memory_id] = np.array(embedding)
42
43 return memory_id
44
45 async def search(
46 self,
47 query: str,
48 limit: int = 5,
49 min_importance: float = 0.0
50 ) -> list[MemoryItem]:
51 """Search memories by semantic similarity."""
52 if not self.embedding_fn or not self.embeddings:
53 return []
54
55 # Get query embedding
56 query_embedding = np.array(await self.embedding_fn(query))
57
58 # Calculate similarities
59 similarities = []
60 for memory_id, embedding in self.embeddings.items():
61 item = self.memories[memory_id]
62
63 # Skip low importance
64 if item.importance < min_importance:
65 continue
66
67 # Cosine similarity
68 similarity = np.dot(query_embedding, embedding) / (
69 np.linalg.norm(query_embedding) * np.linalg.norm(embedding)
70 )
71
72 if similarity >= self.similarity_threshold:
73 similarities.append((memory_id, similarity))
74
75 # Sort by similarity
76 similarities.sort(key=lambda x: x[1], reverse=True)
77
78 # Return top matches
79 results = []
80 for memory_id, sim in similarities[:limit]:
81 item = self.memories[memory_id]
82 item.access()
83 item.metadata["last_similarity"] = float(sim)
84 results.append(item)
85
86 return results
87
88 async def update_importance(
89 self,
90 memory_id: str,
91 importance: float
92 ) -> bool:
93 """Update memory importance."""
94 if memory_id not in self.memories:
95 return False
96 self.memories[memory_id].importance = importance
97 return True
98
99 async def forget(
100 self,
101 memory_id: str
102 ) -> bool:
103 """Remove a memory."""
104 if memory_id not in self.memories:
105 return False
106 del self.memories[memory_id]
107 if memory_id in self.embeddings:
108 del self.embeddings[memory_id]
109 return True
110
111 async def consolidate(self) -> int:
112 """Remove low-importance, rarely accessed memories."""
113 to_remove = []
114
115 for memory_id, item in self.memories.items():
116 # Remove if low importance and rarely accessed
117 if item.importance < 0.3 and item.access_count < 2:
118 to_remove.append(memory_id)
119
120 for memory_id in to_remove:
121 await self.forget(memory_id)
122
123 return len(to_remove)Embedding Function Example
1from anthropic import Anthropic
2import voyageai
3
4class EmbeddingService:
5 """Generate embeddings for semantic memory."""
6
7 def __init__(self, provider: str = "voyage"):
8 self.provider = provider
9 if provider == "voyage":
10 self.client = voyageai.Client()
11
12 async def embed(self, text: str) -> list[float]:
13 """Generate embedding for text."""
14 if self.provider == "voyage":
15 result = self.client.embed(
16 [text],
17 model="voyage-2"
18 )
19 return result.embeddings[0]
20
21 raise ValueError(f"Unknown provider: {self.provider}")
22
23 async def embed_batch(self, texts: list[str]) -> list[list[float]]:
24 """Generate embeddings for multiple texts."""
25 if self.provider == "voyage":
26 result = self.client.embed(
27 texts,
28 model="voyage-2"
29 )
30 return result.embeddings
31
32 raise ValueError(f"Unknown provider: {self.provider}")Integrating Memory into Agents
Let's bring all memory types together:
1from dataclasses import dataclass, field
2
@dataclass
class AgentMemorySystem:
    """
    Unified memory system for agents.

    Facade over working, conversation, and semantic memory so agents have a
    single entry point for context retrieval and learning.
    """

    working: WorkingMemory = field(default_factory=WorkingMemory)
    conversation: ConversationMemory = field(default_factory=ConversationMemory)
    # Annotation fixed: was `SemanticMemory = None`, which is not an optional
    # type. A default store is created in __post_init__ when none is injected.
    semantic: Optional[SemanticMemory] = None

    def __post_init__(self):
        # Not a default_factory so callers can inject a configured store.
        if self.semantic is None:
            self.semantic = SemanticMemory()

    async def get_relevant_context(
        self,
        query: str,
        max_semantic_results: int = 3
    ) -> str:
        """Assemble working-memory state and semantic matches for *query*."""
        context_parts = []

        # Working memory context (only while a task is active).
        if self.working.task:
            context_parts.append(
                "## Current Task Context\n" +
                self.working.get_context_string()
            )

        # Semantic memory search.
        if self.semantic:
            memories = await self.semantic.search(query, limit=max_semantic_results)
            if memories:
                context_parts.append(
                    "## Relevant Knowledge\n" +
                    "\n".join(f"- {m.content}" for m in memories)
                )

        return "\n\n".join(context_parts)

    async def learn(
        self,
        content: str,
        importance: float = 0.5,
        source: str = "conversation"
    ) -> None:
        """Store new information in semantic memory, tagged with its source."""
        await self.semantic.add(
            content=content,
            importance=importance,
            metadata={"source": source}
        )

    def start_task(self, task: str) -> None:
        """Initialize working memory for a new task and log it to the conversation."""
        self.working.set_task(task)
        self.conversation.add_message("user", task)

    def record_step(
        self,
        thought: str,
        action: str,
        result: str
    ) -> None:
        """Record one step: thought to the scratchpad, result under the action name."""
        self.working.add_thought(thought)
        self.working.add_result(action, result)

    async def end_task(self, success: bool, outcome: str) -> None:
        """Finalize the task: persist successful outcomes, then reset working memory."""
        # Only successful outcomes are promoted to long-term memory.
        if success:
            await self.learn(
                f"Successfully completed: {self.working.task}. "
                f"Outcome: {outcome}",
                importance=0.7
            )

        self.working.clear()
82
83
class AgentWithMemory:
    """Agent with integrated memory system.

    Wraps the Anthropic messages API with an AgentMemorySystem so each run
    starts from memory-derived context and records its outcome.
    """

    def __init__(self, tools: list, model: str = "claude-sonnet-4-20250514"):
        # NOTE(review): relies on `Anthropic` being imported earlier in the
        # module (it appears in the embedding snippet) — confirm the import
        # is actually in scope here.
        self.client = Anthropic()
        self.model = model
        # tools: objects exposing to_api_schema(); presumably the tool
        # wrapper class from the previous section — verify.
        self.tools = tools
        self.memory = AgentMemorySystem()

    async def run(self, task: str) -> str:
        """Run agent with memory-augmented context.

        Returns the model's final text answer; the outcome is persisted to
        semantic memory via end_task().
        """

        # Initialize memory for task
        self.memory.start_task(task)

        # Get relevant context from memory
        context = await self.memory.get_relevant_context(task)

        # Build system prompt with memory context
        system = f"""You are a helpful assistant with access to memory.

{context}

Use the information above to help with the current task.
"""

        # Main agent loop
        messages = self.memory.conversation.get_messages_for_llm()

        while True:
            # NOTE(review): this create() call is synchronous inside an async
            # method; it will block the event loop — consider the async client.
            response = self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                system=system,
                messages=messages,
                tools=[t.to_api_schema() for t in self.tools]
            )

            # Process response...
            # (Agent loop implementation from previous section)

            # Record steps in memory
            # self.memory.record_step(thought, action, result)

            # NOTE(review): as written, when stop_reason != "end_turn" the loop
            # repeats with an unchanged `messages` list — the elided tool-use
            # handling must append tool results or this never terminates.
            if response.stop_reason == "end_turn":
                break

        # End task
        # NOTE(review): success is hard-coded True; failure paths never reach
        # here as written — confirm once error handling is added.
        final_answer = response.content[0].text
        await self.memory.end_task(True, final_answer)

        return final_answer
Memory-Aware Prompting
def create_memory_aware_prompt(
    memory: AgentMemorySystem,
    task: str,
    include_history: bool = True
) -> str:
    """Build a prompt string that surfaces the agent's memory state.

    NOTE(review): `include_history` is currently unused; it is kept for
    interface compatibility — confirm whether conversation history should
    actually be appended when it is True.
    """

    sections = [f"Current task: {task}"]

    # Completed steps, as a bullet list of result key/value pairs.
    done = memory.working.results
    if done:
        bullets = "\n".join(f"- {key}: {val}" for key, val in done.items())
        sections.append("\nWhat you've already done:\n" + bullets)

    # Current goal, only while a task is active.
    if memory.working.task:
        sections.append(f"\nCurrent goal: {memory.working.current_goal}")

    return "\n".join(sections)
Summary
Memory transforms stateless agents into contextual, learning systems. We covered:
- Memory types: Working, conversation, and semantic memory for different purposes
- Working memory: Task state, intermediate results, and scratchpad
- Conversation memory: Session context with summarization
- Semantic memory: Long-term storage with vector similarity search
- Integration: Unified memory system for context-aware agents
In the next section, we'll tackle error handling and recovery—making agents resilient when things go wrong.