Introduction
LLMs are stateless — each API call is independent. Agents need memory to maintain context across interactions, learn from experience, and build on previous work. This section explores how to implement effective memory systems.
Memory is Identity: An agent without memory is like meeting someone for the first time at every interaction. Memory allows agents to build relationships, learn patterns, and improve over time.
Types of Memory
| Type | Duration | Purpose | Implementation |
|---|---|---|---|
| Working | Single action | Current context | Function parameters |
| Short-term | Current session | Recent history | In-memory list |
| Long-term | Persistent | Learned knowledge | Vector database |
| Episodic | Per task | Task experiences | Structured storage |
| Semantic | Persistent | Facts and concepts | Knowledge graph |
Working Memory
The immediate context for the current action:
🐍working_memory.py
1@dataclass
2class WorkingMemory:
3 """Immediate context for decision-making."""
4 goal: str
5 current_step: str
6 recent_observation: str
7 available_tools: list[dict]
8
9 def to_context(self) -> str:
10 return f"""
11Goal: {self.goal}
12Current Step: {self.current_step}
13Last Result: {self.recent_observation}
14Available Tools: {[t['name'] for t in self.available_tools]}
15"""Short-Term Memory
Recent actions and their results within the current session:
🐍short_term_memory.py
@dataclass
class ActionRecord:
    """Record of a single action."""
    action: str  # name of the tool/action that was invoked
    params: dict  # arguments the action was called with
    result: str  # raw result text (also used as the error message on failure)
    timestamp: datetime  # set via datetime.now() when the record is created (naive local time)
    success: bool  # True if the action succeeded
9
10
11class ShortTermMemory:
12 """Maintains recent action history."""
13
14 def __init__(self, max_entries: int = 100):
15 self.entries: list[ActionRecord] = []
16 self.max_entries = max_entries
17
18 def add(self, action: str, params: dict, result: str, success: bool) -> None:
19 entry = ActionRecord(
20 action=action,
21 params=params,
22 result=result,
23 timestamp=datetime.now(),
24 success=success,
25 )
26 self.entries.append(entry)
27
28 # Trim if needed
29 if len(self.entries) > self.max_entries:
30 self.entries = self.entries[-self.max_entries:]
31
32 def get_recent(self, n: int = 10) -> list[ActionRecord]:
33 """Get the n most recent entries."""
34 return self.entries[-n:]
35
36 def get_failures(self) -> list[ActionRecord]:
37 """Get all failed actions (for learning)."""
38 return [e for e in self.entries if not e.success]
39
40 def summarize(self) -> str:
41 """Create a summary of recent actions."""
42 recent = self.get_recent(5)
43 lines = []
44 for entry in recent:
45 status = "✓" if entry.success else "✗"
46 lines.append(f"{status} {entry.action}: {entry.result[:100]}")
47 return "\n".join(lines)Long-Term Memory
Persistent knowledge stored across sessions, typically using vector databases:
🐍long_term_memory.py
1class LongTermMemory:
2 """Persistent memory using vector similarity search."""
3
4 def __init__(self, vector_store, embedding_model):
5 self.store = vector_store
6 self.embedder = embedding_model
7
8 def add(self, content: str, metadata: dict | None = None) -> None:
9 """Store a memory with optional metadata."""
10 embedding = self.embedder.embed(content)
11 self.store.add(
12 id=str(uuid4()),
13 embedding=embedding,
14 content=content,
15 metadata=metadata or {},
16 )
17
18 def recall(self, query: str, k: int = 5) -> list[dict]:
19 """Retrieve relevant memories."""
20 query_embedding = self.embedder.embed(query)
21 results = self.store.search(query_embedding, k=k)
22 return [
23 {
24 "content": r.content,
25 "score": r.score,
26 "metadata": r.metadata,
27 }
28 for r in results
29 ]
30
31 def forget(self, filter_fn) -> int:
32 """Remove memories matching a filter."""
33 # Implementation depends on vector store
34 passState Management
Agent state tracks everything needed to continue execution:
🐍agent_state.py
@dataclass
class AgentState:
    """Complete state of an agent execution.

    Tracks everything needed to resume or inspect a run: the goal and
    plan, action/observation history, error streaks, and usage metrics.
    """

    # Goal and planning
    goal: str
    plan: Plan | None = None
    current_step_index: int = 0

    # Progress tracking
    started_at: datetime = field(default_factory=datetime.now)
    completed: bool = False
    final_result: str | None = None

    # History
    actions: list[ActionRecord] = field(default_factory=list)
    observations: list[str] = field(default_factory=list)

    # Error tracking
    errors: list[str] = field(default_factory=list)
    consecutive_failures: int = 0

    # Metrics
    total_tokens: int = 0
    total_cost: float = 0.0

    # Replanning policy: how many consecutive failures trigger a replan.
    # (Was a hard-coded 3 inside should_replan; now tunable per instance
    # while keeping the same default for existing callers.)
    replan_after_failures: int = 3

    def add_action(self, action: ActionRecord) -> None:
        """Record an action and update the failure streak / error log."""
        self.actions.append(action)
        if not action.success:
            self.consecutive_failures += 1
            # Failed actions carry their error text in .result.
            self.errors.append(action.result)
        else:
            # Any success resets the streak.
            self.consecutive_failures = 0

    def progress_summary(self) -> str:
        """Human-readable progress summary."""
        if self.plan:
            completed = sum(1 for s in self.plan.steps if s.status == "completed")
            total = len(self.plan.steps)
            return f"{completed}/{total} steps completed"
        # No plan yet: fall back to a raw action count.
        return f"{len(self.actions)} actions taken"

    def should_replan(self) -> bool:
        """Determine if replanning is needed (too many failures in a row)."""
        return self.consecutive_failures >= self.replan_after_failures
Checkpointing
For long-running agents, save state periodically:
🐍checkpointing.py
1import json
2from pathlib import Path
3
4
5class StateCheckpointer:
6 """Save and restore agent state."""
7
8 def __init__(self, checkpoint_dir: str = ".agent_checkpoints"):
9 self.dir = Path(checkpoint_dir)
10 self.dir.mkdir(exist_ok=True)
11
12 def save(self, state: AgentState, name: str = "latest") -> Path:
13 """Save state to disk."""
14 path = self.dir / f"{name}.json"
15
16 # Serialize state
17 data = {
18 "goal": state.goal,
19 "plan": self._serialize_plan(state.plan),
20 "actions": [self._serialize_action(a) for a in state.actions],
21 "completed": state.completed,
22 "current_step_index": state.current_step_index,
23 "saved_at": datetime.now().isoformat(),
24 }
25
26 path.write_text(json.dumps(data, indent=2))
27 return path
28
29 def load(self, name: str = "latest") -> AgentState | None:
30 """Load state from disk."""
31 path = self.dir / f"{name}.json"
32 if not path.exists():
33 return None
34
35 data = json.loads(path.read_text())
36 return self._deserialize_state(data)
37
38 def list_checkpoints(self) -> list[str]:
39 """List available checkpoints."""
40 return [p.stem for p in self.dir.glob("*.json")]Short-Term Memory
Efficient short-term memory with summarization for long sessions:
🐍efficient_short_term.py
1class EfficientShortTermMemory:
2 """Short-term memory with automatic summarization."""
3
4 def __init__(
5 self,
6 llm,
7 max_detailed: int = 20,
8 max_summarized: int = 100,
9 ):
10 self.llm = llm
11 self.detailed: list[ActionRecord] = []
12 self.summarized: list[str] = []
13 self.max_detailed = max_detailed
14 self.max_summarized = max_summarized
15
16 def add(self, record: ActionRecord) -> None:
17 """Add a new action record."""
18 self.detailed.append(record)
19
20 # Summarize older entries if needed
21 if len(self.detailed) > self.max_detailed:
22 self._compress()
23
24 def _compress(self) -> None:
25 """Compress older detailed entries into summary."""
26 # Take oldest 10 entries
27 to_compress = self.detailed[:10]
28 self.detailed = self.detailed[10:]
29
30 # Create summary
31 summary = self._summarize(to_compress)
32 self.summarized.append(summary)
33
34 # Trim summarized if needed
35 if len(self.summarized) > self.max_summarized:
36 # Combine old summaries
37 self._consolidate_summaries()
38
39 def _summarize(self, records: list[ActionRecord]) -> str:
40 """Summarize a batch of records."""
41 prompt = f"""
42Summarize these agent actions concisely (2-3 sentences):
43
44{self._format_records(records)}
45
46Focus on: what was done, key results, and any errors.
47"""
48 response = self.llm.generate(prompt)
49 return response.text
50
51 def get_context(self) -> str:
52 """Get full context for LLM."""
53 parts = []
54
55 if self.summarized:
56 parts.append("## Previous Context (Summarized)")
57 for summary in self.summarized[-3:]: # Last 3 summaries
58 parts.append(summary)
59
60 if self.detailed:
61 parts.append("## Recent Actions (Detailed)")
62 for record in self.detailed[-10:]:
63 parts.append(self._format_record(record))
64
65 return "\n\n".join(parts)Long-Term Memory
Implementing long-term memory with ChromaDB:
🐍chromadb_memory.py
1import chromadb
2from chromadb.utils import embedding_functions
3
4
5class ChromaLongTermMemory:
6 """Long-term memory backed by ChromaDB."""
7
8 def __init__(
9 self,
10 collection_name: str = "agent_memory",
11 persist_dir: str = ".memory",
12 ):
13 # Initialize ChromaDB
14 self.client = chromadb.PersistentClient(path=persist_dir)
15
16 # Use OpenAI embeddings (or any other)
17 self.embedding_fn = embedding_functions.OpenAIEmbeddingFunction(
18 model_name="text-embedding-3-small"
19 )
20
21 # Get or create collection
22 self.collection = self.client.get_or_create_collection(
23 name=collection_name,
24 embedding_function=self.embedding_fn,
25 )
26
27 def add(
28 self,
29 content: str,
30 metadata: dict | None = None,
31 id: str | None = None,
32 ) -> str:
33 """Add a memory."""
34 memory_id = id or str(uuid4())
35 self.collection.add(
36 ids=[memory_id],
37 documents=[content],
38 metadatas=[metadata or {}],
39 )
40 return memory_id
41
42 def recall(
43 self,
44 query: str,
45 k: int = 5,
46 filter: dict | None = None,
47 ) -> list[dict]:
48 """Recall relevant memories."""
49 results = self.collection.query(
50 query_texts=[query],
51 n_results=k,
52 where=filter,
53 )
54
55 memories = []
56 for i, doc in enumerate(results["documents"][0]):
57 memories.append({
58 "id": results["ids"][0][i],
59 "content": doc,
60 "metadata": results["metadatas"][0][i],
61 "distance": results["distances"][0][i],
62 })
63 return memories
64
65 def delete(self, id: str) -> None:
66 """Delete a memory."""
67 self.collection.delete(ids=[id])Memory Quality Matters
What you store matters as much as how you store it. Save insights and learnings, not just raw action logs. "Fixed auth bug by checking token expiry" is more useful than "Edited auth.py line 42".
Integrating Memory
Combining memory types into a unified system:
🐍unified_memory.py
class UnifiedMemory:
    """Combines all memory types into a unified interface.

    Every action goes into short-term memory; actions judged significant
    are additionally persisted to long-term memory for later recall.
    """

    def __init__(
        self,
        llm,
        vector_store,
        short_term_limit: int = 50,
        long_term_threshold: float = 0.7,
        embedding_model=None,
    ):
        self.short_term = EfficientShortTermMemory(llm, short_term_limit)
        # BUG FIX: LongTermMemory.__init__ requires (vector_store,
        # embedding_model); the old call passed only the store and raised
        # TypeError. The new trailing parameter defaults to None so the
        # constructor signature stays backward compatible.
        self.long_term = LongTermMemory(vector_store, embedding_model)
        # NOTE(review): stored but not yet used by _is_significant —
        # presumably intended for similarity-based filtering; confirm.
        self.long_term_threshold = long_term_threshold

    def add_action(self, record: ActionRecord) -> None:
        """Add an action to memory."""
        # Always add to short-term
        self.short_term.add(record)

        # Store significant results in long-term
        if self._is_significant(record):
            self.long_term.add(
                content=self._format_for_storage(record),
                metadata={
                    "action": record.action,
                    "success": record.success,
                    "timestamp": record.timestamp.isoformat(),
                },
            )

    def recall(self, query: str) -> dict:
        """Recall relevant context: recent short-term plus similar long-term."""
        # Get recent from short-term
        recent_context = self.short_term.get_context()

        # Get relevant from long-term
        relevant = self.long_term.recall(query, k=5)

        return {
            "recent": recent_context,
            "relevant": relevant,
        }

    def _format_for_storage(self, record: ActionRecord) -> str:
        """Render a record as prose for embedding and long-term storage.

        BUG FIX: called by add_action but previously never defined.
        """
        status = "succeeded" if record.success else "failed"
        return f"Action {record.action} {status} with params {record.params}: {record.result}"

    def _is_significant(self, record: ActionRecord) -> bool:
        """Determine if an action is worth long-term storage."""
        # Store errors (to learn from)
        if not record.success:
            return True
        # Store if result is substantial
        if len(record.result) > 500:
            return True
        # Store important actions
        significant_actions = ["write_file", "create_plan", "complete_task"]
        if record.action in significant_actions:
            return True
        return False
Summary
Memory and state management essentials:
- Memory Types: Working, short-term, long-term, episodic, semantic
- State: Goal, plan, actions, progress, errors
- Short-term: Recent actions with summarization
- Long-term: Vector similarity for persistent knowledge
- Integration: Unified interface combining all types
Coming Up: With memory in place, agents can remember. But to tackle complex goals, they need to plan. The next section covers planning and execution strategies.