Introduction
Just as human memory operates at different timescales—remembering what you had for breakfast differently than remembering your childhood home—agent memory systems benefit from separating short-term and long-term storage. Each serves different purposes and requires different implementation strategies.
The Key Insight: Short-term memory optimizes for speed and recency; long-term memory optimizes for persistence and relevance. An effective agent needs both.
Short-Term Memory
Short-term memory handles the immediate context of the current interaction:
Characteristics
| Property | Short-Term Memory |
|---|---|
| Duration | Current session (minutes to hours) |
| Access speed | Immediate (in-memory) |
| Capacity | Limited (context window) |
| Storage | RAM, in-process |
| Purpose | Immediate reasoning context |
What Goes in Short-Term Memory
🐍short_term_contents.py
@dataclass
class ShortTermMemory:
    """Components of short-term memory.

    Illustrative container for the state an agent keeps only for the
    duration of a single session.
    """

    # Messages exchanged so far in the current conversation
    conversation_history: list[Message]

    # The task being worked on right now (if any) and its progress state
    current_task: Optional[Task]
    task_progress: dict[str, Any]

    # Tool invocations (and their results) made recently in this session
    recent_tool_calls: list[ToolCall]

    # Entities (names, ids, topics) referenced during this session
    mentioned_entities: set[str]

    # Free-form working notes used while reasoning
    scratchpad: str

    # When the session began and how many turns have elapsed
    session_start: datetime
    turn_count: int
Implementing Short-Term Memory
🐍short_term_implementation.py
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional

@dataclass
class Message:
    """A single conversation message with its creation time."""
    role: str
    content: str
    timestamp: datetime = field(default_factory=datetime.now)

class ShortTermMemoryManager:
    """Manages short-term memory for an agent session.

    Keeps a bounded conversation history (a hard message-count cap via a
    deque plus a rough token-budget cap), the active task, and a
    free-form scratchpad. All state lives in process memory and is
    discarded by clear() at session end.
    """

    def __init__(
        self,
        max_messages: int = 50,
        max_tokens: int = 100000
    ):
        self.max_messages = max_messages
        self.max_tokens = max_tokens
        # deque(maxlen=...) silently evicts the oldest message once full
        self.messages: deque[Message] = deque(maxlen=max_messages)
        self.current_task: Optional[dict] = None
        self.scratchpad: str = ""
        self.session_start = datetime.now()

    def add_message(self, role: str, content: str) -> None:
        """Append a message, then trim oldest messages to the token budget."""
        self.messages.append(Message(role=role, content=content))

        # Drop oldest messages while over budget, but always keep at least
        # the two most recent so the model retains the immediate exchange.
        while self._estimate_tokens() > self.max_tokens and len(self.messages) > 2:
            self.messages.popleft()

    def get_messages_for_context(self) -> list[dict]:
        """Get messages formatted for LLM context (role/content dicts)."""
        return [
            {"role": msg.role, "content": msg.content}
            for msg in self.messages
        ]

    def _estimate_tokens(self) -> int:
        """Rough token count estimate (~4 characters per token)."""
        total_chars = sum(len(msg.content) for msg in self.messages)
        return total_chars // 4

    def set_task(self, task: dict) -> None:
        """Set the current active task."""
        self.current_task = task

    def update_scratchpad(self, content: str) -> None:
        """Replace the reasoning scratchpad contents."""
        self.scratchpad = content

    def get_full_context(self) -> dict:
        """Get complete short-term memory state as a plain dict."""
        return {
            "messages": self.get_messages_for_context(),
            "current_task": self.current_task,
            "scratchpad": self.scratchpad,
            # total_seconds(), not .seconds: .seconds ignores the days
            # component and would wrap around for sessions over 24 hours.
            "session_duration": int((datetime.now() - self.session_start).total_seconds()),
            "turn_count": sum(1 for m in self.messages if m.role == "user")
        }

    def clear(self) -> None:
        """Clear short-term memory (end of session)."""
        self.messages.clear()
        self.current_task = None
        self.scratchpad = ""
        self.session_start = datetime.now()
Long-Term Memory
Long-term memory persists across sessions and accumulates over time:
Characteristics
| Property | Long-Term Memory |
|---|---|
| Duration | Days to years (persistent) |
| Access speed | Slower (database queries) |
| Capacity | Large (scalable storage) |
| Storage | Database, vector store, file system |
| Purpose | Accumulated knowledge and history |
What Goes in Long-Term Memory
🐍long_term_contents.py
@dataclass
class LongTermMemory:
    """Components of long-term memory.

    Illustrative container for the knowledge an agent persists across
    sessions.
    """

    # Per-user profiles and stated preferences, keyed by user id
    user_profiles: dict[str, UserProfile]

    # Facts the agent has accumulated over time
    knowledge_base: list[KnowledgeFact]

    # Condensed records of past conversations
    conversation_summaries: list[ConversationSummary]

    # Behaviors learned from feedback and corrections
    learned_patterns: list[LearnedPattern]

    # History of projects and tasks worked on
    project_history: list[Project]

    # Relationships between known entities
    entity_graph: KnowledgeGraph
Implementing Long-Term Memory
🐍long_term_implementation.py
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import json
import uuid

@dataclass
class MemoryEntry:
    """A single persisted memory record."""

    id: str
    user_id: str
    content: str
    memory_type: str  # "fact", "preference", "event", "summary"
    embedding: list[float]
    importance: float  # 0.0-1.0; used for filtering and ranking
    created_at: datetime
    last_accessed: datetime
    access_count: int
    metadata: dict

class LongTermMemoryManager:
    """Manages persistent long-term memory.

    Each memory is written to two places: the vector store holds the
    embedding plus a small filterable payload for semantic search, and
    the metadata store holds the complete record.
    """

    def __init__(
        self,
        vector_store,    # Vector database client
        metadata_store,  # SQL/NoSQL database client
        embedding_model  # Model for generating embeddings
    ):
        self.vectors = vector_store
        self.metadata = metadata_store
        self.embedder = embedding_model

    async def store(
        self,
        user_id: str,
        content: str,
        memory_type: str,
        importance: float = 0.5,
        metadata: Optional[dict] = None
    ) -> str:
        """Store a new long-term memory and return its generated id."""

        # Generate embedding for semantic retrieval
        embedding = await self.embedder.embed(content)

        entry = MemoryEntry(
            id=self._generate_id(),
            user_id=user_id,
            content=content,
            memory_type=memory_type,
            embedding=embedding,
            importance=importance,
            created_at=datetime.now(),
            last_accessed=datetime.now(),
            access_count=0,
            metadata=metadata or {}
        )

        # Vector store: embedding + only the fields we filter on at query time
        await self.vectors.upsert(
            id=entry.id,
            vector=embedding,
            metadata={
                "user_id": user_id,
                "memory_type": memory_type,
                "importance": importance
            }
        )

        # Metadata store: the full entry
        await self.metadata.insert("memories", entry.__dict__)

        return entry.id

    async def retrieve(
        self,
        user_id: str,
        query: str,
        limit: int = 10,
        memory_types: Optional[list[str]] = None,
        min_importance: float = 0.0
    ) -> list[MemoryEntry]:
        """Retrieve up to `limit` memories semantically relevant to `query`."""

        # Generate query embedding
        query_embedding = await self.embedder.embed(query)

        # Filter by user, plus any optional type/importance constraints
        filters = {"user_id": user_id}
        if memory_types:
            filters["memory_type"] = {"$in": memory_types}
        if min_importance > 0:
            filters["importance"] = {"$gte": min_importance}

        results = await self.vectors.query(
            vector=query_embedding,
            limit=limit * 2,  # Fetch extra in case some lack metadata rows
            filter=filters
        )

        entries = []
        for result in results:
            # Stop early so access stats are bumped only for entries
            # actually returned to the caller, not the over-fetched extras.
            if len(entries) >= limit:
                break

            entry_data = await self.metadata.find_one(
                "memories",
                {"id": result.id}
            )
            if not entry_data:
                continue

            entry = MemoryEntry(**entry_data)
            entry.last_accessed = datetime.now()
            entry.access_count += 1
            await self.metadata.update(
                "memories",
                {"id": entry.id},
                {"last_accessed": entry.last_accessed, "access_count": entry.access_count}
            )
            entries.append(entry)

        return entries

    async def update_importance(self, memory_id: str, new_importance: float) -> None:
        """Update the importance score of a memory in both stores."""
        await self.vectors.update_metadata(
            id=memory_id,
            metadata={"importance": new_importance}
        )
        await self.metadata.update(
            "memories",
            {"id": memory_id},
            {"importance": new_importance}
        )

    async def delete_user_memories(self, user_id: str) -> int:
        """Delete all memories for a user (GDPR compliance).

        Returns the number of records removed from the metadata store.
        """
        memories = await self.metadata.find(
            "memories",
            {"user_id": user_id}
        )

        # Skip the vector-store call when there is nothing to delete;
        # some clients reject an empty id list.
        memory_ids = [m["id"] for m in memories]
        if memory_ids:
            await self.vectors.delete(ids=memory_ids)

        result = await self.metadata.delete_many(
            "memories",
            {"user_id": user_id}
        )
        return result.deleted_count

    def _generate_id(self) -> str:
        """Generate a unique id for a new memory.

        (The original snippet called this helper without defining it.)
        """
        return uuid.uuid4().hex
Memory Transitions
The key to effective memory is knowing when and how to move information between short-term and long-term storage:
Short-Term to Long-Term (Consolidation)
🐍consolidation.py
import json

class MemoryConsolidator:
    """Moves important information from short-term to long-term memory."""

    def __init__(
        self,
        short_term: "ShortTermMemoryManager",
        long_term: "LongTermMemoryManager",
        llm  # For extraction and summarization
    ):
        self.short_term = short_term
        self.long_term = long_term
        self.llm = llm

    async def consolidate_session(self, user_id: str) -> dict:
        """Called at end of session to extract persistent memories.

        Returns counts of stored preferences/facts plus the session summary.
        """
        messages = self.short_term.get_messages_for_context()
        # A session with fewer than two messages has no real exchange to mine
        if len(messages) < 2:
            return {"extracted": 0}

        return {
            "preferences": await self._extract_preferences(messages, user_id),
            "facts": await self._extract_facts(messages, user_id),
            "summary": await self._create_summary(messages, user_id)
        }

    async def _extract_preferences(
        self,
        messages: list[dict],
        user_id: str
    ) -> int:
        """Extract user preferences from conversation; returns count stored."""

        # NOTE: literal braces in the JSON example are doubled ({{ }}) so
        # str.format() does not treat them as replacement fields — with
        # single braces .format() raises KeyError.
        prompt = """Analyze this conversation and extract any user preferences expressed.
Return as JSON list: [{{"preference": "...", "importance": 0.0-1.0}}]

Conversation:
{conversation}

Look for preferences about:
- Communication style
- Technical depth
- Topics of interest
- Working patterns
- Tool preferences
"""

        response = await self.llm.generate(
            prompt.format(conversation=self._format_messages(messages))
        )

        stored = 0
        for pref in self._parse_json_list(response):
            # Skip malformed items rather than failing the whole session
            if not isinstance(pref, dict) or "preference" not in pref:
                continue
            await self.long_term.store(
                user_id=user_id,
                content=pref["preference"],
                memory_type="preference",
                importance=pref.get("importance", 0.5)
            )
            stored += 1
        return stored

    async def _extract_facts(
        self,
        messages: list[dict],
        user_id: str
    ) -> int:
        """Extract important facts from conversation; returns count stored."""

        # Braces doubled for str.format(), as above.
        prompt = """Extract important facts from this conversation.
Return as JSON list: [{{"fact": "...", "importance": 0.0-1.0}}]

Only extract facts that would be useful to remember for future conversations.
Skip trivial or temporary information.

Conversation:
{conversation}
"""

        response = await self.llm.generate(
            prompt.format(conversation=self._format_messages(messages))
        )

        stored = 0
        for fact in self._parse_json_list(response):
            if not isinstance(fact, dict) or "fact" not in fact:
                continue
            # Only store somewhat important facts
            if fact.get("importance", 0) > 0.3:
                await self.long_term.store(
                    user_id=user_id,
                    content=fact["fact"],
                    memory_type="fact",
                    importance=fact["importance"]
                )
                stored += 1
        return stored

    async def _create_summary(
        self,
        messages: list[dict],
        user_id: str
    ) -> str:
        """Create and persist a short summary of the conversation."""

        prompt = """Summarize this conversation in 2-3 sentences.
Focus on: main topics, outcomes, any decisions made.

Conversation:
{conversation}
"""

        summary = await self.llm.generate(
            prompt.format(conversation=self._format_messages(messages))
        )

        await self.long_term.store(
            user_id=user_id,
            content=summary,
            memory_type="summary",
            importance=0.5,
            metadata={"message_count": len(messages)}
        )
        return summary

    @staticmethod
    def _parse_json_list(response: str) -> list:
        """Parse an LLM response that is expected to be a JSON list.

        LLMs sometimes wrap JSON in markdown fences or emit invalid JSON;
        treat anything unparseable as "nothing extracted" instead of
        raising and aborting the whole consolidation.
        """
        text = response.strip()
        if text.startswith("```"):
            text = text.strip("`").strip()
            # Drop an optional language tag like ```json
            if text.startswith("json"):
                text = text[4:]
        try:
            parsed = json.loads(text)
        except (json.JSONDecodeError, ValueError):
            return []
        return parsed if isinstance(parsed, list) else []

    def _format_messages(self, messages: list[dict]) -> str:
        """Render messages as 'ROLE: content' lines for prompting."""
        return "\n".join([
            f"{m['role'].upper()}: {m['content']}"
            for m in messages
        ])
Long-Term to Short-Term (Retrieval)
🐍retrieval.py
class MemoryRetriever:
    """Retrieves relevant long-term memories for current context."""

    def __init__(
        self,
        long_term: "LongTermMemoryManager",
        max_memories: int = 10,
        max_tokens: int = 2000
    ):
        self.long_term = long_term
        self.max_memories = max_memories
        self.max_tokens = max_tokens

    async def get_context_memories(
        self,
        user_id: str,
        current_message: str,
        conversation_history: list[dict]
    ) -> str:
        """Get relevant long-term memories formatted for prompt inclusion."""

        # Query combines the new message with recent history for context
        query = self._build_query(current_message, conversation_history)

        memories = []

        # Always include user preferences
        memories.extend(await self.long_term.retrieve(
            user_id=user_id,
            query=query,
            limit=3,
            memory_types=["preference"]
        ))

        # Relevant facts
        memories.extend(await self.long_term.retrieve(
            user_id=user_id,
            query=query,
            limit=5,
            memory_types=["fact"]
        ))

        # Relevant past summaries
        memories.extend(await self.long_term.retrieve(
            user_id=user_id,
            query=query,
            limit=2,
            memory_types=["summary"]
        ))

        return self._format_memories(memories)

    def _build_query(
        self,
        current_message: str,
        history: list[dict]
    ) -> str:
        """Build a query from the current message plus up to 3 recent ones."""
        recent_text = " ".join(m["content"] for m in history[-3:])
        return f"{current_message} {recent_text}"

    def _format_memories(self, memories: "list[MemoryEntry]") -> str:
        """Format memories for prompt inclusion, grouped by type."""
        if not memories:
            return ""

        sections = {"preference": [], "fact": [], "summary": []}
        for mem in memories:
            # Types without a dedicated section (e.g. "event") are skipped
            # instead of raising KeyError as the original did.
            if mem.memory_type in sections:
                sections[mem.memory_type].append(mem.content)

        formatted = []
        if sections["preference"]:
            formatted.append("User Preferences:")
            formatted.extend(f" - {pref}" for pref in sections["preference"])
        if sections["fact"]:
            formatted.append("\nRelevant Facts:")
            formatted.extend(f" - {fact}" for fact in sections["fact"])
        if sections["summary"]:
            formatted.append("\nPrevious Conversations:")
            formatted.extend(f" - {summary}" for summary in sections["summary"])
        return "\n".join(formatted)
Implementation Patterns
Common patterns for combining short-term and long-term memory:
The Memory-Augmented Agent Pattern
🐍memory_augmented_agent.py
from typing import Optional

class MemoryAugmentedAgent:
    """Agent with integrated short and long-term memory."""

    def __init__(
        self,
        llm,
        short_term: "ShortTermMemoryManager",
        long_term: "LongTermMemoryManager",
        consolidator: "MemoryConsolidator",
        retriever: "MemoryRetriever"
    ):
        self.llm = llm
        self.short_term = short_term
        self.long_term = long_term
        self.consolidator = consolidator
        self.retriever = retriever
        self.current_user_id: Optional[str] = None

    async def start_session(self, user_id: str) -> None:
        """Initialize a new session and seed context with user preferences."""
        self.current_user_id = user_id
        self.short_term.clear()

        # Load stored preferences into the short-term scratchpad
        profile = await self.long_term.retrieve(
            user_id=user_id,
            query="user profile preferences",
            limit=5,
            memory_types=["preference"]
        )
        if profile:
            self.short_term.update_scratchpad(
                f"User preferences: {[p.content for p in profile]}"
            )

    async def process_message(self, message: str) -> str:
        """Process a user message with memory augmentation."""

        # Record the incoming message in short-term memory
        self.short_term.add_message("user", message)

        # Pull relevant long-term memories for this turn
        memory_context = await self.retriever.get_context_memories(
            user_id=self.current_user_id,
            current_message=message,
            conversation_history=self.short_term.get_messages_for_context()
        )

        # Build augmented prompt and generate the response
        system_prompt = self._build_system_prompt(memory_context)
        response = await self.llm.generate(
            system=system_prompt,
            messages=self.short_term.get_messages_for_context()
        )

        self.short_term.add_message("assistant", response)
        return response

    async def end_session(self) -> dict:
        """End session: consolidate memories, then clear short-term state."""
        results = await self.consolidator.consolidate_session(
            user_id=self.current_user_id
        )
        self.short_term.clear()
        return results

    def _build_system_prompt(self, memory_context: str) -> str:
        """Prepend retrieved memory context (if any) to the base prompt."""
        base_prompt = "You are a helpful AI assistant."

        if memory_context:
            return f"""{base_prompt}

MEMORY CONTEXT (from previous interactions):
{memory_context}

Use this context to personalize your responses and maintain continuity."""

        return base_prompt
Memory Budget
Reserve a portion of your context window for memory context (e.g., 20%). This ensures you always have room for the current conversation while still benefiting from retrieved memories.
Summary
Key differences between short-term and long-term memory:
| Aspect | Short-Term | Long-Term |
|---|---|---|
| Duration | Current session | Persistent |
| Storage | In-memory | Database |
| Speed | Immediate | Query required |
| Capacity | Limited | Scalable |
| Content | Messages, task state | Facts, preferences, summaries |
| Update frequency | Every turn | End of session |
- Use short-term for immediate conversation context and active task state
- Use long-term for persistent knowledge, preferences, and history
- Consolidate important information from short to long-term at session end
- Retrieve relevant long-term memories at session start and during conversation
- Balance memory context with current conversation in prompt
Next: We'll explore vector databases—the technology that makes efficient semantic memory retrieval possible.