Introduction
Information synthesis is where research becomes valuable. It's not enough to gather sources; the agent must combine, compare, and distill information into coherent findings. This section covers strategies for synthesizing research from multiple sources.
Key Insight: Good synthesis isn't just summarization. It's about identifying patterns, resolving conflicts, and creating new understanding from diverse sources.
Synthesis Strategies
Different research tasks require different synthesis approaches:
| Strategy | Best For | Approach |
|---|---|---|
| Aggregative | Factual questions | Combine consistent facts from multiple sources |
| Comparative | Analyzing options | Compare and contrast different perspectives |
| Analytical | Understanding complex topics | Build a conceptual framework from sources |
| Narrative | Historical or process topics | Create a coherent story from fragments |
🐍python
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
5
@dataclass
class SynthesisInput:
    """Input for synthesis.

    Bundles the raw material (sources) with the research question the
    synthesis should answer.
    """
    # Each source dict carries at least: url, content, title.
    sources: List[Dict[str, str]]
    # The research question being answered.
    query: str
    # Optional narrowing focus injected into the prompt when non-empty.
    focus: str = ""
@dataclass
class SynthesisOutput:
    """Output from synthesis.

    A structured result combining a narrative summary with
    machine-usable findings and metadata.
    """
    # Prose answer combining all sources.
    summary: str
    # Bullet-point findings extracted from the synthesis.
    key_findings: List[str]
    # Maps source URL (or metadata key) to a contribution note.
    source_contributions: Dict[str, str]
    # 0.0-1.0 score derived from source agreement.
    confidence: float
    # Contradictions found between sources, one dict per conflict.
    conflicts: List[Dict[str, Any]]
class SynthesisStrategy(ABC):
    """Base class for synthesis strategies.

    Concrete strategies receive an LLM client and implement
    `synthesize` for one particular style of synthesis.
    """

    def __init__(self, llm_client):
        # Client is expected to expose an async `generate(prompt) -> str`.
        self.llm = llm_client

    @property
    @abstractmethod
    def name(self) -> str:
        """Short machine-readable name of the strategy."""

    @abstractmethod
    async def synthesize(
        self,
        input_data: SynthesisInput
    ) -> SynthesisOutput:
        """Run the strategy over the given input and return the synthesis."""
class AggregativeSynthesis(SynthesisStrategy):
    """
    Combine facts from multiple sources into a unified answer.
    Best for factual, well-defined questions.
    """

    @property
    def name(self) -> str:
        return "aggregative"

    async def synthesize(
        self,
        input_data: SynthesisInput
    ) -> SynthesisOutput:
        """Ask the LLM to merge all sources into one factual answer."""
        sources_text = self._format_sources(input_data.sources)

        prompt = f"""Synthesize information from multiple sources to answer a question.

QUESTION: {input_data.query}
{f"FOCUS: {input_data.focus}" if input_data.focus else ""}

SOURCES:
{sources_text}

Instructions:
1. Extract relevant facts from each source
2. Identify where sources agree
3. Note any conflicting information
4. Create a unified, factual answer

Provide:
SUMMARY: A comprehensive answer combining all sources
KEY_FINDINGS: Bullet points of main facts (one per line, starting with -)
CONFLICTS: Any contradictions between sources (one per line, starting with *)
CONFIDENCE: High/Medium/Low based on source agreement"""

        response = await self.llm.generate(prompt)
        return self._parse_response(response, input_data.sources)

    def _format_sources(self, sources: List[Dict]) -> str:
        """Render each source as a numbered, truncated text section."""
        formatted = []
        for i, source in enumerate(sources, 1):
            formatted.append(
                f"[Source {i}]: {source.get('title', 'Untitled')}\n"
                # Cap content at 2000 chars to keep the prompt bounded.
                f"{source.get('content', '')[:2000]}\n"
            )
        return "\n---\n".join(formatted)

    def _parse_response(
        self,
        response: str,
        sources: List[Dict]
    ) -> SynthesisOutput:
        """Parse the labeled sections of the LLM response into a SynthesisOutput."""
        sections = {}
        current_section = None

        for line in response.split("\n"):
            line = line.strip()
            if line.startswith("SUMMARY:"):
                current_section = "summary"
                sections[current_section] = line[len("SUMMARY:"):].strip()
            elif line.startswith("KEY_FINDINGS:"):
                current_section = "findings"
                sections[current_section] = []
            elif line.startswith("CONFLICTS:"):
                current_section = "conflicts"
                sections[current_section] = []
            elif line.startswith("CONFIDENCE:"):
                sections["confidence"] = line[len("CONFIDENCE:"):].strip().lower()
            elif current_section == "summary" and line:
                # Non-empty continuation lines extend the summary until the
                # next header; blank lines are skipped.
                sections["summary"] += " " + line
            elif current_section == "findings" and line.startswith("-"):
                sections["findings"].append(line[1:].strip())
            elif current_section == "conflicts" and line.startswith("*"):
                sections["conflicts"].append({"description": line[1:].strip()})

        # Map the LLM's qualitative label to a numeric score.
        confidence_map = {"high": 0.9, "medium": 0.6, "low": 0.3}

        return SynthesisOutput(
            summary=sections.get("summary", ""),
            key_findings=sections.get("findings", []),
            source_contributions={s.get("url", ""): "" for s in sources},
            confidence=confidence_map.get(sections.get("confidence", "medium"), 0.6),
            conflicts=sections.get("conflicts", [])
        )
class ComparativeSynthesis(SynthesisStrategy):
    """
    Compare and contrast different perspectives or options.
    Best for decision-making and analysis.
    """

    @property
    def name(self) -> str:
        return "comparative"

    async def synthesize(
        self,
        input_data: SynthesisInput
    ) -> SynthesisOutput:
        """Ask the LLM to compare perspectives across sources."""
        sources_text = self._format_sources(input_data.sources)

        prompt = f"""Compare and contrast information from multiple sources.

TOPIC: {input_data.query}
{f"FOCUS: {input_data.focus}" if input_data.focus else ""}

SOURCES:
{sources_text}

Instructions:
1. Identify the main perspectives or options presented
2. Compare their similarities
3. Contrast their differences
4. Evaluate strengths and weaknesses of each

Provide:
SUMMARY: Overview of the comparison
PERSPECTIVES: Different viewpoints found (one per line, starting with -)
SIMILARITIES: What sources agree on (one per line, starting with +)
DIFFERENCES: Where sources differ (one per line, starting with ~)
CONCLUSION: Overall assessment"""

        response = await self.llm.generate(prompt)

        # Perspectives (-) and similarities (+) both count as findings;
        # differences (~) are recorded as conflicts.
        findings = []
        conflicts = []

        for line in response.split("\n"):
            line = line.strip()
            if line.startswith(("-", "+")):
                findings.append(line[1:].strip())
            elif line.startswith("~"):
                conflicts.append({"type": "difference", "description": line[1:].strip()})

        # Take only the first line of the SUMMARY section.
        summary = ""
        if "SUMMARY:" in response:
            summary = response.split("SUMMARY:")[1].split("\n")[0].strip()

        return SynthesisOutput(
            summary=summary,
            key_findings=findings,
            source_contributions={},
            confidence=0.7,  # fixed heuristic; this strategy does not self-assess
            conflicts=conflicts
        )

    def _format_sources(self, sources: List[Dict]) -> str:
        """Render each source as a labeled, truncated text section."""
        formatted = []
        for i, source in enumerate(sources, 1):
            formatted.append(
                f"[Source {i} - {source.get('title', 'Untitled')}]\n"
                f"{source.get('content', '')[:1500]}"
            )
        return "\n\n".join(formatted)
Multi-Source Synthesis
When dealing with many sources, we need a hierarchical approach:
🐍python
class HierarchicalSynthesizer:
    """
    Synthesize many sources through hierarchical summarization.
    """

    def __init__(
        self,
        llm_client,
        chunk_size: int = 3,
        max_depth: int = 3
    ):
        self.llm = llm_client
        self.chunk_size = chunk_size  # sources summarized per chunk
        self.max_depth = max_depth    # cap on summarization rounds

    async def synthesize(
        self,
        sources: List[Dict[str, str]],
        query: str
    ) -> SynthesisOutput:
        """
        Synthesize many sources hierarchically.

        First, group sources into chunks and summarize each.
        Then, combine summaries until we have a final synthesis.
        """
        if len(sources) <= self.chunk_size:
            # Small enough for direct synthesis.
            strategy = AggregativeSynthesis(self.llm)
            return await strategy.synthesize(SynthesisInput(
                sources=sources,
                query=query
            ))

        # Repeatedly collapse chunks of sources into summary pseudo-sources.
        current_level = sources
        depth = 0

        while len(current_level) > self.chunk_size and depth < self.max_depth:
            next_level = []
            for i in range(0, len(current_level), self.chunk_size):
                chunk = current_level[i:i + self.chunk_size]

                summary = await self._summarize_chunk(chunk, query)
                next_level.append({
                    "title": f"Summary {len(next_level) + 1}",
                    "content": summary,
                    # Synthetic URL encodes where in the hierarchy this
                    # summary was produced.
                    "url": f"summary://level{depth}/{len(next_level)}"
                })

            current_level = next_level
            depth += 1

        # Final synthesis over whatever summaries remain.
        strategy = AggregativeSynthesis(self.llm)
        result = await strategy.synthesize(SynthesisInput(
            sources=current_level,
            query=query
        ))

        # Record hierarchy metadata for downstream consumers.
        result.source_contributions["hierarchy_depth"] = str(depth)
        result.source_contributions["original_sources"] = str(len(sources))

        return result

    async def _summarize_chunk(
        self,
        sources: List[Dict[str, str]],
        query: str
    ) -> str:
        """Summarize a chunk of sources."""
        sources_text = "\n\n".join([
            f"[Source: {s.get('title', 'Untitled')}]: {s.get('content', '')[:1000]}"
            for s in sources
        ])

        prompt = f"""Summarize these sources with respect to the question.

QUESTION: {query}

SOURCES:
{sources_text}

Provide a focused summary that:
1. Captures key information relevant to the question
2. Notes any important details or nuances
3. Identifies areas of agreement or disagreement

SUMMARY:"""

        return await self.llm.generate(prompt)
class StreamingSynthesizer:
    """
    Synthesize sources incrementally as they arrive.
    Useful for real-time research.
    """

    def __init__(self, llm_client):
        self.llm = llm_client
        self.running_summary = ""  # synthesis accumulated so far
        self.source_count = 0      # number of sources processed
        self.key_points = []       # findings accumulated across sources

    async def add_source(
        self,
        source: Dict[str, str],
        query: str
    ) -> Dict[str, Any]:
        """Add a new source and update synthesis."""
        self.source_count += 1

        prompt = f"""Update a research synthesis with new information.

RESEARCH QUESTION: {query}

CURRENT SYNTHESIS:
{self.running_summary if self.running_summary else "No synthesis yet."}

NEW SOURCE: {source.get('title', 'Untitled')}
{source.get('content', '')[:2000]}

Instructions:
1. Extract new relevant information from this source
2. Update the synthesis to incorporate it
3. Note if this source confirms, adds to, or contradicts existing findings

Provide:
UPDATED_SYNTHESIS: The new combined synthesis
NEW_POINTS: New information from this source (one per line, starting with -)
STATUS: confirms/adds/contradicts"""

        response = await self.llm.generate(prompt)

        # Parse the labeled sections of the response.
        updated_synthesis = ""
        new_points = []
        status = "adds"

        for line in response.split("\n"):
            if line.startswith("UPDATED_SYNTHESIS:"):
                updated_synthesis = line[len("UPDATED_SYNTHESIS:"):].strip()
            elif line.startswith("-"):
                new_points.append(line[1:].strip())
            elif line.startswith("STATUS:"):
                status = line[len("STATUS:"):].strip().lower()

        self.running_summary = updated_synthesis
        self.key_points.extend(new_points)

        return {
            "source_number": self.source_count,
            "current_synthesis": self.running_summary,
            "new_points": new_points,
            "status": status,
            "total_key_points": len(self.key_points)
        }

    def get_final_synthesis(self) -> SynthesisOutput:
        """Get the final synthesis after all sources."""
        return SynthesisOutput(
            summary=self.running_summary,
            key_findings=self.key_points,
            source_contributions={},
            # Confidence grows with each source, capped at 0.9.
            confidence=min(0.9, 0.5 + (self.source_count * 0.1)),
            conflicts=[]
        )
Streaming synthesis is excellent for interactive research where users want to see progress as sources are processed.
Handling Conflicts
Sources often conflict. A good research agent identifies and resolves these conflicts:
🐍python
@dataclass
class Conflict:
    """Represents a conflict between two sources."""
    # The two contradicting claims and where each came from.
    claim_a: str
    source_a: str
    claim_b: str
    source_b: str
    # One of: factual, interpretation, outdated.
    conflict_type: str
    # Filled in by ConflictResolver.resolve_conflict, if attempted.
    resolution: Optional[str] = None
    # 0.0-1.0 confidence in the resolution.
    confidence: float = 0.0
class ConflictResolver:
    """
    Identify and resolve conflicts between sources.
    """

    def __init__(self, llm_client):
        self.llm = llm_client

    async def find_conflicts(
        self,
        sources: List[Dict[str, str]],
        query: str
    ) -> List[Conflict]:
        """Identify conflicts between sources."""
        sources_text = "\n\n".join([
            f"[Source {i + 1} - {s.get('url', 'unknown')}]: {s.get('content', '')[:1000]}"
            for i, s in enumerate(sources)
        ])

        # The bracketed phrases below are literal placeholders shown to the
        # LLM to describe the expected output format.
        prompt = f"""Identify any conflicts or contradictions between these sources.

TOPIC: {query}

SOURCES:
{sources_text}

For each conflict found, provide:
CONFLICT:
- Claim A: [statement from source]
- Source A: [source number]
- Claim B: [contradicting statement]
- Source B: [source number]
- Type: factual/interpretation/outdated

List all conflicts found."""

        response = await self.llm.generate(prompt)
        return self._parse_conflicts(response, sources)

    def _parse_conflicts(
        self,
        response: str,
        sources: List[Dict]
    ) -> List[Conflict]:
        """Parse conflicts from LLM response."""
        conflicts = []
        current_conflict = {}

        for line in response.split("\n"):
            line = line.strip()
            if line.startswith("CONFLICT"):
                # A new CONFLICT header closes out the previous record.
                if current_conflict:
                    conflicts.append(self._create_conflict(current_conflict, sources))
                current_conflict = {}
            elif line.startswith("- Claim A:"):
                current_conflict["claim_a"] = line[len("- Claim A:"):].strip()
            elif line.startswith("- Source A:"):
                current_conflict["source_a"] = line[len("- Source A:"):].strip()
            elif line.startswith("- Claim B:"):
                current_conflict["claim_b"] = line[len("- Claim B:"):].strip()
            elif line.startswith("- Source B:"):
                current_conflict["source_b"] = line[len("- Source B:"):].strip()
            elif line.startswith("- Type:"):
                current_conflict["type"] = line[len("- Type:"):].strip()

        # Flush the trailing conflict, if any.
        if current_conflict:
            conflicts.append(self._create_conflict(current_conflict, sources))

        return conflicts

    def _create_conflict(
        self,
        data: Dict,
        sources: List[Dict]
    ) -> Conflict:
        """Create a Conflict object from parsed data."""
        return Conflict(
            claim_a=data.get("claim_a", ""),
            source_a=data.get("source_a", ""),
            claim_b=data.get("claim_b", ""),
            source_b=data.get("source_b", ""),
            conflict_type=data.get("type", "unknown")
        )

    async def resolve_conflict(
        self,
        conflict: Conflict,
        additional_context: str = ""
    ) -> Conflict:
        """Attempt to resolve a conflict (mutates and returns it)."""
        prompt = f"""Analyze and resolve this conflict between sources.

CONFLICT:
Claim A: {conflict.claim_a} (from {conflict.source_a})
Claim B: {conflict.claim_b} (from {conflict.source_b})
Type: {conflict.conflict_type}

{f"Additional context: {additional_context}" if additional_context else ""}

Analyze:
1. Which claim is more likely correct and why?
2. Could both be correct in different contexts?
3. Is one source more authoritative or recent?

Provide:
RESOLUTION: Your analysis and conclusion
CONFIDENCE: High/Medium/Low"""

        response = await self.llm.generate(prompt)

        # Parse the labeled resolution.
        resolution = ""
        confidence = 0.5

        for line in response.split("\n"):
            if line.startswith("RESOLUTION:"):
                resolution = line[len("RESOLUTION:"):].strip()
            elif line.startswith("CONFIDENCE:"):
                conf_str = line[len("CONFIDENCE:"):].strip().lower()
                confidence = {"high": 0.9, "medium": 0.6, "low": 0.3}.get(conf_str, 0.5)

        conflict.resolution = resolution
        conflict.confidence = confidence

        return conflict
Structured Output
Research output should be well-structured for downstream use:
🐍python
@dataclass
class ResearchReport:
    """A complete research report."""
    title: str
    question: str
    executive_summary: str
    key_findings: List[str]
    detailed_analysis: str
    # Minimal source records: url and title.
    sources: List[Dict[str, str]]
    methodology: str
    limitations: str
    conclusion: str
    # ISO-8601 timestamp of report generation.
    generated_at: str
class ReportGenerator:
    """
    Generate structured research reports.
    """

    def __init__(self, llm_client):
        self.llm = llm_client

    async def generate_report(
        self,
        synthesis: SynthesisOutput,
        sources: List[Dict[str, str]],
        query: str,
        format_type: str = "comprehensive"
    ) -> ResearchReport:
        """Generate a complete research report.

        `format_type` may be "comprehensive" or "brief"; any other value
        falls back to comprehensive.
        """
        if format_type == "brief":
            return await self._generate_brief(synthesis, sources, query)
        return await self._generate_comprehensive(synthesis, sources, query)

    async def _generate_comprehensive(
        self,
        synthesis: SynthesisOutput,
        sources: List[Dict[str, str]],
        query: str
    ) -> ResearchReport:
        """Generate a comprehensive report, section by section."""
        exec_summary = await self._generate_executive_summary(synthesis, query)
        detailed = await self._generate_detailed_analysis(synthesis, sources, query)
        methodology = self._generate_methodology(sources)
        limitations = await self._generate_limitations(synthesis, sources)
        conclusion = await self._generate_conclusion(synthesis, query)

        from datetime import datetime

        return ResearchReport(
            title=f"Research Report: {query[:50]}...",
            question=query,
            executive_summary=exec_summary,
            key_findings=synthesis.key_findings,
            detailed_analysis=detailed,
            sources=[{"url": s.get("url"), "title": s.get("title")} for s in sources],
            methodology=methodology,
            limitations=limitations,
            conclusion=conclusion,
            generated_at=datetime.now().isoformat()
        )

    async def _generate_executive_summary(
        self,
        synthesis: SynthesisOutput,
        query: str
    ) -> str:
        """Generate executive summary."""
        prompt = f"""Write an executive summary for research on: {query}

Based on this synthesis:
{synthesis.summary}

Key findings:
{chr(10).join(f"- {finding}" for finding in synthesis.key_findings)}

Write a 2-3 paragraph executive summary that:
1. Answers the research question directly
2. Highlights the most important findings
3. Notes any significant caveats"""

        return await self.llm.generate(prompt)

    async def _generate_detailed_analysis(
        self,
        synthesis: SynthesisOutput,
        sources: List[Dict[str, str]],
        query: str
    ) -> str:
        """Generate detailed analysis section."""
        # Only the first 10 sources are shown, each truncated, to bound
        # the prompt size.
        source_summaries = "\n".join([
            f"- {s.get('title', 'Untitled')}: {s.get('content', '')[:200]}..."
            for s in sources[:10]
        ])

        prompt = f"""Write a detailed analysis section for research on: {query}

Synthesis: {synthesis.summary}
Key findings: {", ".join(synthesis.key_findings)}

Sources consulted:
{source_summaries}

Write a detailed analysis (3-5 paragraphs) that:
1. Explores each key finding in depth
2. Provides evidence from sources
3. Discusses implications
4. Addresses any conflicts (if present): {str(synthesis.conflicts)}"""

        return await self.llm.generate(prompt)

    def _generate_methodology(self, sources: List[Dict]) -> str:
        """Generate methodology section (templated, no LLM call)."""
        # Count sources by declared type; default to "web".
        source_types = {}
        for s in sources:
            s_type = s.get("type", "web")
            source_types[s_type] = source_types.get(s_type, 0) + 1

        methodology = f"""This research was conducted using automated web search and document analysis.

Sources consulted: {len(sources)} total
- {", ".join(f"{k}: {v}" for k, v in source_types.items())}

Search queries were optimized for comprehensive coverage.
Content was extracted and analyzed using natural language processing.
Findings were synthesized using hierarchical summarization."""

        return methodology

    async def _generate_limitations(
        self,
        synthesis: SynthesisOutput,
        sources: List[Dict]
    ) -> str:
        """Generate limitations section from simple heuristics."""
        limitations = []

        # Check source diversity.
        if len(sources) < 5:
            limitations.append("Limited number of sources consulted")

        # Check for conflicts.
        if synthesis.conflicts:
            limitations.append(f"{len(synthesis.conflicts)} conflicting claims identified")

        # Check confidence.
        if synthesis.confidence < 0.7:
            limitations.append("Moderate confidence due to source disagreement")

        if not limitations:
            limitations.append("Standard limitations of automated research apply")

        return "Limitations of this research:\n" + "\n".join(f"- {item}" for item in limitations)

    async def _generate_conclusion(
        self,
        synthesis: SynthesisOutput,
        query: str
    ) -> str:
        """Generate conclusion section."""
        prompt = f"""Write a conclusion for research on: {query}

Summary: {synthesis.summary}
Confidence level: {synthesis.confidence}

Write a brief conclusion (1-2 paragraphs) that:
1. Restates the main answer to the research question
2. Summarizes key evidence
3. Suggests areas for further research if applicable"""

        return await self.llm.generate(prompt)
Summary
In this section, we covered information synthesis techniques:
- Synthesis Strategies: Aggregative, comparative, analytical, and narrative approaches for different research needs
- Hierarchical Synthesis: Processing many sources through chunked summarization
- Conflict Resolution: Identifying and resolving contradictions between sources
- Structured Reports: Generating comprehensive research reports with proper structure
In the next section, we'll focus on source citation and verification to ensure research integrity.