Boo-AI — Master Artificial Intelligence by Building from Scratch

Introduction

Gemini integrates seamlessly with popular open-source agent frameworks. This lets you leverage Gemini's capabilities - massive context, multimodality, thinking models - while using frameworks like LangGraph, CrewAI, and LangChain for orchestration.

Best of Both Worlds: Use Google's powerful models with community-built frameworks. Get Gemini's capabilities with the flexibility and tooling of open-source ecosystems.

Gemini with LangGraph

LangGraph provides graph-based orchestration for complex agent workflows:

🐍gemini_langgraph.py

1from langchain_google_genai import ChatGoogleGenerativeAI
2from langgraph.graph import StateGraph, END
3from langgraph.prebuilt import ToolNode
4from typing import TypedDict, Annotated
5import operator
6
# Gemini chat model used by the agent node below.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)
12
# Graph state
class AgentState(TypedDict):
    """State dictionary handed to every graph node."""

    # Annotated with operator.add, which the graph uses to merge updates to
    # this key (new messages are appended to the existing list).
    messages: Annotated[list, operator.add]
    # Plain string slot; the nodes in this listing never read it.
    next_action: str
17
18# Define tools
19from langchain_core.tools import tool
20
@tool
def search_codebase(query: str) -> str:
    """Search the codebase for relevant code."""
    # Placeholder: echoes the query back instead of performing a real search.
    report = f"Found code matching: {query}"
    return report
25
@tool
def run_tests(test_path: str) -> str:
    """Run tests at the specified path."""
    # Placeholder: always reports success without executing anything.
    report = f"Tests passed at: {test_path}"
    return report
30
@tool
def edit_file(path: str, content: str) -> str:
    """Edit a file with new content."""
    # Placeholder: `content` is accepted but nothing is written to disk.
    report = f"File edited: {path}"
    return report
35
# The same tool list is bound to the model here and later given to ToolNode.
tools = [
    search_codebase,
    run_tests,
    edit_file,
]
llm_with_tools = llm.bind_tools(tools)
38
# Graph nodes
def agent_node(state: AgentState) -> AgentState:
    """Invoke the tool-bound model on the conversation so far.

    Returns a partial state update containing only the model's new reply.
    """
    history = state["messages"]
    reply = llm_with_tools.invoke(history)
    return {"messages": [reply]}
44
def should_continue(state: AgentState) -> str:
    """Return "tools" when the newest message carries tool calls, otherwise END."""
    newest = state["messages"][-1]
    return "tools" if newest.tool_calls else END
51
# Assemble the graph: one node for the model, one for tool execution.
workflow = StateGraph(AgentState)
workflow.add_node("agent", agent_node)
workflow.add_node("tools", ToolNode(tools))

# Execution starts at the agent node.
workflow.set_entry_point("agent")
# After each agent turn, should_continue picks the tool node or the end.
workflow.add_conditional_edges("agent", should_continue, {"tools": "tools", END: END})
# Tool output always goes back to the agent.
workflow.add_edge("tools", "agent")

# Compile into a runnable app
app = workflow.compile()

# Run with a single user message as the initial state
result = app.invoke(
    {"messages": [("user", "Find the authentication code and add rate limiting")]}
)

LangGraph with Gemini Thinking

🐍langgraph_thinking.py

1from langchain_google_genai import ChatGoogleGenerativeAI
2
# Model reserved for requests that need heavier reasoning.
# NOTE(review): this is an experimental, date-stamped model ID — confirm it is
# still served before relying on it.
thinking_llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-2.0-flash-thinking-exp-01-21",
)

# Model for everything else.
fast_llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-2.5-flash",
)
14
# Router to select appropriate model
def model_router(state: AgentState) -> str:
    """Route to appropriate model based on task.

    Args:
        state: Graph state whose ``"messages"`` list ends with the message to
            inspect. The last entry may be a message object with a
            ``content`` attribute, a ``(role, text)`` pair, a dict with a
            ``"content"`` key, or a bare string.

    Returns:
        ``"thinking"`` when the message text contains one of the complexity
        keywords (case-insensitive), otherwise ``"fast"``. An empty message
        list also routes to ``"fast"``.
    """
    messages = state["messages"]
    if not messages:
        # Nothing to inspect; fall back to the cheaper model.
        return "fast"

    latest = messages[-1]
    if isinstance(latest, str):
        content = latest
    elif isinstance(latest, (tuple, list)):
        # ("role", "text") pairs, the form passed to app.invoke() in the
        # first listing; they have no .content attribute.
        content = latest[-1] if latest else ""
    elif isinstance(latest, dict):
        content = latest.get("content", "")
    else:
        content = getattr(latest, "content", "")

    if isinstance(content, list):
        # Multi-part content: keep only the textual pieces.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict):
                pieces.append(str(part.get("text", "")))
        content = " ".join(pieces)

    text = str(content).lower()

    # Complex tasks need thinking
    complex_indicators = ("design", "architect", "debug", "analyze")
    if any(indicator in text for indicator in complex_indicators):
        return "thinking"
    return "fast"
27
# Build graph with model selection
workflow = StateGraph(AgentState)


def thinking_node(state: AgentState) -> AgentState:
    """Answer with the thinking model and record that it handled the turn."""
    response = thinking_llm.invoke(state["messages"])
    return {"messages": [response], "next_action": "thinking"}


def fast_node(state: AgentState) -> AgentState:
    """Answer with the fast model and record that it handled the turn."""
    response = fast_llm.invoke(state["messages"])
    return {"messages": [response], "next_action": "fast"}


def route_after_model(state: AgentState) -> str:
    """Go to the tool node when the reply requests tool calls, otherwise end."""
    last_message = state["messages"][-1]
    return "tools" if getattr(last_message, "tool_calls", None) else END


def route_after_tools(state: AgentState) -> str:
    """Return tool results to whichever model node requested them."""
    return state["next_action"]


workflow.add_node("thinking", thinking_node)
workflow.add_node("fast", fast_node)
workflow.add_node("tools", ToolNode(tools))

# model_router is a routing function, not a node. Attaching it as the
# conditional entry point avoids edges to and from a "router" node that was
# never added to the graph, which makes compile() fail.
workflow.set_conditional_entry_point(
    model_router,
    {"thinking": "thinking", "fast": "fast"},
)

# Every model node needs a way out: either into the tool node or to END.
# NOTE(review): neither model is bound to the tools in this listing, so the
# "tools" branch only fires once the models are created with
# .bind_tools(tools) — confirm the chosen models support function calling.
workflow.add_conditional_edges(
    "thinking",
    route_after_model,
    {"tools": "tools", END: END},
)
workflow.add_conditional_edges(
    "fast",
    route_after_model,
    {"tools": "tools", END: END},
)
workflow.add_conditional_edges(
    "tools",
    route_after_tools,
    {"thinking": "thinking", "fast": "fast"},
)

Gemini with CrewAI

CrewAI enables role-based multi-agent systems:

🐍gemini_crewai.py

1from crewai import Agent, Task, Crew, Process
2from langchain_google_genai import ChatGoogleGenerativeAI
3
# Two Gemini chat models with different sampling temperatures.
gemini_pro = ChatGoogleGenerativeAI(temperature=0.7, model="gemini-2.5-pro")

gemini_flash = ChatGoogleGenerativeAI(temperature=0.5, model="gemini-2.5-flash")
14
# One agent per role; each is given the model suited to its job.
researcher = Agent(
    llm=gemini_pro,  # Use Pro for deep analysis
    role="Research Analyst",
    goal="Find and analyze relevant information",
    backstory="You are an expert at finding and synthesizing information.",
    verbose=True,
)

developer = Agent(
    llm=gemini_pro,
    role="Senior Developer",
    goal="Write high-quality, maintainable code",
    backstory="You are a senior developer with expertise in best practices.",
    verbose=True,
)

reviewer = Agent(
    llm=gemini_flash,  # Use Flash for faster reviews
    role="Code Reviewer",
    goal="Ensure code quality and catch issues",
    backstory="You are a meticulous code reviewer focused on quality.",
    verbose=True,
)
39
# Tasks form a chain: each later task lists the previous one as its context.
research_task = Task(
    agent=researcher,
    description="Research best practices for implementing {feature}",
    expected_output="Comprehensive research summary with recommendations",
)

development_task = Task(
    agent=developer,
    context=[research_task],
    description="Implement the feature based on research findings",
    expected_output="Complete implementation code",
)

review_task = Task(
    agent=reviewer,
    context=[development_task],
    description="Review the implementation for issues",
    expected_output="Code review with issues and suggestions",
)
60
# Group the agents and tasks into a crew that uses the sequential process.
crew = Crew(
    process=Process.sequential,
    agents=[researcher, developer, reviewer],
    tasks=[research_task, development_task, review_task],
    verbose=True,
)

# Execute; the "feature" input matches the {feature} placeholder in research_task.
result = crew.kickoff(inputs={"feature": "user authentication with OAuth2"})

CrewAI with Multimodal Gemini

🐍crewai_multimodal.py

1from crewai import Agent, Task, Crew
2from crewai_tools import VisionTool
3import google.generativeai as genai
4
# Custom vision tool using Gemini
from crewai.tools import BaseTool


class GeminiVisionTool(BaseTool):
    """Tool for analyzing images with Gemini.

    Subclasses CrewAI's ``BaseTool`` so that ``Agent(tools=[...])`` accepts
    it; a plain class exposing only ``name``/``description``/``run`` is not a
    valid CrewAI tool. ``BaseTool.run`` forwards its arguments to ``_run``,
    so ``tool.run(image_path, question)`` keeps working.
    """

    name: str = "analyze_image"
    description: str = "Analyze an image and extract information"

    def _run(self, image_path: str, question: str) -> str:
        """Upload the image at ``image_path`` and return the model's answer.

        Args:
            image_path: Path of the image file to upload.
            question: Question to ask about the image.

        Returns:
            The text of the generated response.
        """
        # Built per call: BaseTool is a pydantic model, so undeclared
        # attributes cannot be assigned in __init__.
        model = genai.GenerativeModel("gemini-2.5-flash")
        image = genai.upload_file(image_path)
        response = model.generate_content([question, image])
        return response.text
19
# Agent that reviews interfaces, equipped with the Gemini vision tool.
# NOTE(review): gemini_pro is not defined in this listing — presumably the
# model object created in gemini_crewai.py; confirm it is in scope.
ui_analyst = Agent(
    llm=gemini_pro,
    tools=[GeminiVisionTool()],
    role="UI Analyst",
    goal="Analyze UI designs and implementations",
    backstory="You are an expert at analyzing user interfaces.",
)

# Task whose description carries a {screenshot_path} placeholder.
design_task = Task(
    agent=ui_analyst,
    description=(
        "Analyze the UI screenshot at {screenshot_path}. "
        "Compare it to best practices and suggest improvements."
    ),
    expected_output="Detailed UI analysis with improvement suggestions",
)

# Single-agent crew; the kickoff input supplies the screenshot path.
crew = Crew(tasks=[design_task], agents=[ui_analyst])

result = crew.kickoff(inputs={"screenshot_path": "ui_screenshot.png"})

Gemini with LangChain

🐍gemini_langchain.py

1from langchain_google_genai import ChatGoogleGenerativeAI
2from langchain.agents import AgentExecutor, create_react_agent
3from langchain.prompts import PromptTemplate
4from langchain_core.tools import tool
5
# Gemini chat model for the ReAct agent.
llm = ChatGoogleGenerativeAI(
    convert_system_message_to_human=True,
    temperature=0,
    model="gemini-2.5-flash",
)
12
# Tools exposed to the agent
@tool
def read_file(path: str) -> str:
    """Read contents of a file."""
    with open(path) as handle:
        contents = handle.read()
    return contents
19
@tool
def write_file(path: str, content: str) -> str:
    """Write content to a file."""
    # Mode "w" truncates an existing file before writing.
    with open(path, "w") as handle:
        handle.write(content)
    confirmation = f"Written to {path}"
    return confirmation
26
@tool
def run_command(command: str) -> str:
    """Run a shell command."""
    import subprocess

    # SECURITY: `command` is chosen by the model and handed to the shell
    # verbatim, so this tool can do anything the current user can. Only
    # enable it inside a sandbox or with an allow-list in front of it.
    # Dropping shell=True would not help: the whole command line, not just
    # an argument, comes from the model.
    timeout_seconds = 60  # keeps a hung command from blocking the agent forever
    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        # Report the failure to the agent as an observation instead of raising.
        return f"Command timed out after {timeout_seconds} seconds"

    # errors="replace" keeps non-UTF-8 output from raising UnicodeDecodeError.
    stdout = result.stdout.decode(errors="replace")
    stderr = result.stderr.decode(errors="replace")
    return stdout + stderr
33
tools = [
    read_file,
    write_file,
    run_command,
]

# ReAct prompt: {tools}, {tool_names}, {input} and {agent_scratchpad} are the
# template variables referenced in the text below.
react_prompt = PromptTemplate.from_template("""
You are a helpful coding assistant.

You have access to the following tools:
{tools}

Use the following format:
Question: the task you must complete
Thought: think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (repeat Thought/Action/Action Input/Observation as needed)
Thought: I have completed the task
Final Answer: summary of what was done

Question: {input}
{agent_scratchpad}
""")
56
# Build the ReAct agent and wrap it in an executor.
agent = create_react_agent(llm, tools, react_prompt)
agent_executor = AgentExecutor(
    tools=tools,
    agent=agent,
    handle_parsing_errors=True,
    verbose=True,
)

# Run a single task
result = agent_executor.invoke(
    {"input": "Read the config.json file and add a new API endpoint configuration"}
)

LangChain with Gemini RAG

🐍langchain_rag.py

1from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
2from langchain_community.vectorstores import Chroma
3from langchain.text_splitter import RecursiveCharacterTextSplitter
4from langchain.chains import RetrievalQA
5from langchain_community.document_loaders import DirectoryLoader
6
# Embedding model for the vector store, chat model for answering.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Load files under ./src that match the **/*.py glob.
loader = DirectoryLoader("./src", glob="**/*.py")
documents = loader.load()

# Chunking parameters: chunk_size=1000 with chunk_overlap=200.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = splitter.split_documents(documents)

# Build the Chroma store from the chunks, with ./chroma_db as its persist directory.
vectorstore = Chroma.from_documents(
    persist_directory="./chroma_db",
    embedding=embeddings,
    documents=splits,
)
28
# Retrieval-augmented QA chain; the retriever is configured with k=10.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

# Ask a question, then print the answer followed by the source of each document.
result = qa_chain.invoke({"query": "How does the authentication system work?"})

print(result["result"])
for doc in result["source_documents"]:
    source = doc.metadata["source"]
    print(f"Source: {source}")

Building Custom Gemini Agents

🐍custom_gemini_agent.py

1import google.generativeai as genai
2from dataclasses import dataclass
3from typing import Callable, Any
4
@dataclass
class Tool:
    """A callable the agent may invoke, plus the metadata advertised to the model."""

    name: str  # identifier the model uses to request the call
    description: str  # description included in the function declaration
    function: Callable  # invoked with the model-supplied keyword arguments


class CustomGeminiAgent:
    """Custom agent built directly on Gemini API.

    Registered tools are advertised to the model as function declarations.
    ``run`` executes the requested calls itself and feeds the results back
    until the model answers with text only.
    """

    def __init__(
        self,
        model_name: str = "gemini-2.5-flash",
        system_prompt: str = "",
    ):
        """Create the model wrapper.

        Args:
            model_name: Gemini model identifier.
            system_prompt: Optional system instruction; an empty string means
                no system instruction is sent.
        """
        self.model = genai.GenerativeModel(
            model_name=model_name,
            # An empty instruction is passed as None rather than "".
            system_instruction=system_prompt or None,
        )
        self.tools: dict[str, Tool] = {}
        self.chat = None

    def add_tool(self, tool: Tool) -> None:
        """Register a tool, replacing any existing tool with the same name."""
        self.tools[tool.name] = tool

    def _get_tool_declarations(self) -> list:
        """Convert the registered tools to function-declaration dicts."""
        return [
            {
                "name": tool.name,
                "description": tool.description,
                "parameters": self._infer_parameters(tool.function),
            }
            for tool in self.tools.values()
        ]

    def _infer_parameters(self, func: Callable) -> dict:
        """Infer an object schema from a function signature.

        ``str``, ``int``, ``float`` and ``bool`` annotations map to
        ``string``, ``integer``, ``number`` and ``boolean``. Anything else,
        including unannotated parameters, is declared as ``string``.
        Parameters without a default are listed as required.
        """
        import inspect

        type_names = ((str, "string"), (int, "integer"), (float, "number"), (bool, "boolean"))

        properties = {}
        required = []
        for name, param in inspect.signature(func).parameters.items():
            schema_type = "string"
            for python_type, json_type in type_names:
                if param.annotation == python_type:
                    schema_type = json_type
                    break
            properties[name] = {"type": schema_type}

            # Identity check: a default with a custom __eq__ must not be
            # compared against the sentinel by value.
            if param.default is inspect.Parameter.empty:
                required.append(name)

        return {
            "type": "object",
            "properties": properties,
            "required": required,
        }

    @staticmethod
    def _requested_call(part):
        """Return the part's function call, or None for a text-only part.

        Response parts always expose a ``function_call`` attribute, so its
        mere presence says nothing; a real call has a non-empty name.
        """
        call = getattr(part, "function_call", None)
        if call is not None and getattr(call, "name", ""):
            return call
        return None

    def _execute_call(self, call):
        """Run the tool named by ``call`` and wrap the outcome as a function-response part."""
        tool = self.tools.get(call.name)
        if tool is None:
            payload = {"error": f"Unknown function: {call.name}"}
        else:
            payload = {"result": str(tool.function(**dict(call.args)))}
        return genai.protos.Part(
            function_response=genai.protos.FunctionResponse(
                name=call.name,
                response=payload,
            )
        )

    def run(self, task: str, max_iterations: int = 10) -> str:
        """Run the agent on a task.

        Args:
            task: The user request sent as the first message.
            max_iterations: Maximum number of model round-trips.

        Returns:
            The model's text answer, or ``"Max iterations reached"`` when the
            model is still requesting tool calls after ``max_iterations``
            round-trips.
        """
        # Function calls are executed manually below, so automatic function
        # calling stays disabled.
        self.chat = self.model.start_chat()

        # The declarations must be sent with the request; otherwise the model
        # does not know the tools exist.
        tools = None
        if self.tools:
            tools = [{"function_declarations": self._get_tool_declarations()}]

        message = task
        for _ in range(max_iterations):
            response = self.chat.send_message(message, tools=tools)
            parts = response.candidates[0].content.parts

            calls = [call for call in map(self._requested_call, parts) if call is not None]
            if not calls:
                # Text response - we're done
                return "".join(getattr(part, "text", "") or "" for part in parts)

            # Answer every requested call in the next turn.
            message = [self._execute_call(call) for call in calls]

        return "Max iterations reached"
112
113
# Usage
def read_file(path: str) -> str:
    """Return the text content of the file at ``path``."""
    # The context manager closes the handle, which open(path).read() does not.
    with open(path, encoding="utf-8") as handle:
        return handle.read()


def list_files(directory: str) -> str:
    """Return the entries of ``directory`` formatted as a list literal."""
    import os  # imported here because the listing's import block omits os

    return str(os.listdir(directory))


agent = CustomGeminiAgent(
    system_prompt="You are a helpful coding assistant.",
)

agent.add_tool(Tool(
    name="read_file",
    description="Read a file's contents",
    function=read_file,
))

agent.add_tool(Tool(
    name="list_files",
    description="List files in a directory",
    function=list_files,
))

result = agent.run("List the Python files in src/ and read the main.py file")

Framework Selection

Use LangGraph for complex workflows with branching, CrewAI for role-based multi-agent systems, LangChain for quick prototyping, and custom implementations for maximum control.

Summary

Integrating Gemini with open-source frameworks:

LangGraph: Graph-based orchestration with Gemini reasoning
CrewAI: Role-based multi-agent teams using Gemini
LangChain: Standard chain and agent patterns
Custom agents: Direct Gemini API for full control
Flexibility: Mix and match frameworks with Gemini capabilities

Part II Complete: You now understand how the top AI agents work - Claude Code, OpenAI Codex, and Google Gemini. In Part III, we'll dive deep into the core patterns and techniques that power all agents.