Gemini Architect Academy | Function Calling, ADK, MCP, Live API, Embeddings & More
Minimal structure for declaring a function that Gemini can call.
from google.genai import types

get_weather = types.FunctionDeclaration(
    name="get_weather",
    description="Obtenir la meteo actuelle",
    parameters=types.Schema(
        type=types.Type.OBJECT,
        properties={
            "city": types.Schema(
                type=types.Type.STRING,
                description="Nom de la ville"
            )
        },
        required=["city"]
    )
)
# Array-typed parameter (list of strings) inside properties:
"cities": types.Schema(
    type=types.Type.ARRAY,
    items=types.Schema(type=types.Type.STRING),
    description="Liste de villes"
)
Functions are grouped into a Tool object before being passed to the model.
weather_tool = types.Tool(
    function_declarations=[
        get_weather,
        set_temperature,
        list_cities
    ]
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Meteo a Paris ?",
    config=types.GenerateContentConfig(
        tools=[weather_tool]
    )
)
Gemini can infer the schema from a Python function with a docstring and type hints.
def get_weather(city: str, unit: str = "celsius") -> dict:
    """Obtenir la meteo pour une ville.

    Args:
        city: Nom de la ville
        unit: Unite de temperature
    """
    return {"temp": 22, "unit": unit}

# Pass the function directly - the schema is auto-generated
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Meteo Paris ?",
    config=types.GenerateContentConfig(
        tools=[get_weather]
    )
)
The model freely decides whether to call a function or answer directly with text.
config = types.GenerateContentConfig(
    tools=[weather_tool],
    tool_config=types.ToolConfig(
        function_calling_config=types.FunctionCallingConfig(
            mode="AUTO"
        )
    )
)
Forces the model to always call a function. Optionally, restrict it to specific functions.
tool_config=types.ToolConfig(
    function_calling_config=types.FunctionCallingConfig(
        mode="ANY",
        allowed_function_names=[
            "get_weather",
            "get_forecast"
        ]
    )
)
Prevents the model from calling functions. Useful for text-only answers even when tools are configured.
tool_config=types.ToolConfig(
    function_calling_config=types.FunctionCallingConfig(
        mode="NONE"
    )
)
# The model will answer with text only
Without a restriction, ANY mode lets the model call any declared function (sketched below).
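A minimal sketch, mirroring the config above, of ANY mode with no allowed_function_names: the model must call one of the declared functions, whichever it chooses.
tool_config=types.ToolConfig(
    function_calling_config=types.FunctionCallingConfig(
        mode="ANY"  # any declared function may be called
    )
)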
After calling the model, check whether the response contains a function call.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Meteo a Lyon ?",
    config=config
)

part = response.candidates[0].content.parts[0]
if part.function_call:
    name = part.function_call.name
    args = part.function_call.args
    print(f"Call: {name}({args})")
Execute the function locally, then send the result back to the model.
# Execute the real function
result = get_weather(**args)

# Build the function response
function_response = types.Part.from_function_response(
    name=name,
    response={"result": result}
)

# Send it back to the model
final = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        user_msg,
        response.candidates[0].content,
        types.Content(
            parts=[function_response],
            role="function"
        )
    ],
    config=config
)
print(final.text)
history = [types.Content(
    parts=[types.Part.from_text("Meteo Paris et Lyon")],
    role="user"
)]

while True:
    resp = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=history,
        config=config
    )
    history.append(resp.candidates[0].content)

    fc_parts = [p for p in resp.candidates[0].content.parts
                if p.function_call]
    if not fc_parts:
        break  # Final text response

    fn_responses = []
    for p in fc_parts:
        result = dispatch(p.function_call.name,
                          p.function_call.args)
        fn_responses.append(
            types.Part.from_function_response(
                name=p.function_call.name,
                response={"result": result}
            ))
    history.append(types.Content(
        parts=fn_responses, role="function"
    ))
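The loop above assumes a dispatch helper that maps a function name to a local callable; a minimal sketch, with an illustrative FUNCTION_MAP registry:
# Registry of locally implemented tool functions
FUNCTION_MAP = {
    "get_weather": get_weather,
}

def dispatch(name: str, args: dict):
    # Look up the callable by name and invoke it with the model's arguments
    return FUNCTION_MAP[name](**args)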
The Python SDK can execute functions automatically if you pass callables directly.
# Automatic Function Calling
def get_weather(city: str) -> dict:
    """Obtenir la meteo."""
    return {"temp": 20, "city": city}

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Meteo a Marseille ?",
    config=types.GenerateContentConfig(
        tools=[get_weather]  # The SDK runs the call loop automatically
    )
)
print(response.text)  # Final answer directly
Gemini can call several functions in a single response when they are independent.
# Prompt: "Meteo a Paris ET a Tokyo"
# Gemini returns 2 function_call entries in parts[]
parts = response.candidates[0].content.parts
for part in parts:
    if part.function_call:
        print(part.function_call.name)
        print(part.function_call.args)

# Output:
# get_weather {"city": "Paris"}
# get_weather {"city": "Tokyo"}
The result of one function feeds into another, within a single turn.
# "Traduis la meteo de Paris en japonais"
# Gemini chains automatically:
#   1. get_weather(city="Paris")
#   2. translate(text=result, lang="ja")
# The parts contain the 2 calls,
# ordered according to their dependencies
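For illustration, a sketch of the tool setup such a chain assumes; the translate declaration is hypothetical, and the multi-turn loop shown earlier resolves the dependency by feeding each result back to the model.
translate = types.FunctionDeclaration(
    name="translate",
    description="Translate text into a target language",
    parameters=types.Schema(
        type=types.Type.OBJECT,
        properties={
            "text": types.Schema(type=types.Type.STRING),
            "lang": types.Schema(type=types.Type.STRING)
        },
        required=["text", "lang"]
    )
)
chain_tool = types.Tool(
    function_declarations=[get_weather, translate]
)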
# Collect all the parallel responses
import asyncio

async def execute_parallel(fc_parts):
    tasks = []
    for p in fc_parts:
        fn = FUNCTION_MAP[p.function_call.name]
        tasks.append(
            asyncio.to_thread(fn, **p.function_call.args)
        )
    results = await asyncio.gather(*tasks)
    return [
        types.Part.from_function_response(
            name=fc_parts[i].function_call.name,
            response={"result": r}
        )
        for i, r in enumerate(results)
    ]
Gemini can write and execute Python code in a secure sandbox.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Calcule les 20 premiers Fibonacci",
    config=types.GenerateContentConfig(
        tools=[types.Tool(
            code_execution=types.ToolCodeExecution()
        )]
    )
)

for part in response.candidates[0].content.parts:
    if part.executable_code:
        print("=== CODE ===")
        print(part.executable_code.code)
    if part.code_execution_result:
        print("=== RESULT ===")
        print(part.code_execution_result.output)
    if part.text:
        print("=== ANALYSIS ===")
        print(part.text)
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="""Cree un graphique des ventes:
    Jan: 100, Fev: 150, Mar: 130, Avr: 200""",
    config=types.GenerateContentConfig(
        tools=[types.Tool(
            code_execution=types.ToolCodeExecution()
        )]
    )
)

# The model generates matplotlib code
# and returns the image as base64
# in inline_data with mime_type="image/png"
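A minimal sketch for pulling that image out of the response parts, assuming the part exposes inline_data with decoded image bytes as for other media parts:
for part in response.candidates[0].content.parts:
    if part.inline_data and part.inline_data.mime_type == "image/png":
        # Write the PNG produced by the sandbox to disk
        with open("sales_chart.png", "wb") as f:
            f.write(part.inline_data.data)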
Lets Gemini search the web for up-to-date information.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Derniers resultats Ligue 1 ?",
    config=types.GenerateContentConfig(
        tools=[types.Tool(
            google_search=types.GoogleSearch()
        )]
    )
)
The model decides automatically whether it needs to search or not.
# Gemini 1.5 - configurable threshold
google_search_retrieval=types.GoogleSearchRetrieval(
    dynamic_retrieval_config=types.DynamicRetrievalConfig(
        mode="MODE_DYNAMIC",
        dynamic_threshold=0.5
    )
)

# Gemini 2.0+ - dynamic by default
# Simply use GoogleSearch()
# The model manages the threshold itself
grounding = response.candidates[0].grounding_metadata

# Web sources
if grounding and grounding.grounding_chunks:
    for chunk in grounding.grounding_chunks:
        if chunk.web:
            print(f"Source: {chunk.web.title}")
            print(f"URL: {chunk.web.uri}")

# Search entry point
if grounding and grounding.search_entry_point:
    html = grounding.search_entry_point.rendered_content
    # HTML to render for citations
Lets Gemini read and extract the content of provided URLs.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="""Resume cet article:
    https://example.com/article-ia""",
    config=types.GenerateContentConfig(
        tools=[types.Tool(
            url_context=types.UrlContext()
        )]
    )
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Extrais les prix de https://shop.com",
    config=types.GenerateContentConfig(
        tools=[types.Tool(
            url_context=types.UrlContext()
        )],
        response_mime_type="application/json",
        response_schema={
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "produit": {"type": "string"},
                    "prix": {"type": "number"}
                }
            }
        }
    )
)
Gemini can use Search, Code Execution, and Function Calling together.
config = types.GenerateContentConfig(
    tools=[
        # Google Search
        types.Tool(google_search=types.GoogleSearch()),
        # Code Execution
        types.Tool(
            code_execution=types.ToolCodeExecution()
        ),
        # Function Calling
        types.Tool(
            function_declarations=[save_to_db]
        )
    ]
)
Each built-in tool goes in its own separate Tool object.
# Complex multi-tool prompt
response = client.models.generate_content(
    model="gemini-2.5-pro",
    contents="""
    1. Cherche le cours actuel du Bitcoin
    2. Calcule la variation sur 30 jours
    3. Genere un graphique de tendance
    """,
    config=types.GenerateContentConfig(
        tools=[
            types.Tool(
                google_search=types.GoogleSearch()
            ),
            types.Tool(
                code_execution=types.ToolCodeExecution()
            )
        ]
    )
)
# Gemini: Search -> Code Exec automatically
The main agent in Google's Agent Development Kit framework.
from google.adk.agents import LlmAgent

agent = LlmAgent(
    name="assistant_voyage",
    model="gemini-2.5-flash",
    instruction="""Tu es un assistant voyage.
    Tu aides a planifier des itineraires.
    Utilise les outils disponibles.""",
    tools=[search_flights, book_hotel],
    description="Agent de planification voyage"
)
mon_agent/
    __init__.py    # Exposes root_agent
    agent.py       # Agent definition
    tools.py       # Tool functions
    prompts.py     # Long instructions
    .env           # GOOGLE_API_KEY

# __init__.py
from .agent import root_agent

# Run with the ADK CLI
adk run mon_agent
adk web mon_agent   # Web interface
# Instruction with access to the state
def dynamic_instruction(context):
    user = context.state.get("user_name", "ami")
    lang = context.state.get("language", "fr")
    return f"""Tu es un assistant pour {user}.
    Reponds en {lang}.
    Historique d'achats: {context.state.get('purchases', [])}"""

agent = LlmAgent(
    name="personal_assistant",
    model="gemini-2.5-flash",
    instruction=dynamic_instruction,
    tools=[search_products]
)
Wraps a Python function as an ADK tool.
from google.adk.tools import FunctionTool

def search_flights(
    origin: str,
    destination: str,
    date: str
) -> dict:
    """Rechercher des vols disponibles."""
    return {"flights": [...], "count": 5}

# Explicit registration
flight_tool = FunctionTool(func=search_flights)

# Or pass the function directly (auto-wrap)
agent = LlmAgent(
    tools=[search_flights]  # Auto FunctionTool
)
from google.adk.tools import (
    google_search,
    code_execution
)

agent = LlmAgent(
    name="researcher",
    model="gemini-2.5-flash",
    instruction="Recherche et analyse des donnees",
    tools=[
        google_search,    # Google Search
        code_execution,   # Code sandbox
        my_custom_tool    # Custom tool
    ]
)
from google.adk.tools import ToolContext

def save_preference(
    key: str,
    value: str,
    tool_context: ToolContext
) -> str:
    """Sauvegarde une preference utilisateur."""
    # Access the session state
    tool_context.state[key] = value
    # Available in ToolContext:
    #   tool_context.state   - read/write state
    #   tool_context.actions - flow control
    return f"Preference {key}={value} sauvegardee"
from google.adk.tools import AgentTool

# Specialized agent
math_agent = LlmAgent(
    name="math_expert",
    model="gemini-2.5-flash",
    instruction="Tu resous des problemes maths",
    tools=[code_execution]
)

# Used as a tool by another agent
main_agent = LlmAgent(
    name="assistant",
    model="gemini-2.5-flash",
    instruction="Assistant general",
    tools=[
        AgentTool(agent=math_agent),
        google_search
    ]
)
In-memory session storage (development/testing).
from google.adk.sessions import (
    InMemorySessionService
)

session_service = InMemorySessionService()

# Create a session
session = await session_service.create_session(
    app_name="mon_app",
    user_id="user_123",
    state={
        "language": "fr",
        "user_name": "Jean"
    }
)
print(session.id)  # Generated UUID
from google.adk.sessions import (
    DatabaseSessionService
)

# SQLite
session_service = DatabaseSessionService(
    db_url="sqlite:///sessions.db"
)

# PostgreSQL
session_service = DatabaseSessionService(
    db_url="postgresql://user:pass@host/db"
)

# Same methods as InMemory
session = await session_service.create_session(
    app_name="mon_app",
    user_id="user_456"
)
# In a tool - via ToolContext
def add_to_cart(
    item: str,
    tool_context: ToolContext
) -> str:
    cart = tool_context.state.get("cart", [])
    cart.append(item)
    tool_context.state["cart"] = cart
    return f"{item} ajoute au panier"

# In a dynamic instruction
def instruction(context):
    cart = context.state.get("cart", [])
    return f"Panier actuel: {cart}"

# State key prefixes:
#   "app:"    - shared across sessions
#   "user:"   - specific to the user
#   no prefix - specific to the session
from google.adk.runners import Runner
from google.genai import types

runner = Runner(
    agent=root_agent,
    app_name="mon_app",
    session_service=session_service
)

# Send a message
content = types.Content(
    parts=[types.Part.from_text("Bonjour!")],
    role="user"
)

events = runner.run(
    user_id="user_123",
    session_id=session.id,
    new_message=content
)

for event in events:
    if event.content and event.content.parts:
        print(event.content.parts[0].text)
Long-term memory for ADK agents, persistent across sessions.
from google.adk.memory import (
    InMemoryMemoryService
)

memory_service = InMemoryMemoryService()

runner = Runner(
    agent=root_agent,
    app_name="mon_app",
    session_service=session_service,
    memory_service=memory_service
)
from google.adk.memory import (
    VertexAiRagMemoryService
)

memory_service = VertexAiRagMemoryService(
    rag_corpus="projects/my-project/"
               "locations/us-central1/"
               "ragCorpora/my-corpus"
)

# Automatic storage after each session
# Semantic search over the history
# Relevance is based on embeddings
# Memory is injected automatically
# into the agent's context

# Lifecycle:
#   1. Session ends       -> memory is saved
#   2. New session starts -> memory is searched
#   3. Relevant results are injected
#      into the agent's context

# Configure output_key to save
# specific results
agent = LlmAgent(
    name="agent",
    model="gemini-2.5-flash",
    output_key="last_response",
    instruction="Tu es un assistant.",
    tools=[...]
)
Runs the sub-agents in order, one at a time.
from google.adk.agents import SequentialAgent

pipeline = SequentialAgent(
    name="pipeline_article",
    sub_agents=[
        researcher,   # 1. Research
        writer,       # 2. Writing
        reviewer,     # 3. Review
        publisher     # 4. Publishing
    ]
)

# Each agent receives the previous one's output
# via session.state and output_key
Runs all the sub-agents simultaneously.
from google.adk.agents import ParallelAgent

multi_search = ParallelAgent(
    name="recherche_parallele",
    sub_agents=[
        news_searcher,       # News
        academic_searcher,   # Papers
        social_searcher      # Social media
    ]
)

# All run in parallel
# Results can be aggregated via output_key
from google.adk.agents import LoopAgent

refinement = LoopAgent(
    name="amelioration_iterative",
    sub_agents=[
        writer,   # Writes / improves
        critic    # Evaluates the quality
    ],
    max_iterations=5
)

# The critic can stop the loop via
# tool_context.actions.escalate
# once the quality is satisfactory
# Root agent with delegation
root = LlmAgent(
    name="orchestrator",
    model="gemini-2.5-pro",
    instruction="""Tu diriges l'equipe.
    Delegue aux agents specialises.""",
    sub_agents=[
        travel_agent,
        finance_agent,
        support_agent
    ]
)

# Transfer via a natural-language instruction
# "Transfere a travel_agent pour les vols"
# The orchestrator decides the delegation
# based on each sub_agent's description
Connect MCP (Model Context Protocol) servers to ADK agents.
from google.adk.tools.mcp import MCPToolset
from mcp import StdioServerParameters

# Local MCP server (stdio)
mcp_tools, cleanup = await MCPToolset.from_server(
    connection_params=StdioServerParameters(
        command="npx",
        args=["-y", "@modelcontextprotocol/"
              "server-filesystem",
              "/path/to/allowed/dir"]
    )
)

agent = LlmAgent(
    name="file_agent",
    tools=mcp_tools  # MCP tools
)
from mcp import SseServerParameters

mcp_tools, cleanup = await MCPToolset.from_server(
    connection_params=SseServerParameters(
        url="https://mcp-server.example.com/sse",
        headers={
            "Authorization": "Bearer TOKEN"
        }
    )
)

# Clean up when done
# await cleanup()
# Filter which MCP tools are exposed
mcp_tools, cleanup = await MCPToolset.from_server(
    connection_params=StdioServerParameters(
        command="npx",
        args=["-y", "@mcp/server-github"]
    ),
    tool_filter=["create_issue", "list_repos"]
)

# Combine MCP + native tools
agent = LlmAgent(
    name="dev_agent",
    tools=[
        *mcp_tools,       # MCP
        google_search,    # Built-in
        my_custom_func    # Custom
    ]
)
# Global installation
npm install -g @google/gemini-cli

# Or with npx (no installation)
npx @google/gemini-cli

# Interactive launch
gemini

# With a direct prompt
gemini "Explique les closures en JS"

# Non-interactive mode (pipe)
echo "Resous x^2-4=0" | gemini
// ~/.gemini/settings.json
{
  "mcpServers": {
    "filesystem": {
      "command": "npx",
      "args": [
        "-y",
        "@modelcontextprotocol/server-filesystem",
        "/home/user/projects"
      ]
    },
    "github": {
      "command": "npx",
      "args": ["-y", "@mcp/server-github"],
      "env": {
        "GITHUB_TOKEN": "ghp_xxx"
      }
    }
  }
}
# GEMINI.md file at the project root
# Loaded automatically as context

# Mon Projet

## Stack technique
- Python 3.12, FastAPI, PostgreSQL

## Conventions
- PEP 8, docstrings Google style
- Tests dans /tests, pytest

## Commandes
- `make run` - Lancer le serveur
- `make test` - Lancer les tests

# Gemini CLI reads this file automatically
# and uses it as the system instruction
Bidirectional streaming for audio, video, and text in real time.
from google import genai

client = genai.Client(api_key="API_KEY")

config = {
    "response_modalities": ["AUDIO"],
    "speech_config": {
        "voice_config": {
            "prebuilt_voice_config": {
                "voice_name": "Puck"
            }
        }
    }
}

async with client.aio.live.connect(
    model="gemini-2.0-flash-live",
    config=config
) as session:
    await session.send("Bonjour!", end_of_turn=True)
    async for msg in session.receive():
        print(msg)
# Real-time audio (microphone)
import pyaudio

stream = pyaudio.PyAudio().open(
    format=pyaudio.paInt16,
    channels=1,
    rate=16000,
    input=True,
    frames_per_buffer=1024
)

while True:
    data = stream.read(1024)
    await session.send(
        types.LiveClientRealtimeInput(
            media_chunks=[types.Blob(
                data=data,
                mime_type="audio/pcm;rate=16000"
            )]
        )
    )
# Interruption
# Sending audio while the model is speaking
# = automatic interruption

# Context window
# 32K tokens of context for the Live API
# Automatic compression of the history

# Reconnection
# No native session resume
# Implement it client-side if needed

# Timeout
# Max session: ~15 min of continuous audio
# Configurable via session_config
config = {
    "response_modalities": ["AUDIO"],
    "tools": [
        {"function_declarations": [{
            "name": "get_weather",
            "description": "Obtenir meteo",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"}
                }
            }
        }]},
        {"google_search": {}},
        {"code_execution": {}}
    ]
}
# Function calling works while streaming!
config = types.LiveConnectConfig(
    response_modalities=["AUDIO"],
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=
                types.PrebuiltVoiceConfig(
                    voice_name="Kore"
                )
        )
    )
)

# Multimodal: text + audio
config.response_modalities = ["AUDIO", "TEXT"]
config = types.LiveConnectConfig(
    response_modalities=["AUDIO"],
    system_instruction="""Tu es un assistant
    vocal francophone. Tu parles de maniere
    naturelle et conversationnelle.
    Tu utilises des expressions francaises.
    Tu adaptes ton debit au sujet.""",
    speech_config=types.SpeechConfig(
        voice_config=types.VoiceConfig(
            prebuilt_voice_config=
                types.PrebuiltVoiceConfig(
                    voice_name="Aoede"
                )
        )
    )
)
result = client.models.embed_content(
    model="gemini-embedding-001",
    contents="Intelligence artificielle",
    config=types.EmbedContentConfig(
        task_type="SEMANTIC_SIMILARITY",
        output_dimensionality=768
    )
)

embedding = result.embeddings[0].values
print(f"Dimensions: {len(embedding)}")
# 768, 1536 or 3072 depending on config
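A minimal sketch of comparing two such vectors with cosine similarity, assuming numpy is available and both embeddings use the same output_dimensionality:
import numpy as np

def cosine_similarity(a, b):
    # Cosine similarity = dot product of the L2-normalized vectors
    a, b = np.asarray(a), np.asarray(b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# A score close to 1.0 means the texts are semantically very similar:
# score = cosine_similarity(emb_1, emb_2)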
Matryoshka Representation Learning: reduce the dimensionality without retraining.
# High precision - 3072 dimensions
config_hq = types.EmbedContentConfig(
    task_type="RETRIEVAL_DOCUMENT",
    output_dimensionality=3072
)

# Balanced - 1536 dimensions
config_mid = types.EmbedContentConfig(
    task_type="RETRIEVAL_DOCUMENT",
    output_dimensionality=1536
)

# Light / fast - 768 dimensions
config_fast = types.EmbedContentConfig(
    task_type="RETRIEVAL_DOCUMENT",
    output_dimensionality=768
)

# Same model, quality/speed trade-off
# Embed several texts at once
documents = [
    "L'IA transforme l'industrie",
    "Python est populaire en data science",
    "Les LLMs comprennent le langage"
]

result = client.models.embed_content(
    model="gemini-embedding-001",
    contents=documents,
    config=types.EmbedContentConfig(
        task_type="RETRIEVAL_DOCUMENT",
        output_dimensionality=768
    )
)

for i, emb in enumerate(result.embeddings):
    print(f"Doc {i}: {len(emb.values)} dims")

# Max 2048 texts per request
# Max 2048 tokens per text
// requests.jsonl - one request per line
{"custom_id":"req-1","request":{
"model":"gemini-2.5-flash",
"contents":[{"role":"user","parts":[
{"text":"Resume l'histoire de France"}
]}],
"config":{"max_output_tokens":500}
}}
{"custom_id":"req-2","request":{
"model":"gemini-2.5-flash",
"contents":[{"role":"user","parts":[
{"text":"Explique la relativite"}
]}],
"config":{"max_output_tokens":500}
}}
# Upload the JSONL file
batch_file = client.files.upload(
    file="requests.jsonl"
)

# Create the batch job
batch_job = client.batches.create(
    model="gemini-2.5-flash",
    src=batch_file.uri,
    config=types.CreateBatchJobConfig(
        display_name="mon_batch_001"
    )
)

print(f"Job ID: {batch_job.name}")
print(f"Status: {batch_job.state}")
# PENDING -> RUNNING -> SUCCEEDED
# Check the status
import time

while True:
    job = client.batches.get(name=batch_job.name)
    print(f"Status: {job.state}")
    if job.state in ["SUCCEEDED", "FAILED"]:
        break
    time.sleep(60)

# Retrieve the results
if job.state == "SUCCEEDED":
    results = client.batches.list_results(
        name=batch_job.name
    )
    for result in results:
        print(f"{result.custom_id}:")
        print(result.response.text)
from google.genai import types

config = types.GenerateContentConfig(
    safety_settings=[
        types.SafetySetting(
            category="HARM_CATEGORY_HARASSMENT",
            threshold="BLOCK_LOW_AND_ABOVE"
        ),
        types.SafetySetting(
            category="HARM_CATEGORY_HATE_SPEECH",
            threshold="BLOCK_MEDIUM_AND_ABOVE"
        ),
        types.SafetySetting(
            category="HARM_CATEGORY_DANGEROUS_CONTENT",
            threshold="BLOCK_ONLY_HIGH"
        )
    ]
)

# Available thresholds:
#   BLOCK_NONE / BLOCK_ONLY_HIGH
#   BLOCK_MEDIUM_AND_ABOVE
#   BLOCK_LOW_AND_ABOVE
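A minimal sketch, assuming the standard response fields, for checking whether these settings blocked the prompt or the generated output:
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="...",
    config=config
)

# Prompt blocked outright?
if response.prompt_feedback and response.prompt_feedback.block_reason:
    print(f"Prompt blocked: {response.prompt_feedback.block_reason}")
else:
    candidate = response.candidates[0]
    print(candidate.finish_reason)  # "SAFETY" = the output was filtered
    for rating in candidate.safety_ratings or []:
        print(rating.category, rating.probability)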
// dataset.jsonl - training format
{"contents": [
{"role": "user", "parts": [
{"text": "Traduis: Hello"}
]},
{"role": "model", "parts": [
{"text": "Bonjour"}
]}
]}
{"contents": [
{"role": "user", "parts": [
{"text": "Traduis: Goodbye"}
]},
{"role": "model", "parts": [
{"text": "Au revoir"}
]}
]}
# Create a fine-tuning job
tuning_job = client.tunings.tune(
    base_model="models/gemini-2.0-flash-001",
    training_dataset=types.TuningDataset(
        gcs_uri="gs://bucket/dataset.jsonl"
        # or inline_examples=[...]
    ),
    config=types.CreateTuningJobConfig(
        epoch_count=5,
        learning_rate=0.001,
        tuned_model_display_name="mon-modele-v1"
    )
)

# Use the fine-tuned model
response = client.models.generate_content(
    model=tuning_job.tuned_model.model,
    contents="Traduis: Good morning"
)

# Min 20 examples, 100+ recommended