Invoking A2A Agents
Learn how to invoke A2A agents through LiteLLM using different methods.
Want to test with your own agent? Deploy this template A2A agent powered by Google Gemini:
shin-bot-litellm/a2a-gemini-agent - Simple deployable A2A agent with streaming support
A2A SDK​
Use the A2A Python SDK to invoke agents through LiteLLM using the A2A protocol.
Non-Streaming​
This example shows how to:
- List available agents - Query /v1/agents to see which agents your key can access
- Select an agent - Pick an agent from the list
- Invoke via A2A - Use the A2A protocol to send messages to the agent
from uuid import uuid4
import httpx
import asyncio
from a2a.client import A2ACardResolver, A2AClient
from a2a.types import MessageSendParams, SendMessageRequest
# === CONFIGURE THESE ===
LITELLM_BASE_URL = "http://localhost:4000" # Your LiteLLM proxy URL
LITELLM_VIRTUAL_KEY = "sk-1234" # Your LiteLLM Virtual Key
# =======================
async def main():
    """List the agents this key can access, then invoke the first one via A2A.

    Prints the available agents, the name of the agent being invoked, and
    the agent's reply as indented JSON. Returns early, after printing a
    notice, when the key has no agents.

    Raises:
        httpx.HTTPStatusError: If the /v1/agents request returns a 4xx/5xx
            status (for example, an invalid virtual key).
    """
    headers = {"Authorization": f"Bearer {LITELLM_VIRTUAL_KEY}"}

    async with httpx.AsyncClient(headers=headers) as client:
        # Step 1: List available agents
        response = await client.get(f"{LITELLM_BASE_URL}/v1/agents")
        # Fail fast on auth/proxy errors instead of iterating an error body.
        response.raise_for_status()
        agents = response.json()

        print("Available agents:")
        for agent in agents:
            print(f" - {agent['agent_name']} (ID: {agent['agent_id']})")

        if not agents:
            print("No agents available for this key")
            return

        # Step 2: Select an agent and invoke it
        selected_agent = agents[0]
        agent_id = selected_agent["agent_id"]
        agent_name = selected_agent["agent_name"]
        print(f"\nInvoking: {agent_name}")

        # Step 3: Use A2A protocol to invoke the agent
        base_url = f"{LITELLM_BASE_URL}/a2a/{agent_id}"
        resolver = A2ACardResolver(httpx_client=client, base_url=base_url)
        agent_card = await resolver.get_agent_card()
        a2a_client = A2AClient(httpx_client=client, agent_card=agent_card)

        request = SendMessageRequest(
            id=str(uuid4()),
            params=MessageSendParams(
                message={
                    "role": "user",
                    "parts": [{"kind": "text", "text": "Hello, what can you do?"}],
                    "messageId": uuid4().hex,
                }
            ),
        )
        response = await a2a_client.send_message(request)

        # model_dump() does not accept indent; model_dump_json() does and
        # yields the pretty-printed JSON the example intends to show.
        print(f"Response: {response.model_dump_json(exclude_none=True, indent=4)}")


if __name__ == "__main__":
    asyncio.run(main())
Streaming​
For streaming responses, use send_message_streaming:
from uuid import uuid4
import httpx
import asyncio
from a2a.client import A2ACardResolver, A2AClient
from a2a.types import MessageSendParams, SendStreamingMessageRequest
# === CONFIGURE THESE ===
LITELLM_BASE_URL = "http://localhost:4000" # Your LiteLLM proxy URL
LITELLM_VIRTUAL_KEY = "sk-1234" # Your LiteLLM Virtual Key
LITELLM_AGENT_NAME = "ij-local" # Agent name registered in LiteLLM
# =======================
async def main():
    """Stream a reply from the configured agent through LiteLLM's A2A endpoint.

    Resolves the agent card for LITELLM_AGENT_NAME, sends one user message
    with send_message_streaming, and prints every streamed chunk as a
    JSON-compatible dict.
    """
    auth_headers = {"Authorization": f"Bearer {LITELLM_VIRTUAL_KEY}"}
    agent_url = f"{LITELLM_BASE_URL}/a2a/{LITELLM_AGENT_NAME}"

    async with httpx.AsyncClient(headers=auth_headers) as http:
        # Fetch the agent card first; the A2A client is built from it.
        card = await A2ACardResolver(
            httpx_client=http, base_url=agent_url
        ).get_agent_card()
        a2a_client = A2AClient(httpx_client=http, agent_card=card)

        # Build the streaming request: a request id plus one user text message.
        request_id = str(uuid4())
        user_message = {
            "role": "user",
            "parts": [{"kind": "text", "text": "Tell me a long story"}],
            "messageId": uuid4().hex,
        }
        streaming_request = SendStreamingMessageRequest(
            id=request_id,
            params=MessageSendParams(message=user_message),
        )

        # Print each chunk as soon as it arrives.
        async for event in a2a_client.send_message_streaming(streaming_request):
            print(event.model_dump(mode="json", exclude_none=True))


if __name__ == "__main__":
    asyncio.run(main())
/chat/completions API (OpenAI SDK)​
You can also invoke A2A agents using the familiar OpenAI SDK by using the a2a/ model prefix.
Non-Streaming​
- Python
- TypeScript
- cURL
import openai
# Point the OpenAI client at the LiteLLM proxy.
client = openai.OpenAI(
    base_url="http://localhost:4000",  # Your LiteLLM proxy URL
    api_key="sk-1234",  # Your LiteLLM Virtual Key
)

# The a2a/ prefix routes the request to the named A2A agent.
messages = [{"role": "user", "content": "Hello, what can you do?"}]
response = client.chat.completions.create(
    model="a2a/my-agent",  # Use a2a/ prefix with your agent name
    messages=messages,
)

# Print the text of the first (and here only expected) choice.
first_choice = response.choices[0]
print(first_choice.message.content)
import OpenAI from 'openai';
// Point the OpenAI client at the LiteLLM proxy.
const client = new OpenAI({
apiKey: 'sk-1234', // Your LiteLLM Virtual Key
baseURL: 'http://localhost:4000' // Your LiteLLM proxy URL
});
// Send one user message; the a2a/ model prefix selects the A2A agent.
const response = await client.chat.completions.create({
model: 'a2a/my-agent', // Use a2a/ prefix with your agent name
messages: [
{ role: 'user', content: 'Hello, what can you do?' }
]
});
// Print the text of the first choice.
console.log(response.choices[0].message.content);
# POST a chat completion to the LiteLLM proxy; the a2a/ model prefix selects the A2A agent.
curl -X POST http://localhost:4000/v1/chat/completions \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "a2a/my-agent",
"messages": [
{"role": "user", "content": "Hello, what can you do?"}
]
}'
Streaming​
- Python
- TypeScript
- cURL
import openai
# Point the OpenAI client at the LiteLLM proxy.
client = openai.OpenAI(
    api_key="sk-1234",  # Your LiteLLM Virtual Key
    base_url="http://localhost:4000",  # Your LiteLLM proxy URL
)

# With stream=True the call returns an iterable of incremental chunks.
stream = client.chat.completions.create(
    model="a2a/my-agent",  # Use a2a/ prefix with your agent name
    messages=[
        {"role": "user", "content": "Tell me a long story"}
    ],
    stream=True,
)

for chunk in stream:
    # A chunk may arrive with an empty choices list; indexing [0] on it
    # would raise IndexError, so skip such chunks.
    if not chunk.choices:
        continue
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="", flush=True)
import OpenAI from 'openai';
// Point the OpenAI client at the LiteLLM proxy.
const client = new OpenAI({
apiKey: 'sk-1234', // Your LiteLLM Virtual Key
baseURL: 'http://localhost:4000' // Your LiteLLM proxy URL
});
// stream: true yields an async iterable of chunks instead of a single response.
const stream = await client.chat.completions.create({
model: 'a2a/my-agent', // Use a2a/ prefix with your agent name
messages: [
{ role: 'user', content: 'Tell me a long story' }
],
stream: true
});
// Write each piece of text as it arrives, without adding newlines.
for await (const chunk of stream) {
// Optional chaining tolerates chunks that have no choice or no delta content.
const content = chunk.choices[0]?.delta?.content;
if (content) {
process.stdout.write(content);
}
}
# Same chat completion request as the non-streaming call, with "stream": true set in the body.
curl -X POST http://localhost:4000/v1/chat/completions \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "a2a/my-agent",
"messages": [
{"role": "user", "content": "Tell me a long story"}
],
"stream": true
}'
Task APIs (tasks/get, tasks/list, …)​
Agents that return a submitted task from message/send expect clients to poll with tasks/get. Call the same LiteLLM base URL with JSON-RPC:
# Send a JSON-RPC tasks/get request for one task to the agent's LiteLLM A2A endpoint.
# AGENT_ID is expanded from the shell environment; replace TASK_ID_FROM_SEND_RESPONSE
# with the task id from the earlier message/send response.
curl -X POST "http://localhost:4000/a2a/${AGENT_ID}" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"jsonrpc": "2.0",
"id": "req-2",
"method": "tasks/get",
"params": {"id": "TASK_ID_FROM_SEND_RESPONSE"}
}'
LiteLLM forwards tasks/get, tasks/list, tasks/cancel, push-notification methods, and agent/getAuthenticatedExtendedCard to the upstream agent URL. See the full method list in Supported A2A methods.
Key Differences​
| Method | Use Case | Advantages |
|---|---|---|
| A2A SDK | Native A2A protocol integration | • Full A2A protocol support • Access to task states and artifacts • Context management |
| OpenAI SDK | Familiar OpenAI-style interface | • Drop-in replacement for OpenAI calls • Easier migration from LLM to agent workflows • Works with existing OpenAI tooling |
When using the OpenAI SDK, always prefix your agent name with a2a/ (e.g., a2a/my-agent) to route requests to the A2A agent instead of an LLM provider.