1# pylint: disable=line-too-long,useless-suppression
2# ------------------------------------
3# Copyright (c) Microsoft Corporation.
4# Licensed under the MIT License.
5# ------------------------------------
6
7"""
8DESCRIPTION:
9 This sample demonstrates how to use Agent operations with the Deep Research tool from
10 the Azure Agents service through the synchronous Python client. Deep Research issues
11 external Bing Search queries and invokes an LLM, so each run can take several minutes
12 to complete.
13
14 For more information see the Deep Research Tool document: https://aka.ms/agents-deep-research
15
16USAGE:
17 python sample_agents_deep_research.py
18
19 Before running the sample:
20
21 pip install azure-identity
22 pip install --pre azure-ai-projects
23
    Set these environment variables with your own values:
25 1) PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
26 page of your Azure AI Foundry portal.
27 2) MODEL_DEPLOYMENT_NAME - The deployment name of the arbitration AI model, as found under the "Name" column in
28 the "Models + endpoints" tab in your Azure AI Foundry project.
29 3) DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME - The deployment name of the Deep Research AI model, as found under the "Name" column in
30 the "Models + endpoints" tab in your Azure AI Foundry project.
31 4) BING_RESOURCE_NAME - The resource name of the Bing connection, you can find it in the "Connected resources" tab
32 in the Management Center of your AI Foundry project.
33"""
34
35import os
36import time
37import re
38from typing import Optional, Dict, List
39from azure.ai.projects import AIProjectClient
40from azure.identity import DefaultAzureCredential
41from azure.ai.agents import AgentsClient
42from azure.ai.agents.models import DeepResearchTool, MessageRole, ThreadMessage
43
def convert_citations_to_superscript(markdown_content):
    """
    Rewrite citation markers in markdown text as HTML superscripts.

    A marker has the form 【<digits>:<digits>†source】 and is replaced by
    ``<sup>N</sup>``, where N is the number that follows the colon. Afterwards,
    every run of two or more ``<sup>N</sup>`` tags separated only by optional
    whitespace and at most one comma is merged into a single tag that lists the
    distinct numbers in ascending order (for example ``<sup>1,4,5</sup>``).
    The merge step runs over the whole converted text, so ``<sup>N</sup>`` tags
    that were already present in the input are merged the same way.

    Args:
        markdown_content (str): The markdown content containing citation markers

    Returns:
        str: The markdown content with citations converted to HTML superscript format
    """
    # Pass 1: 【a:b†source】 -> <sup>b</sup>.
    # \u3010 / \u3011 are the lenticular brackets and \u2020 is the dagger; the
    # escapes live in a raw string and are resolved by the regex engine.
    with_superscripts = re.sub(
        r"\u3010\d+:(\d+)\u2020source\u3011",
        lambda marker: f"<sup>{marker.group(1)}</sup>",
        markdown_content,
    )

    def merge_run(run_match):
        # Collect every number in the run, drop duplicates, order numerically.
        numbers = {int(digits) for digits in re.findall(r"<sup>(\d+)</sup>", run_match.group(0))}
        return "<sup>" + ",".join(map(str, sorted(numbers))) + "</sup>"

    # Pass 2: collapse adjacent tags such as <sup>5</sup>,<sup>4</sup><sup>5</sup>
    # into <sup>4,5</sup>.
    return re.sub(r"(<sup>\d+</sup>)(\s*,?\s*<sup>\d+</sup>)+", merge_run, with_superscripts)
94
def fetch_and_print_new_agent_response(
    thread_id: str,
    agents_client: AgentsClient,
    last_message_id: Optional[str] = None,
    progress_filename: str = "research_progress.txt",
) -> Optional[str]:
    """
    Report the newest interim agent message of a thread, if it has not been reported yet.

    The latest message with the agent role is fetched. When it is new and at least
    one of its text parts starts with "cot_summary:", its text (with that prefix
    shown as "Reasoning:") and its URL citations are printed and appended to the
    progress file.

    Args:
        thread_id (str): The ID of the thread to fetch messages from
        agents_client (AgentsClient): The Azure AI agents client instance
        last_message_id (Optional[str], optional): ID of the last processed message
            to avoid duplicates. Defaults to None.
        progress_filename (str, optional): Name of the file to append progress to.
            Defaults to "research_progress.txt".

    Returns:
        Optional[str]: The ID of the message that was reported, or the unchanged
            last_message_id when there was nothing new to report
    """
    latest = agents_client.messages.get_last_message_by_role(
        thread_id=thread_id,
        role=MessageRole.AGENT,
    )

    # Nothing to do when there is no agent message yet or it was already handled.
    if not latest or latest.id == last_message_id:
        return last_message_id

    # Only messages carrying a "cot_summary" part count as progress updates.
    has_summary = False
    for part in latest.text_messages:
        if part.text.value.startswith("cot_summary:"):
            has_summary = True
            break
    if not has_summary:
        return last_message_id

    print("\nAgent response:")
    agent_text = "\n".join(
        part.text.value.replace("cot_summary:", "Reasoning:") for part in latest.text_messages
    )
    print(agent_text)

    # Echo the citations to the console (if any)
    annotations = latest.url_citation_annotations
    for annotation in annotations:
        citation = annotation.url_citation
        print(f"URL Citation: [{citation.title}]({citation.url})")

    # Append the same information to the progress file
    with open(progress_filename, "a", encoding="utf-8") as progress_file:
        progress_file.writelines(["\nAGENT>\n", agent_text, "\n"])
        progress_file.writelines(
            f"Citation: [{annotation.url_citation.title}]({annotation.url_citation.url})\n"
            for annotation in annotations
        )

    return latest.id
146
def create_research_summary(message: ThreadMessage, filepath: str = "research_report.md") -> None:
    """
    Write an agent message to a markdown report, followed by a numbered list of its citations.

    The text parts of the message are stripped, joined with blank lines, passed
    through convert_citations_to_superscript and written to the file. If the
    message has URL citation annotations, a "## Citations" section is appended
    that lists each citation once, numbered in order of first appearance.

    Args:
        message (ThreadMessage): The thread message containing the agent's research response
        filepath (str, optional): Path where the research summary will be saved.
            Defaults to "research_report.md".

    Returns:
        None: This function doesn't return a value, it writes to a file
    """
    if not message:
        print("No message content provided, cannot create research report.")
        return

    with open(filepath, "w", encoding="utf-8") as report:
        # Report body: text parts separated by blank lines, citations as superscripts
        body = "\n\n".join(part.text.value.strip() for part in message.text_messages)
        report.write(convert_citations_to_superscript(body))

        annotations = message.url_citation_annotations
        if annotations:
            report.write("\n\n## Citations\n")

            seen_urls = set()
            seen_keys = set()
            entries: List[str] = []

            for annotation in annotations:
                link = annotation.url_citation.url
                # Each URL is considered only the first time it shows up
                if link in seen_urls:
                    continue
                seen_urls.add(link)

                label = annotation.url_citation.title or link
                # The annotation text identifies the citation; fall back to the URL
                key = annotation.text or f"fallback_{link}"
                if key in seen_keys:
                    continue
                seen_keys.add(key)
                entries.append(f"[{label}]({link})")

            # Numbered list, 1-based, in the order the entries were collected
            report.writelines(f"{number}. {entry}\n" for number, entry in enumerate(entries, start=1))

    print(f"Research report written to '{filepath}'.")
202
if __name__ == "__main__":
    # Script entry point: create an agent with the Deep Research tool, run it on one
    # research question, stream interim progress to the console and to
    # "research_progress.txt", write the final answer to "research_report.md",
    # and finally delete the agent.
    project_client = AIProjectClient(
        endpoint=os.environ["PROJECT_ENDPOINT"],
        credential=DefaultAzureCredential(),
    )
    # [START create_agent_with_deep_research_tool]
    bing_connection = project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"])

    # Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
    deep_research_tool = DeepResearchTool(
        bing_grounding_connection_id=bing_connection.id,
        deep_research_model=os.environ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME"],
    )

    # Create Agent with the Deep Research tool and process Agent run
    with project_client:

        with project_client.agents as agents_client:

            # Create a new agent that has the Deep Research tool attached.
            # NOTE: To add Deep Research to an existing agent, fetch it with `get_agent(agent_id)` and then,
            # update the agent with the Deep Research tool.
            agent = agents_client.create_agent(
                model=os.environ["MODEL_DEPLOYMENT_NAME"],
                name="my-agent",
                instructions="You are a helpful Agent that assists in researching scientific topics.",
                tools=deep_research_tool.definitions,
            )

            # [END create_agent_with_deep_research_tool]
            print(f"Created agent, ID: {agent.id}")

            # Everything after agent creation runs under try/finally so the agent is
            # deleted even if the run is interrupted (for example Ctrl+C during the
            # multi-minute polling loop) or a service call raises.
            try:
                # Create thread for communication
                thread = agents_client.threads.create()
                print(f"Created thread, ID: {thread.id}")

                # Create message to thread
                message = agents_client.messages.create(
                    thread_id=thread.id,
                    role="user",
                    content=(
                        "Research the current state of studies on orca intelligence and orca language, including what is currently known about orcas' cognitive capabilities and communication systems."
                    ),
                )
                print(f"Created message, ID: {message.id}")

                print("Start processing the message... this may take a few minutes to finish. Be patient!")
                # Poll the run as long as run status is queued or in progress
                run = agents_client.runs.create(thread_id=thread.id, agent_id=agent.id)
                last_message_id = None
                while run.status in ("queued", "in_progress"):
                    time.sleep(1)
                    run = agents_client.runs.get(thread_id=thread.id, run_id=run.id)

                    # Report any new interim agent message; the returned ID prevents
                    # the same message from being reported twice.
                    last_message_id = fetch_and_print_new_agent_response(
                        thread_id=thread.id,
                        agents_client=agents_client,
                        last_message_id=last_message_id,
                        progress_filename="research_progress.txt",
                    )
                    print(f"Run status: {run.status}")

                # Once the run is finished, print the final status and ID
                print(f"Run finished with status: {run.status}, ID: {run.id}")

                if run.status == "failed":
                    print(f"Run failed: {run.last_error}")

                # Fetch the final message from the agent in the thread and create a research summary
                final_message = agents_client.messages.get_last_message_by_role(
                    thread_id=thread.id, role=MessageRole.AGENT
                )
                if final_message:
                    create_research_summary(final_message)
            finally:
                # Clean-up and delete the agent once the run is finished.
                # NOTE: Comment out this line if you plan to reuse the agent later.
                agents_client.delete_agent(agent.id)
                print("Deleted agent")