Azure AI Agents Deep Research Tool Integration with Python Client

Code

1# pylint: disable=line-too-long,useless-suppression
2# ------------------------------------
3# Copyright (c) Microsoft Corporation.
4# Licensed under the MIT License.
5# ------------------------------------
6 
7"""
8DESCRIPTION:
9    This sample demonstrates how to use Agent operations with the Deep Research tool from
10    the Azure Agents service through the synchronous Python client. Deep Research issues
11    external Bing Search queries and invokes an LLM, so each run can take several minutes
12    to complete.
13 
14    For more information see the Deep Research Tool document: https://aka.ms/agents-deep-research
15 
16USAGE:
17    python sample_agents_deep_research.py
18 
19    Before running the sample:
20 
21    pip install azure-identity
22    pip install --pre azure-ai-projects
23 
24    Set these environment variables with your own values:
25    1) PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
26                          page of your Azure AI Foundry portal.
27    2) MODEL_DEPLOYMENT_NAME - The deployment name of the arbitration AI model, as found under the "Name" column in
28       the "Models + endpoints" tab in your Azure AI Foundry project.
29    3) DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME - The deployment name of the Deep Research AI model, as found under the "Name" column in
30       the "Models + endpoints" tab in your Azure AI Foundry project.
31    4) BING_RESOURCE_NAME - The resource name of the Bing connection, which you can find in the "Connected resources" tab
32       in the Management Center of your AI Foundry project.
33"""
34 
35import os
36import time
37import re
38from typing import Optional, Dict, List
39from azure.ai.projects import AIProjectClient
40from azure.identity import DefaultAzureCredential
41from azure.ai.agents import AgentsClient
42from azure.ai.agents.models import DeepResearchTool, MessageRole, ThreadMessage
43 
def convert_citations_to_superscript(markdown_content):
    """
    Rewrite raw citation markers in markdown text as HTML superscripts.

    A marker has the form 【78:12†source】; it is replaced by <sup>12</sup>, i.e. only the
    number after the colon is kept. Runs of adjacent superscripts (optionally separated by
    whitespace and/or a single comma) are then merged into one tag whose numbers are
    de-duplicated and listed in ascending order, e.g. <sup>4,5</sup>.

    Args:
        markdown_content (str): Markdown text that may contain citation markers

    Returns:
        str: The text with markers replaced by (merged) <sup>...</sup> tags
    """
    # Step 1: every 【n:m†source】 marker becomes its own <sup>m</sup> tag.
    superscripted = re.sub(r"\u3010\d+:(\d+)\u2020source\u3011", r"<sup>\1</sup>", markdown_content)

    # Step 2: collapse each run of two or more neighbouring tags into a single tag.
    def merge_run(run):
        # Going through int() makes the ordering numeric and lets the set drop repeats.
        numbers = {int(digits) for digits in re.findall(r"<sup>(\d+)</sup>", run.group(0))}
        return "<sup>" + ",".join(map(str, sorted(numbers))) + "</sup>"

    return re.sub(r"(<sup>\d+</sup>)(\s*,?\s*<sup>\d+</sup>)+", merge_run, superscripted)
94 
def fetch_and_print_new_agent_response(
    thread_id: str,
    agents_client: AgentsClient,
    last_message_id: Optional[str] = None,
    progress_filename: str = "research_progress.txt",
) -> Optional[str]:
    """
    Report the newest interim agent message of a thread, on the console and in a progress file.

    The latest message with the agent role is looked up. It is reported only when it differs
    from the message identified by ``last_message_id`` and at least one of its text parts
    starts with "cot_summary:". Reporting prints the text (with "cot_summary:" shown as
    "Reasoning:") and its URL citations, and appends the same information to the progress file.

    Args:
        thread_id (str): The ID of the thread to read from
        agents_client (AgentsClient): The Azure AI agents client instance
        last_message_id (Optional[str], optional): ID of the message reported by the
            previous call, used to skip repeats. Defaults to None.
        progress_filename (str, optional): File the progress is appended to.
            Defaults to "research_progress.txt".

    Returns:
        Optional[str]: The ID of the message that was just reported, or the unchanged
            ``last_message_id`` when nothing was reported
    """
    latest = agents_client.messages.get_last_message_by_role(
        thread_id=thread_id,
        role=MessageRole.AGENT,
    )

    # Nothing to do when there is no agent message yet or it was already handled.
    if not latest or latest.id == last_message_id:
        return last_message_id

    # Only messages carrying a "cot_summary:" part count as progress updates.
    texts = [part.text.value for part in latest.text_messages]
    if not any(text.startswith("cot_summary:") for text in texts):
        return last_message_id

    agent_text = "\n".join(text.replace("cot_summary:", "Reasoning:") for text in texts)
    print("\nAgent response:")
    print(agent_text)

    # Gather (title, url) pairs once; they feed both the console and the file output.
    citations = [(ann.url_citation.title, ann.url_citation.url) for ann in latest.url_citation_annotations]
    for title, url in citations:
        print(f"URL Citation: [{title}]({url})")

    # Append (not overwrite) so the file accumulates the whole research trail.
    with open(progress_filename, "a", encoding="utf-8") as fp:
        fp.write(f"\nAGENT>\n{agent_text}\n")
        fp.writelines(f"Citation: [{title}]({url})\n" for title, url in citations)

    return latest.id
146 
def create_research_summary(message: ThreadMessage, filepath: str = "research_report.md") -> None:
    """
    Write an agent message to a markdown report, followed by a numbered list of its sources.

    The text parts of the message are stripped, joined with blank lines, passed through
    ``convert_citations_to_superscript`` and written to ``filepath`` (overwriting it). If the
    message has URL citation annotations, a "## Citations" section is appended that lists
    each source as a numbered markdown link, skipping repeated URLs and repeated
    annotation texts.

    Args:
        message (ThreadMessage): The thread message holding the agent's research response
        filepath (str, optional): Destination of the report. Defaults to "research_report.md".

    Returns:
        None: The result is the file written to ``filepath``
    """
    if not message:
        print("No message content provided, cannot create research report.")
        return

    with open(filepath, "w", encoding="utf-8") as fp:
        # Report body: all text parts, with citation markers turned into superscripts.
        body = "\n\n".join(part.text.value.strip() for part in message.text_messages)
        fp.write(convert_citations_to_superscript(body))

        if message.url_citation_annotations:
            fp.write("\n\n## Citations\n")
            visited_urls = set()
            # Annotation texts (or URL-based fallbacks) that already produced an entry.
            used_keys = set()
            entries: List[str] = []

            for ann in message.url_citation_annotations:
                url = ann.url_citation.url
                title = ann.url_citation.title or url

                if url in visited_urls:
                    continue
                # The URL counts as visited even if its annotation text turns out to be a repeat.
                visited_urls.add(url)

                key = ann.text if ann.text else f"fallback_{url}"
                if key in used_keys:
                    continue
                used_keys.add(key)
                entries.append(f"[{title}]({url})")

            # Numbering follows first appearance, starting at 1.
            for number, entry in enumerate(entries, start=1):
                fp.write(f"{number}. {entry}\n")

    print(f"Research report written to '{filepath}'.")
202 
if __name__ == "__main__":
    # Script entry point: create an agent with the Deep Research tool, run one research
    # request on a new thread while reporting interim progress, write the final report,
    # and delete the agent again.
    project_client = AIProjectClient(
        endpoint=os.environ["PROJECT_ENDPOINT"],
        credential=DefaultAzureCredential(),
    )
    # [START create_agent_with_deep_research_tool]
    bing_connection = project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"])

    # Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
    deep_research_tool = DeepResearchTool(
        bing_grounding_connection_id=bing_connection.id,
        deep_research_model=os.environ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME"],
    )

    # Create Agent with the Deep Research tool and process Agent run
    with project_client:

        with project_client.agents as agents_client:

            # Create a new agent that has the Deep Research tool attached.
            # NOTE: To add Deep Research to an existing agent, fetch it with `get_agent(agent_id)` and then,
            # update the agent with the Deep Research tool.
            agent = agents_client.create_agent(
                model=os.environ["MODEL_DEPLOYMENT_NAME"],
                name="my-agent",
                instructions="You are a helpful Agent that assists in researching scientific topics.",
                tools=deep_research_tool.definitions,
            )

            # [END create_agent_with_deep_research_tool]
            print(f"Created agent, ID: {agent.id}")

            # Everything after agent creation runs under try/finally so the agent is removed
            # even if the (potentially minutes-long) run is interrupted or raises.
            try:
                # Create thread for communication
                thread = agents_client.threads.create()
                print(f"Created thread, ID: {thread.id}")

                # Create message to thread
                message = agents_client.messages.create(
                    thread_id=thread.id,
                    role="user",
                    content=(
                        "Research the current state of studies on orca intelligence and orca language, including what is currently known about orcas' cognitive capabilities and communication systems."
                    ),
                )
                print(f"Created message, ID: {message.id}")

                print("Start processing the message... this may take a few minutes to finish. Be patient!")
                # Poll the run as long as run status is queued or in progress
                run = agents_client.runs.create(thread_id=thread.id, agent_id=agent.id)
                last_message_id = None
                while run.status in ("queued", "in_progress"):
                    time.sleep(1)
                    run = agents_client.runs.get(thread_id=thread.id, run_id=run.id)

                    # Surface any new interim reasoning summary between status checks.
                    last_message_id = fetch_and_print_new_agent_response(
                        thread_id=thread.id,
                        agents_client=agents_client,
                        last_message_id=last_message_id,
                        progress_filename="research_progress.txt",
                    )
                    print(f"Run status: {run.status}")

                # Once the run is finished, print the final status and ID
                print(f"Run finished with status: {run.status}, ID: {run.id}")

                if run.status == "failed":
                    print(f"Run failed: {run.last_error}")

                # Fetch the final message from the agent in the thread and create a research summary
                final_message = agents_client.messages.get_last_message_by_role(
                    thread_id=thread.id, role=MessageRole.AGENT
                )
                if final_message:
                    create_research_summary(final_message)
            finally:
                # Clean-up and delete the agent once the run is finished (or has failed).
                # NOTE: Comment out these lines if you plan to reuse the agent later.
                agents_client.delete_agent(agent.id)
                print("Deleted agent")
Code

Tags

Practitioner

Build & Tooling