agentic.py (13979B)
"""
Agentic harness that checks a given diff for issues using coding agents with tools.
"""

import concurrent.futures
import json
from concurrent.futures import ThreadPoolExecutor
from functools import cached_property
from typing import Any
from typing import Generator

import jinja2
from loguru import logger

from vet.imbue_core.agents.agent_api.data_types import AgentMessage
from vet.imbue_core.agents.agent_api.data_types import AgentOptions
from vet.imbue_core.agents.agent_api.errors import AgentCLINotFoundError
from vet.imbue_core.async_monkey_patches import log_exception
from vet.imbue_core.data_types import AgenticPhase
from vet.imbue_core.data_types import IssueCode
from vet.imbue_core.data_types import IssueIdentificationDebugInfo
from vet.imbue_core.data_types import IssueIdentificationLLMResponseMetadata
from vet.imbue_core.data_types import LLMResponse
from vet.imbue_tools.get_conversation_history.input_data_types import CommitInputs
from vet.imbue_tools.repo_utils.context_utils import escape_prompt_markers
from vet.imbue_tools.repo_utils.project_context import ProjectContext
from vet.imbue_tools.types.vet_config import VetConfig
from vet.issue_identifiers.base import IssueIdentifier
from vet.issue_identifiers.common import GeneratedIssueSchema
from vet.issue_identifiers.common import GeneratedResponseSchema
from vet.issue_identifiers.common import extract_invocation_info_from_messages
from vet.issue_identifiers.common import format_issue_identification_guide_for_llm
from vet.issue_identifiers.common import generate_issues_from_response_texts
from vet.issue_identifiers.common import generate_response_from_agent
from vet.issue_identifiers.common import get_agent_options
from vet.issue_identifiers.harnesses.base import IssueIdentifierHarness
from vet.issue_identifiers.identification_guides import IssueIdentificationGuide

# Prompt used when a single agent run covers every enabled issue type at once.
PROMPT_TEMPLATE = """You are analyzing a code repository for potential issues. The repository files are available in {{ repo_path }}.

Assume that a user requested work to be done and a programmer delivered the diff below.
The changes from the diff are present in the codebase but are not yet committed.

### User request ###
{% filter indent(width=2) %}
{{ commit_message }}
{% endfilter %}

### Diff (lines starting with `-` indicate removed code, and lines starting with `+` indicate added code) ###
{% filter indent(width=2) %}
{{ unified_diff }}
{% endfilter %}
###

Your task is to help verify the quality of the diff.
We care only about specific categories of important issues.
The rubric below outlines these categories of important issues, and contains guidelines and examples to correctly identify them:
{% for issue_code, guide in guides.items() %}
---
**{{ issue_code }}**:
{{ guide }}
{% endfor %}
---

Use your standard tools to explore the repository and analyze the code thoroughly.
Look at the additional guidance section below for more details on how to find issues.

After your analysis, provide your response in JSON format matching this schema:

{{ response_schema | tojson(indent=2) }}

For each issue found, provide:
- issue_code: Category from the rubric above
- description: Specific explanation of the issue
- (if applicable) location: File path where the issue occurs (relative to {{ repo_path }})
- (if applicable) code_part: Specific code snippet that has the issue. Your code snippet should be the exact same as the original code including whitespace.
- severity: Integer 1-5 (1=minor, 5=critical)
- confidence: Float 0.0-1.0 indicating your confidence

Your response should look like:
```json
{
  "issues": [
    <list of issues>
  ]
}
```

If no issues are found, return: ```json{"issues": []}```

Focus on real issues that impact code quality, correctness, or maintainability.
You must not return issues that were already present in the code or issues that are fixed by the diff.
You must only return issues that were introduced by the diff.
Do not report duplicate issues with the same or equivalent descriptions.

### Additional Guidance for Finding Issues ###
You should use a Task tool to create a parallel task for each issue type in the rubric.
You should pass along the exact issue type definition with all details to the task.
Once all the Tasks have completed you can collate their results.
You should pass along any relevant information from the guidance below to the task.
Here is a non-exhaustive list of things that you can do using your tools within the task to find issues:
{% for issue_code, guidance in additional_guidance.items() %}
---
**{{ issue_code }}**:
{{ guidance }}
{% endfor %}
---
Note that this is just guidance on how to find issues, please refer to the rubric for the types of issues to find.
"""

# Prompt used when one agent run is dedicated to a single issue type.
ISSUE_TYPE_PROMPT_TEMPLATE = """You are analyzing a code repository for potential issues of type {{ issue_type }}. The repository files are available in {{ repo_path }}.

Assume that a user requested work to be done and a programmer delivered the diff below.
The changes from the diff are present in the codebase but are not yet committed.

### User request ###
{% filter indent(width=2) %}
{{ commit_message }}
{% endfilter %}

### Diff (lines starting with `-` indicate removed code, and lines starting with `+` indicate added code) ###
{% filter indent(width=2) %}
{{ unified_diff }}
{% endfilter %}
###

Your task is to help verify the quality of the diff.
Here is the definition of the issue type you are looking for:
**{{ issue_type }}**:
{{ guide }}

Use your standard tools to explore the repository and analyze the code thoroughly.
ONLY look for issues related to {{ issue_type }}.
Do NOT modify any files - this is read-only analysis.

After your analysis, provide your response in JSON format matching this schema:

{{ response_schema | tojson(indent=2) }}

For each issue found, provide:
- issue_code: Category from the rubric above
- description: Specific explanation of the issue
- (if applicable) location: File path where the issue occurs (relative to {{ repo_path }})
- (if applicable) code_part: Specific code snippet that has the issue. Your code snippet should be the exact same as the original code including whitespace.
- severity: Integer 1-5 (1=minor, 5=critical)
- confidence: Float 0.0-1.0 indicating your confidence

Your response should look like:
```json
{
  "issues": [
    <list of issues>
  ]
}
```

If no issues of this type are found, return: ```json{"issues": []}```
You must not return issues that were already present in the code or issues that are fixed by the diff.
You must only return issues that were introduced by the diff.
Do not report duplicate issues with the same or equivalent descriptions.
"""


ResponseText = str


def _generate_issues_worker(
    issue_code: IssueCode,
    prompt: str,
    options: AgentOptions,
) -> tuple[IssueCode, ResponseText, list[AgentMessage]]:
    """Run one agent invocation for a single issue type.

    The issue code is passed through unchanged so that results collected out of
    submission order can still be attributed to the issue type they belong to.

    Returns:
        A ``(issue_code, response_text, agent_messages)`` tuple, where the last
        two items are whatever ``generate_response_from_agent`` returned.
    """
    response_text, agent_messages = generate_response_from_agent(prompt, options)
    return issue_code, response_text, agent_messages


def _render_prompt(template_source: str, variables: dict[str, Any]) -> str:
    """Render a Jinja template string with the given variables.

    ``StrictUndefined`` makes a template variable that is missing from
    ``variables`` raise at render time instead of rendering as an empty string.
    """
    env = jinja2.Environment(undefined=jinja2.StrictUndefined)
    return env.from_string(template_source).render(variables)


def _build_llm_response(messages: list[AgentMessage], issue_type: IssueCode | None) -> LLMResponse:
    """Package the messages of one agent run as an ``LLMResponse`` for debug info.

    Args:
        messages: The agent messages produced by the run.
        issue_type: The issue type the run was dedicated to, or ``None`` when the
            run covered all issue types at once.
    """
    message_dumps = tuple(json.dumps(message.model_dump()) for message in messages)
    invocation_info = extract_invocation_info_from_messages(messages)
    return LLMResponse(
        metadata=IssueIdentificationLLMResponseMetadata(
            agentic_phase=AgenticPhase.ISSUE_IDENTIFICATION,
            issue_type=issue_type,
        ),
        raw_response=message_dumps,
        invocation_info=invocation_info,
    )


class _AgenticIssueIdentifier(IssueIdentifier[CommitInputs]):
    """Issue identifier that delegates the analysis of a diff to a coding agent.

    Depending on ``config.enable_parallel_agentic_issue_identification`` it either
    runs one agent per identification guide in a thread pool, or a single agent
    whose prompt contains every guide.
    """

    _identification_guides: tuple[IssueIdentificationGuide, ...]

    def __init__(self, identification_guides: tuple[IssueIdentificationGuide, ...]) -> None:
        assert len(identification_guides) > 0, "At least one identification guide must be provided"
        self._identification_guides = identification_guides

    @cached_property
    def _response_schema(self) -> dict[str, Any]:
        """JSON schema of the expected agent response, computed once per instance."""
        return GeneratedResponseSchema.model_json_schema()

    def _get_prompt(
        self,
        project_context: ProjectContext,
        config: VetConfig,  # unused
        identifier_inputs: CommitInputs,
    ) -> str:
        """Build the combined prompt that covers every configured guide."""
        additional_guidance_by_issue_code = {
            guide.issue_code: guide.additional_guide_for_agent for guide in self._identification_guides
        }
        formatted_guides = {
            guide.issue_code: format_issue_identification_guide_for_llm(guide) for guide in self._identification_guides
        }
        return _render_prompt(
            PROMPT_TEMPLATE,
            {
                "repo_path": project_context.repo_path,
                "commit_message": escape_prompt_markers(identifier_inputs.goal),
                "unified_diff": escape_prompt_markers(identifier_inputs.diff),
                "guides": formatted_guides,
                "response_schema": self._response_schema,
                "additional_guidance": additional_guidance_by_issue_code,
            },
        )

    def _get_prompt_for_issue_type(
        self,
        project_context: ProjectContext,
        identifier_inputs: CommitInputs,
        guide: IssueIdentificationGuide,
    ) -> str:
        """Build the prompt for an agent run dedicated to the issue type of ``guide``."""
        return _render_prompt(
            ISSUE_TYPE_PROMPT_TEMPLATE,
            {
                "repo_path": project_context.repo_path,
                "commit_message": escape_prompt_markers(identifier_inputs.goal),
                "unified_diff": escape_prompt_markers(identifier_inputs.diff),
                "guide": format_issue_identification_guide_for_llm(guide),
                "response_schema": self._response_schema,
                "issue_type": guide.issue_code,
            },
        )

    def identify_issues(
        self,
        identifier_inputs: CommitInputs,
        project_context: ProjectContext,
        config: VetConfig,
    ) -> Generator[GeneratedIssueSchema, None, IssueIdentificationDebugInfo]:
        """Yield the issues an agent finds in the diff described by ``identifier_inputs``.

        This is a generator: nothing runs until it is first advanced. Its return
        value (available via ``StopIteration.value`` or ``yield from``) is the
        debug info holding one ``LLMResponse`` per successful agent run.

        Raises:
            AssertionError: If ``project_context.repo_path`` is ``None``.
            AgentCLINotFoundError: Re-raised as soon as any parallel run reports it.
            Exception: In parallel mode, the last error seen if every run failed;
                in single-agent mode, whatever the agent run raises.
        """
        assert project_context.repo_path is not None, "Project context must have a valid repo_path, got None"

        options = get_agent_options(
            cwd=project_context.repo_path,
            model_name=config.agent_model_name,
            agent_harness_type=config.agent_harness_type,
        )

        if config.enable_parallel_agentic_issue_identification:
            return (yield from self._identify_issues_in_parallel(identifier_inputs, project_context, config, options))
        return (yield from self._identify_issues_with_single_agent(identifier_inputs, project_context, config, options))

    def _identify_issues_in_parallel(
        self,
        identifier_inputs: CommitInputs,
        project_context: ProjectContext,
        config: VetConfig,
        options: AgentOptions,
    ) -> Generator[GeneratedIssueSchema, None, IssueIdentificationDebugInfo]:
        """Run one agent per identification guide and yield issues as runs complete."""
        llm_responses: list[LLMResponse] = []

        issue_prompts = [
            (
                guide.issue_code,
                self._get_prompt_for_issue_type(project_context, identifier_inputs, guide),
            )
            for guide in self._identification_guides
        ]

        num_succeeded = 0
        last_error: Exception | None = None
        with ThreadPoolExecutor(max_workers=config.max_identify_workers) as executor:
            tasks = [
                executor.submit(_generate_issues_worker, issue_code, prompt, options)
                for issue_code, prompt in issue_prompts
            ]

            try:
                for task in concurrent.futures.as_completed(tasks):
                    try:
                        result = task.result()
                    except AgentCLINotFoundError:
                        # Not recoverable by the remaining runs; abort immediately.
                        raise
                    except Exception as e:
                        # Best effort: one failing issue type must not discard the others.
                        log_exception(e, "Error processing issue type: {e}", e=e)
                        last_error = e
                        continue

                    num_succeeded += 1
                    issue_code, issue_type_response_text, messages = result

                    yield from generate_issues_from_response_texts(response_texts=(issue_type_response_text,))

                    llm_responses.append(_build_llm_response(messages, issue_type=issue_code))
            finally:
                # Leaving the `with` block waits for submitted work. When we exit early
                # (fatal error, or the consumer closed this generator) drop the runs that
                # have not started yet. This is a no-op for finished or running futures.
                for submitted_task in tasks:
                    submitted_task.cancel()

        # If every task failed, re-raise the last error so it propagates to main().
        if num_succeeded == 0 and last_error is not None:
            raise last_error

        return IssueIdentificationDebugInfo(llm_responses=tuple(llm_responses))

    def _identify_issues_with_single_agent(
        self,
        identifier_inputs: CommitInputs,
        project_context: ProjectContext,
        config: VetConfig,
        options: AgentOptions,
    ) -> Generator[GeneratedIssueSchema, None, IssueIdentificationDebugInfo]:
        """Run a single agent over the combined prompt and yield the issues it reports."""
        prompt = self._get_prompt(project_context, config, identifier_inputs)
        response_text, messages = generate_response_from_agent(prompt, options)

        # Build the debug record before yielding, as the combined run has no issue type.
        llm_responses = [_build_llm_response(messages, issue_type=None)]

        yield from generate_issues_from_response_texts(response_texts=(response_text,))

        return IssueIdentificationDebugInfo(llm_responses=tuple(llm_responses))

    def input_type(self) -> type[CommitInputs]:
        """Return the input type this identifier consumes."""
        return CommitInputs

    @property
    def enabled_issue_codes(self) -> tuple[IssueCode, ...]:
        """Issue codes of the configured identification guides, in guide order."""
        return tuple(guide.issue_code for guide in self._identification_guides)

    @property
    def requires_agentic_collation(self) -> bool:
        return True

    @property
    def identifies_code_issues(self) -> bool:
        return True


class AgenticHarness(IssueIdentifierHarness[CommitInputs]):
    """Harness that creates agent-backed issue identifiers."""

    def make_issue_identifier(
        self, identification_guides: tuple[IssueIdentificationGuide, ...]
    ) -> IssueIdentifier[CommitInputs]:
        """Create an agentic issue identifier for the given guides."""
        return _AgenticIssueIdentifier(identification_guides=identification_guides)