vet

Mirror of Vet, an AI code review tool
git clone git://git.laack.co/vet.git
Log | Files | Refs | README | LICENSE

agentic.py (13979B)


      1 """
      2 Agentic harness that checks a given diff for issues using coding agents with tools.
      3 """
      4 
      5 import concurrent.futures
      6 import json
      7 from concurrent.futures import ThreadPoolExecutor
      8 from functools import cached_property
      9 from typing import Any
     10 from typing import Generator
     11 
     12 import jinja2
     13 from loguru import logger
     14 
     15 from vet.imbue_core.agents.agent_api.data_types import AgentMessage
     16 from vet.imbue_core.agents.agent_api.data_types import AgentOptions
     17 from vet.imbue_core.agents.agent_api.errors import AgentCLINotFoundError
     18 from vet.imbue_core.async_monkey_patches import log_exception
     19 from vet.imbue_core.data_types import AgenticPhase
     20 from vet.imbue_core.data_types import IssueCode
     21 from vet.imbue_core.data_types import IssueIdentificationDebugInfo
     22 from vet.imbue_core.data_types import IssueIdentificationLLMResponseMetadata
     23 from vet.imbue_core.data_types import LLMResponse
     24 from vet.imbue_tools.get_conversation_history.input_data_types import CommitInputs
     25 from vet.imbue_tools.repo_utils.context_utils import escape_prompt_markers
     26 from vet.imbue_tools.repo_utils.project_context import ProjectContext
     27 from vet.imbue_tools.types.vet_config import VetConfig
     28 from vet.issue_identifiers.base import IssueIdentifier
     29 from vet.issue_identifiers.common import GeneratedIssueSchema
     30 from vet.issue_identifiers.common import GeneratedResponseSchema
     31 from vet.issue_identifiers.common import extract_invocation_info_from_messages
     32 from vet.issue_identifiers.common import format_issue_identification_guide_for_llm
     33 from vet.issue_identifiers.common import generate_issues_from_response_texts
     34 from vet.issue_identifiers.common import generate_response_from_agent
     35 from vet.issue_identifiers.common import get_agent_options
     36 from vet.issue_identifiers.harnesses.base import IssueIdentifierHarness
     37 from vet.issue_identifiers.identification_guides import IssueIdentificationGuide
     38 
# Jinja2 template for the single-agent prompt that covers every configured issue
# type in one agent run. It is rendered by `_AgenticIssueIdentifier._get_prompt`
# with these variables:
#   repo_path           - location of the repository the agent should explore
#   commit_message      - the user's request (the goal of the change)
#   unified_diff        - the diff under review
#   guides              - mapping of issue code -> formatted identification guide
#   response_schema     - JSON schema the agent's answer must match
#   additional_guidance - mapping of issue code -> extra agent-specific guidance
# The text of this template is sent to the agent verbatim, so any wording change
# here changes runtime behavior.
PROMPT_TEMPLATE = """You are analyzing a code repository for potential issues. The repository files are available in {{ repo_path }}.

Assume that a user requested work to be done and a programmer delivered the diff below.
The changes from the diff are present in the codebase but are not yet committed.

### User request ###
{% filter indent(width=2) %}
{{ commit_message }}
{% endfilter %}

### Diff (lines starting with `-` indicate removed code, and lines starting with `+` indicate added code) ###
{% filter indent(width=2) %}
{{ unified_diff }}
{% endfilter %}
###

Your task is to help verify the quality of the diff.
We care only about specific categories of important issues.
The rubric below outlines these categories of important issues, and contains guidelines and examples to correctly identify them:
{% for issue_code, guide in guides.items() %}
---
**{{ issue_code }}**:
{{ guide }}
{% endfor %}
---

Use your standard tools to explore the repository and analyze the code thoroughly.
Look at the additional guidance section below for more details on how to find issues.

After your analysis, provide your response in JSON format matching this schema:

{{ response_schema | tojson(indent=2) }}

For each issue found, provide:
- issue_code: Category from the rubric above
- description: Specific explanation of the issue
- (if applicable) location: File path where the issue occurs (relative to {{ repo_path }})
- (if applicable) code_part: Specific code snippet that has the issue. Your code snippet should be the exact same as the original code including whitespace.
- severity: Integer 1-5 (1=minor, 5=critical)
- confidence: Float 0.0-1.0 indicating your confidence

Your response should look like:
```json
{
    "issues": [
        <list of issues>
    ]
}
```

If no issues are found, return: ```json{"issues": []}```

Focus on real issues that impact code quality, correctness, or maintainability.
You must not return issues that were already present in the code or issues that are fixed by the diff.
You must only return issues that were introduced by the diff.
Do not report duplicate issues with the same or equivalent descriptions.

### Additional Guidance for Finding Issues ###
You should use a Task tool to create a parallel task for each issue type in the rubric.
You should pass along the exact issue type definition with all details to the task.
Once all the Tasks have completed you can collate their results.
You should pass along any relevant information from the guidance below to the task.
Here is a non-exhaustive list of things that you can do using your tools within the task to find issues:
{% for issue_code, guidance in additional_guidance.items() %}
---
**{{ issue_code }}**:
{{ guidance }}
{% endfor %}
---
Note that this is just guidance on how to find issues, please refer to the rubric for the types of issues to find.
"""
    110 
# Jinja2 template for a prompt that targets exactly one issue type. It is rendered
# by `_AgenticIssueIdentifier._get_prompt_for_issue_type` with these variables:
#   issue_type      - the issue code this agent run is restricted to
#   repo_path       - location of the repository the agent should explore
#   commit_message  - the user's request (the goal of the change)
#   unified_diff    - the diff under review
#   guide           - formatted identification guide for `issue_type`
#   response_schema - JSON schema the agent's answer must match
# The text of this template is sent to the agent verbatim, so any wording change
# here changes runtime behavior.
# NOTE(review): the `issue_code` bullet below says "Category from the rubric above",
# but this template contains a single issue-type definition rather than a rubric;
# the wording looks carried over from PROMPT_TEMPLATE - confirm whether it should change.
ISSUE_TYPE_PROMPT_TEMPLATE = """You are analyzing a code repository for potential issues of type {{ issue_type }}. The repository files are available in {{ repo_path }}.

Assume that a user requested work to be done and a programmer delivered the diff below.
The changes from the diff are present in the codebase but are not yet committed.

### User request ###
{% filter indent(width=2) %}
{{ commit_message }}
{% endfilter %}

### Diff (lines starting with `-` indicate removed code, and lines starting with `+` indicate added code) ###
{% filter indent(width=2) %}
{{ unified_diff }}
{% endfilter %}
###

Your task is to help verify the quality of the diff.
Here is the definition of the issue type you are looking for:
**{{ issue_type }}**:
{{ guide }}

Use your standard tools to explore the repository and analyze the code thoroughly.
ONLY look for issues related to {{ issue_type }}.
Do NOT modify any files - this is read-only analysis.

After your analysis, provide your response in JSON format matching this schema:

{{ response_schema | tojson(indent=2) }}

For each issue found, provide:
- issue_code: Category from the rubric above
- description: Specific explanation of the issue
- (if applicable) location: File path where the issue occurs (relative to {{ repo_path }})
- (if applicable) code_part: Specific code snippet that has the issue. Your code snippet should be the exact same as the original code including whitespace.
- severity: Integer 1-5 (1=minor, 5=critical)
- confidence: Float 0.0-1.0 indicating your confidence

Your response should look like:
```json
{
    "issues": [
        <list of issues>
    ]
}
```

If no issues of this type are found, return: ```json{"issues": []}```
You must not return issues that were already present in the code or issues that are fixed by the diff.
You must only return issues that were introduced by the diff.
Do not report duplicate issues with the same or equivalent descriptions.
"""
    162 
    163 
# Readability alias for the response text returned by `generate_response_from_agent`;
# used in the return annotation of `_generate_issues_worker`.
ResponseText = str
    165 
    166 
    167 def _generate_issues_worker(
    168     issue_code: IssueCode,
    169     prompt: str,
    170     options: AgentOptions,
    171 ) -> tuple[IssueCode, ResponseText, list[AgentMessage]]:
    172     response_text, agent_messages = generate_response_from_agent(prompt, options)
    173     return issue_code, response_text, agent_messages
    174 
    175 
    176 class _AgenticIssueIdentifier(IssueIdentifier[CommitInputs]):
    177     _identification_guides: tuple[IssueIdentificationGuide, ...]
    178 
    179     def __init__(self, identification_guides: tuple[IssueIdentificationGuide, ...]) -> None:
    180         assert len(identification_guides) > 0, "At least one identification guide must be provided"
    181         self._identification_guides = identification_guides
    182 
    183     @cached_property
    184     def _response_schema(self) -> dict[str, Any]:
    185         return GeneratedResponseSchema.model_json_schema()
    186 
    187     def _get_prompt(
    188         self,
    189         project_context: ProjectContext,
    190         config: VetConfig,  # unused
    191         identifier_inputs: CommitInputs,
    192     ) -> str:
    193         env = jinja2.Environment(undefined=jinja2.StrictUndefined)
    194         jinja_template = env.from_string(PROMPT_TEMPLATE)
    195         additional_guidance_by_issue_code = {
    196             guide.issue_code: guide.additional_guide_for_agent for guide in self._identification_guides
    197         }
    198 
    199         formatted_guides = {
    200             guide.issue_code: format_issue_identification_guide_for_llm(guide) for guide in self._identification_guides
    201         }
    202 
    203         prompt = jinja_template.render(
    204             {
    205                 "repo_path": project_context.repo_path,
    206                 "commit_message": escape_prompt_markers(identifier_inputs.goal),
    207                 "unified_diff": escape_prompt_markers(identifier_inputs.diff),
    208                 "guides": formatted_guides,
    209                 "response_schema": self._response_schema,
    210                 "additional_guidance": additional_guidance_by_issue_code,
    211             }
    212         )
    213         return prompt
    214 
    215     def _get_prompt_for_issue_type(
    216         self,
    217         project_context: ProjectContext,
    218         identifier_inputs: CommitInputs,
    219         guide: IssueIdentificationGuide,
    220     ) -> str:
    221         env = jinja2.Environment(undefined=jinja2.StrictUndefined)
    222         jinja_template = env.from_string(ISSUE_TYPE_PROMPT_TEMPLATE)
    223 
    224         formatted_guide = format_issue_identification_guide_for_llm(guide)
    225 
    226         prompt = jinja_template.render(
    227             {
    228                 "repo_path": project_context.repo_path,
    229                 "commit_message": escape_prompt_markers(identifier_inputs.goal),
    230                 "unified_diff": escape_prompt_markers(identifier_inputs.diff),
    231                 "guide": formatted_guide,
    232                 "response_schema": self._response_schema,
    233                 "issue_type": guide.issue_code,
    234             }
    235         )
    236         return prompt
    237 
    238     def identify_issues(
    239         self,
    240         identifier_inputs: CommitInputs,
    241         project_context: ProjectContext,
    242         config: VetConfig,
    243     ) -> Generator[GeneratedIssueSchema, None, IssueIdentificationDebugInfo]:
    244         assert project_context.repo_path is not None, "Project context must have a valid repo_path, got None"
    245 
    246         options = get_agent_options(
    247             cwd=project_context.repo_path,
    248             model_name=config.agent_model_name,
    249             agent_harness_type=config.agent_harness_type,
    250         )
    251 
    252         if config.enable_parallel_agentic_issue_identification:
    253             llm_responses = []
    254 
    255             issue_prompts = [
    256                 (
    257                     guide.issue_code,
    258                     self._get_prompt_for_issue_type(project_context, identifier_inputs, guide),
    259                 )
    260                 for guide in self._identification_guides
    261             ]
    262             with ThreadPoolExecutor(max_workers=config.max_identify_workers) as executor:
    263                 tasks = [
    264                     executor.submit(_generate_issues_worker, issue_code, prompt, options)
    265                     for issue_code, prompt in issue_prompts
    266                 ]
    267 
    268                 num_succeeded = 0
    269                 last_error: Exception | None = None
    270                 for task in concurrent.futures.as_completed(tasks):
    271                     try:
    272                         result = task.result()
    273                     except AgentCLINotFoundError:
    274                         raise
    275                     except Exception as e:
    276                         log_exception(e, "Error processing issue type: {e}", e=e)
    277                         last_error = e
    278                         continue
    279 
    280                     num_succeeded += 1
    281                     issue_code, issue_type_response_text, messages = result
    282 
    283                     yield from generate_issues_from_response_texts(response_texts=(issue_type_response_text,))
    284 
    285                     message_dumps = tuple(json.dumps(message.model_dump()) for message in messages)
    286                     invocation_info = extract_invocation_info_from_messages(messages)
    287 
    288                     llm_responses.append(
    289                         LLMResponse(
    290                             metadata=IssueIdentificationLLMResponseMetadata(
    291                                 agentic_phase=AgenticPhase.ISSUE_IDENTIFICATION,
    292                                 issue_type=issue_code,
    293                             ),
    294                             raw_response=message_dumps,
    295                             invocation_info=invocation_info,
    296                         )
    297                     )
    298 
    299                 # If every task failed, re-raise the last error so it propagates to main().
    300                 if num_succeeded == 0 and last_error is not None:
    301                     raise last_error
    302 
    303             return IssueIdentificationDebugInfo(llm_responses=tuple(llm_responses))
    304         else:
    305             prompt = self._get_prompt(project_context, config, identifier_inputs)
    306             response_text, messages = generate_response_from_agent(prompt, options)
    307 
    308             message_dumps = tuple(json.dumps(message.model_dump()) for message in messages)
    309             invocation_info = extract_invocation_info_from_messages(messages)
    310 
    311             llm_responses = [
    312                 LLMResponse(
    313                     metadata=IssueIdentificationLLMResponseMetadata(
    314                         agentic_phase=AgenticPhase.ISSUE_IDENTIFICATION,
    315                         issue_type=None,
    316                     ),
    317                     raw_response=message_dumps,
    318                     invocation_info=invocation_info,
    319                 )
    320             ]
    321 
    322             yield from generate_issues_from_response_texts(response_texts=(response_text,))
    323 
    324             return IssueIdentificationDebugInfo(llm_responses=tuple(llm_responses))
    325 
    326     def input_type(self) -> type[CommitInputs]:
    327         return CommitInputs
    328 
    329     @property
    330     def enabled_issue_codes(self) -> tuple[IssueCode, ...]:
    331         return tuple(guide.issue_code for guide in self._identification_guides)
    332 
    333     @property
    334     def requires_agentic_collation(self) -> bool:
    335         return True
    336 
    337     @property
    338     def identifies_code_issues(self) -> bool:
    339         return True
    340 
    341 
    342 class AgenticHarness(IssueIdentifierHarness[CommitInputs]):
    343     def make_issue_identifier(
    344         self, identification_guides: tuple[IssueIdentificationGuide, ...]
    345     ) -> IssueIdentifier[CommitInputs]:
    346         return _AgenticIssueIdentifier(identification_guides=identification_guides)