vet

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 5942508b8dc7330551edeea6519674d055e41317
parent 0e5a25d0b52640200025059a48e929824cfa2b89
Author: Andrew Laack <andrew@laack.co>
Date:   Fri,  6 Mar 2026 19:14:36 +0000

Merge pull request #169 from yashdive/vet-against-staged-changes-only

added feature to run vet against staged changes only
Diffstat:
M vet/api.py | 33 +++++++++++++++++++++++----------
A vet/cli/config/cli_config_consistency_test.py | 41 +++++++++++++++++++++++++++++++++++++++++
M vet/cli/config/cli_config_schema.py | 2 ++
M vet/cli/main.py | 49 +++++++++++++++++++++++++++++++++++++++++++++----
M vet/git.py | 4 ++--
M vet/repo_utils.py | 39 +++++++++++++++++++++++++++++++++------
M vet/repo_utils_test.py | 41 +++++++++++++++++++++++++++++++++++++++++
7 files changed, 187 insertions(+), 22 deletions(-)

diff --git a/vet/api.py b/vet/api.py @@ -103,21 +103,34 @@ def find_issues( config: VetConfig, conversation_history: tuple[ConversationMessageUnion, ...] | None = None, extra_context: str | None = None, + only_staged: bool = False, ) -> tuple[IdentifiedVerifyIssue, ...]: - logger.debug( - "Finding issues in {repo_path} relative to {relative_to}", - repo_path=repo_path, - relative_to=relative_to, - ) - - base_commit, diff, diff_no_binary = get_code_to_check(relative_to, repo_path) - if not diff.strip(): + if only_staged: logger.debug( - "No code changes detected in repo {repo_path} since the specified relative_to commit {relative_to}, skipping issue identification", + "Finding issues in {repo_path} (staged changes)", + repo_path=repo_path, + ) + else: + logger.debug( + "Finding issues in {repo_path} relative to {relative_to}", repo_path=repo_path, relative_to=relative_to, ) - # No code changes detected since the specified relative_to commit, so no issues to find. + + base_commit, diff, diff_no_binary = get_code_to_check(relative_to, repo_path, only_staged=only_staged) + if not diff.strip(): + if only_staged: + logger.debug( + "No code changes detected in repo {repo_path} for staged changes, skipping issue identification", + repo_path=repo_path, + ) + else: + logger.debug( + "No code changes detected in repo {repo_path} since the specified relative_to commit {relative_to}, skipping issue identification", + repo_path=repo_path, + relative_to=relative_to, + ) + # No code changes detected, so no issues to find. 
return tuple() issues, _, _ = get_issues_with_raw_responses( diff --git a/vet/cli/config/cli_config_consistency_test.py b/vet/cli/config/cli_config_consistency_test.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from vet.cli.config.cli_config_schema import CliConfigPreset +from vet.cli.config.cli_config_schema import CliDefaults +from vet.cli.main import create_parser + +IGNORED_ARGS = { + "help", + "version", + # CLI-only flags that select behavior rather than configure defaults + "config", + "list_configs", + "list_models", + "list_issue_codes", + "list_fields", + # agent-mode flags are intentionally CLI-only + "agentic", + "agent_harness", + "update_models", +} + + +def _extract_cli_arg_dests() -> set[str]: + parser = create_parser() + return {action.dest for action in parser._actions if action.dest and action.dest not in IGNORED_ARGS} + + +def test_cli_args_present_in_cli_defaults_and_presets() -> None: + """Ensure every CLI argument that is meant to be configurable + appears in both `CliDefaults` and `CliConfigPreset`. 
+ """ + cli_args = _extract_cli_arg_dests() + + defaults_fields = set(CliDefaults.model_fields.keys()) + preset_fields = set(CliConfigPreset.model_fields.keys()) + + missing_in_defaults = cli_args - defaults_fields + missing_in_presets = cli_args - preset_fields + + assert not missing_in_defaults, f"CLI args missing from CliDefaults: {sorted(missing_in_defaults)}" + assert not missing_in_presets, f"CLI args missing from CliConfigPreset: {sorted(missing_in_presets)}" diff --git a/vet/cli/config/cli_config_schema.py b/vet/cli/config/cli_config_schema.py @@ -26,6 +26,7 @@ class CliConfigPreset(BaseModel): goal: str | None = None repo: str | None = None base_commit: str | None = None + staged: bool | None = None history_loader: str | None = None extra_context: list[str] | None = None enabled_issue_codes: list[str] | None = None @@ -51,6 +52,7 @@ class CliDefaults(BaseModel): goal: str | None = None repo: str | None = None base_commit: str = "HEAD" + staged: bool = False history_loader: str | None = None extra_context: list[str] | None = None enabled_issue_codes: list[str] | None = None diff --git a/vet/cli/main.py b/vet/cli/main.py @@ -44,6 +44,7 @@ def create_parser() -> argparse.ArgumentParser: prog="vet", description="Identify issues in code changes using LLM-based analysis.", formatter_class=argparse.RawDescriptionHelpFormatter, + allow_abbrev=False, ) parser.add_argument( @@ -96,6 +97,8 @@ def create_parser() -> argparse.ArgumentParser: metavar="REF", help=f"Git commit, branch, or ref to use as the base for computing the diff (default: {CLI_DEFAULTS.base_commit})", ) + # By default, vet includes all changes (staged, unstaged, and untracked). With --staged, only staged changes are included. 
+ diff_group.add_argument("--staged", action="store_true", help="Only analyze staged changes") context_group = parser.add_argument_group("context options") context_group.add_argument( @@ -347,6 +350,25 @@ def list_configs(cli_configs: dict[str, CliConfigPreset], repo_path: Path) -> No print() +def _validate_staged_related_options(args: argparse.Namespace, base_commit_cli_specified: bool) -> str | None: + """Validate options related to staged analysis. + + Returns an error message string when validation fails (caller should print + it to stderr and return an exit code of 2), otherwise returns None. + """ + if args.staged and base_commit_cli_specified: + # Only treat --base-commit as conflicting if explicitly provided on the CLI. + # Config/default values (e.g. "main") should not trigger an error because + # staged mode intentionally ignores base commits. + return "vet: --staged and --base-commit are mutually exclusive" + + if args.staged and args.agentic: + # Sanity check to prevent users from accidentally combining incompatible modes. + return "vet: --staged and --agentic are mutually exclusive" + + return None + + _DEFAULT_LOG_FILE = Path(os.environ.get("XDG_STATE_HOME", Path.home() / ".local" / "state")) / "vet" / "vet.log" @@ -433,6 +455,13 @@ def main(argv: list[str] | None = None) -> int: parser = create_parser() args = parser.parse_args(argv) + # Determine whether the user explicitly provided `--base-commit` on the + # command line. `CLI_DEFAULTS.base_commit` may be non-empty (e.g. "main") + # coming from config or defaults; we must only treat an explicit CLI + # `--base-commit` as conflicting with staged mode. + raw_argv = argv if argv is not None else sys.argv[1:] + base_commit_cli_specified = any(a == "--base-commit" or a.startswith("--base-commit=") for a in raw_argv) + # Handle subcommands that don't need config loading. 
if args.update_models: try: @@ -521,6 +550,11 @@ def main(argv: list[str] | None = None) -> int: ) return 2 + staged_err = _validate_staged_related_options(args, base_commit_cli_specified) + if staged_err is not None: + print(staged_err, file=sys.stderr) + return 2 + if args.verbose and args.quiet: print( "vet: --verbose and --quiet are mutually exclusive", @@ -651,10 +685,16 @@ def main(argv: list[str] | None = None) -> int: ) if not args.quiet: - print( - f"analyzing {repo_path} (relative to {args.base_commit})", - file=sys.stderr, - ) + if args.staged: + print( + f"analyzing {repo_path} (staged changes)", + file=sys.stderr, + ) + else: + print( + f"analyzing {repo_path} (relative to {args.base_commit})", + file=sys.stderr, + ) try: issues = find_issues( @@ -664,6 +704,7 @@ def main(argv: list[str] | None = None) -> int: config=config, conversation_history=conversation_history, extra_context=extra_context, + only_staged=args.staged, ) except AgentCLINotFoundError as e: print(f"vet: {e}", file=sys.stderr) diff --git a/vet/git.py b/vet/git.py @@ -113,7 +113,7 @@ class SyncLocalGitRepo: def get_git_diff( self, commit_hash: str | None = None, - staged: bool = False, + only_staged: bool = False, is_error_logged: bool = True, include_binary: bool = True, ) -> str: @@ -124,7 +124,7 @@ class SyncLocalGitRepo: # Without --binary, diffs of binary files will just contain a summary statement such as "Binary files a/file.bin and b/file.bin differ". # Such diffs cannot be applied, but are useful for inclusion in LLM prompts. 
command.append("--binary") - if staged: + if only_staged: command.append("--staged") if commit_hash: command.append(commit_hash) diff --git a/vet/repo_utils.py b/vet/repo_utils.py @@ -13,13 +13,42 @@ from vet.imbue_core.async_monkey_patches import log_exception VET_MAX_PROMPT_TOKENS = 10000 -def get_code_to_check(relative_to: str, repo_path: Path) -> tuple[str, str, str]: +def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = False) -> tuple[str, str, str]: """ Returns: - - The commit hash to use as the base commit for the diff. - - The combined diff including staged, unstaged, and untracked changes. (compatible with `git apply`) - - The combined diff but with binary diffs shortened. (cannot be applied if binary changes are present) + - The commit hash to use as the base commit for the diff. When `only_staged` is True + this will be the current HEAD (staged-only mode ignores `relative_to`). + - The combined diff. When `only_staged` is False this includes staged, unstaged, + and untracked changes (compatible with `git apply`). When `only_staged` is True + this includes only staged changes. + - The combined diff with binary diffs shortened (cannot be applied if binary + changes are present). When `only_staged` is True this is generated from staged + changes only. + Note: When `only_staged` is True the `relative_to` argument is ignored; staged-only + analysis does not attempt to resolve or use the configured base commit. """ + repo = SyncLocalGitRepo(repo_path) + + if only_staged: + # In staged mode we ignore `relative_to` entirely. Avoid resolving the + # configured base commit since it may refer to a branch/ref that doesn't + # exist in the current working copy (e.g., config sets `main`). This + # prevents unnecessary git errors when the user explicitly requested + # staged-only analysis. 
+ try: + combined_diff = repo.get_git_diff(only_staged=True) + combined_diff_no_binary = repo.get_git_diff(only_staged=True, include_binary=False) + # No untracked files in staged mode. Use HEAD as the base commit for + # consistency with non-staged behavior. + base_commit = repo.run_git(["rev-parse", "HEAD"]) + except RunCommandError as e: + # If either obtaining the staged diff or resolving HEAD fails, + # surface a wrapped GitCommandError so callers receive uniform + # error information. + raise GitCommandError(e, "get staged diff or determine HEAD commit", repo_path) from e + + return base_commit, combined_diff, combined_diff_no_binary + try: base_commit = find_relative_to_commit_hash(relative_to, repo_path=repo_path) except RunCommandError as e: @@ -29,8 +58,6 @@ def get_code_to_check(relative_to: str, repo_path: Path) -> tuple[str, str, str] repo_path, ) from e - repo = SyncLocalGitRepo(repo_path) - # Get the combined diff which includes all changes; staged, unstaged, and untracked. try: combined_diff = repo.get_git_diff(commit_hash=base_commit) diff --git a/vet/repo_utils_test.py b/vet/repo_utils_test.py @@ -95,3 +95,44 @@ def test_build_context(simple_test_git_repo: Path, snapshot: SnapshotAssertion) ) project_context_without_repo_path = chill(project_context_evolver) assert project_context_without_repo_path == snapshot + + +def test_get_code_to_check_staged_only(simple_test_git_repo: Path) -> None: + """When `only_staged=True`, only staged changes should be returned (no unstaged/untracked), + and resolving a configured `relative_to` (like 'main') should not error. 
+ """ + repo_path = simple_test_git_repo + + # Record current HEAD + head = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_path, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + # Create an untracked file + (repo_path / "untracked.txt").write_text("untracked content") + + # Create a staged change + (repo_path / "file1.txt").write_text("staged content\n") + subprocess.run(["git", "add", "file1.txt"], cwd=repo_path, check=True) + + # Create an unstaged change + with open((repo_path / "file1.txt"), "a+") as f: + f.write("\nunstaged content") + + # Use a relative_to that likely doesn't exist (e.g., 'main') to ensure we don't try to resolve it + git_hash, diff, diff_no_binary = get_code_to_check("main", repo_path=repo_path, only_staged=True) + + # In staged mode we return HEAD as the base commit + assert git_hash == head + + # Staged change should be present + assert "staged content" in diff + assert "staged content" in diff_no_binary + + # Unstaged and untracked changes should NOT be present + assert "unstaged content" not in diff + assert "untracked.txt" not in diff