Merge pull request #169 from yashdive/vet-against-staged-changes-only - vet - Unnamed repository; edit this file 'description' to name the repository.

commit 5942508b8dc7330551edeea6519674d055e41317
parent 0e5a25d0b52640200025059a48e929824cfa2b89
Author: Andrew Laack <andrew@laack.co>
Date:   Fri,  6 Mar 2026 19:14:36 +0000

Merge pull request #169 from yashdive/vet-against-staged-changes-only

Added a feature to run vet against staged changes only.
Diffstat:
M vet/api.py  | 33 +++++++++++++++++++++++----------
A vet/cli/config/cli_config_consistency_test.py  | 41 +++++++++++++++++++++++++++++++++++++++++
M vet/cli/config/cli_config_schema.py  | 2 ++
M vet/cli/main.py  | 49 +++++++++++++++++++++++++++++++++++++++++++++----
M vet/git.py  | 4 ++--
M vet/repo_utils.py  | 39 +++++++++++++++++++++++++++++++++------
M vet/repo_utils_test.py  | 41 +++++++++++++++++++++++++++++++++++++++++

7 files changed, 187 insertions(+), 22 deletions(-)
diff --git a/vet/api.py b/vet/api.py
@@ -103,21 +103,34 @@ def find_issues(
     config: VetConfig,
     conversation_history: tuple[ConversationMessageUnion, ...] | None = None,
     extra_context: str | None = None,
+    only_staged: bool = False,
 ) -> tuple[IdentifiedVerifyIssue, ...]:
-    logger.debug(
-        "Finding issues in {repo_path} relative to {relative_to}",
-        repo_path=repo_path,
-        relative_to=relative_to,
-    )
-
-    base_commit, diff, diff_no_binary = get_code_to_check(relative_to, repo_path)
-    if not diff.strip():
+    if only_staged:
         logger.debug(
-            "No code changes detected in repo {repo_path} since the specified relative_to commit {relative_to}, skipping issue identification",
+            "Finding issues in {repo_path} (staged changes)",
+            repo_path=repo_path,
+        )
+    else:
+        logger.debug(
+            "Finding issues in {repo_path} relative to {relative_to}",
             repo_path=repo_path,
             relative_to=relative_to,
         )
-        # No code changes detected since the specified relative_to commit, so no issues to find.
+
+    base_commit, diff, diff_no_binary = get_code_to_check(relative_to, repo_path, only_staged=only_staged)
+    if not diff.strip():
+        if only_staged:
+            logger.debug(
+                "No code changes detected in repo {repo_path} for staged changes, skipping issue identification",
+                repo_path=repo_path,
+            )
+        else:
+            logger.debug(
+                "No code changes detected in repo {repo_path} since the specified relative_to commit {relative_to}, skipping issue identification",
+                repo_path=repo_path,
+                relative_to=relative_to,
+            )
+        # No code changes detected, so no issues to find.
         return tuple()
 
     issues, _, _ = get_issues_with_raw_responses(
diff --git a/vet/cli/config/cli_config_consistency_test.py b/vet/cli/config/cli_config_consistency_test.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from vet.cli.config.cli_config_schema import CliConfigPreset
+from vet.cli.config.cli_config_schema import CliDefaults
+from vet.cli.main import create_parser
+
+IGNORED_ARGS = {
+    "help",
+    "version",
+    # CLI-only flags that select behavior rather than configure defaults
+    "config",
+    "list_configs",
+    "list_models",
+    "list_issue_codes",
+    "list_fields",
+    # agent-mode flags are intentionally CLI-only
+    "agentic",
+    "agent_harness",
+    "update_models",
+}
+
+
+def _extract_cli_arg_dests() -> set[str]:
+    parser = create_parser()
+    return {action.dest for action in parser._actions if action.dest and action.dest not in IGNORED_ARGS}
+
+
+def test_cli_args_present_in_cli_defaults_and_presets() -> None:
+    """Ensure every CLI argument that is meant to be configurable
+    appears in both `CliDefaults` and `CliConfigPreset`.
+    """
+    cli_args = _extract_cli_arg_dests()
+
+    defaults_fields = set(CliDefaults.model_fields.keys())
+    preset_fields = set(CliConfigPreset.model_fields.keys())
+
+    missing_in_defaults = cli_args - defaults_fields
+    missing_in_presets = cli_args - preset_fields
+
+    assert not missing_in_defaults, f"CLI args missing from CliDefaults: {sorted(missing_in_defaults)}"
+    assert not missing_in_presets, f"CLI args missing from CliConfigPreset: {sorted(missing_in_presets)}"
diff --git a/vet/cli/config/cli_config_schema.py b/vet/cli/config/cli_config_schema.py
@@ -26,6 +26,7 @@ class CliConfigPreset(BaseModel):
     goal: str | None = None
     repo: str | None = None
     base_commit: str | None = None
+    staged: bool | None = None
     history_loader: str | None = None
     extra_context: list[str] | None = None
     enabled_issue_codes: list[str] | None = None
@@ -51,6 +52,7 @@ class CliDefaults(BaseModel):
     goal: str | None = None
     repo: str | None = None
     base_commit: str = "HEAD"
+    staged: bool = False
     history_loader: str | None = None
     extra_context: list[str] | None = None
     enabled_issue_codes: list[str] | None = None
diff --git a/vet/cli/main.py b/vet/cli/main.py
@@ -44,6 +44,7 @@ def create_parser() -> argparse.ArgumentParser:
         prog="vet",
         description="Identify issues in code changes using LLM-based analysis.",
         formatter_class=argparse.RawDescriptionHelpFormatter,
+        allow_abbrev=False,
     )
 
     parser.add_argument(
@@ -96,6 +97,8 @@ def create_parser() -> argparse.ArgumentParser:
         metavar="REF",
         help=f"Git commit, branch, or ref to use as the base for computing the diff (default: {CLI_DEFAULTS.base_commit})",
     )
+    # By default, vet includes all changes (staged, unstaged, and untracked). With --staged, only staged changes are included.
+    diff_group.add_argument("--staged", action="store_true", help="Only analyze staged changes")
 
     context_group = parser.add_argument_group("context options")
     context_group.add_argument(
@@ -347,6 +350,25 @@ def list_configs(cli_configs: dict[str, CliConfigPreset], repo_path: Path) -> No
         print()
 
 
+def _validate_staged_related_options(args: argparse.Namespace, base_commit_cli_specified: bool) -> str | None:
+    """Validate options related to staged analysis.
+
+    Returns an error message string when validation fails (caller should print
+    it to stderr and return an exit code of 2), otherwise returns None.
+    """
+    if args.staged and base_commit_cli_specified:
+        # Only treat --base-commit as conflicting if explicitly provided on the CLI.
+        # Config/default values (e.g. "main") should not trigger an error because
+        # staged mode intentionally ignores base commits.
+        return "vet: --staged and --base-commit are mutually exclusive"
+
+    if args.staged and args.agentic:
+        # Sanity check to prevent users from accidentally combining incompatible modes.
+        return "vet: --staged and --agentic are mutually exclusive"
+
+    return None
+
+
 _DEFAULT_LOG_FILE = Path(os.environ.get("XDG_STATE_HOME", Path.home() / ".local" / "state")) / "vet" / "vet.log"
 
 
@@ -433,6 +455,13 @@ def main(argv: list[str] | None = None) -> int:
     parser = create_parser()
     args = parser.parse_args(argv)
 
+    # Determine whether the user explicitly provided `--base-commit` on the
+    # command line. `CLI_DEFAULTS.base_commit` may be non-empty (e.g. "main")
+    # coming from config or defaults; we must only treat an explicit CLI
+    # `--base-commit` as conflicting with staged mode.
+    raw_argv = argv if argv is not None else sys.argv[1:]
+    base_commit_cli_specified = any(a == "--base-commit" or a.startswith("--base-commit=") for a in raw_argv)
+
     # Handle subcommands that don't need config loading.
     if args.update_models:
         try:
@@ -521,6 +550,11 @@ def main(argv: list[str] | None = None) -> int:
                 )
                 return 2
 
+    staged_err = _validate_staged_related_options(args, base_commit_cli_specified)
+    if staged_err is not None:
+        print(staged_err, file=sys.stderr)
+        return 2
+
     if args.verbose and args.quiet:
         print(
             "vet: --verbose and --quiet are mutually exclusive",
@@ -651,10 +685,16 @@ def main(argv: list[str] | None = None) -> int:
         )
 
     if not args.quiet:
-        print(
-            f"analyzing {repo_path} (relative to {args.base_commit})",
-            file=sys.stderr,
-        )
+        if args.staged:
+            print(
+                f"analyzing {repo_path} (staged changes)",
+                file=sys.stderr,
+            )
+        else:
+            print(
+                f"analyzing {repo_path} (relative to {args.base_commit})",
+                file=sys.stderr,
+            )
 
     try:
         issues = find_issues(
@@ -664,6 +704,7 @@ def main(argv: list[str] | None = None) -> int:
             config=config,
             conversation_history=conversation_history,
             extra_context=extra_context,
+            only_staged=args.staged,
         )
     except AgentCLINotFoundError as e:
         print(f"vet: {e}", file=sys.stderr)
diff --git a/vet/git.py b/vet/git.py
@@ -113,7 +113,7 @@ class SyncLocalGitRepo:
     def get_git_diff(
         self,
         commit_hash: str | None = None,
-        staged: bool = False,
+        only_staged: bool = False,
         is_error_logged: bool = True,
         include_binary: bool = True,
     ) -> str:
@@ -124,7 +124,7 @@ class SyncLocalGitRepo:
             # Without --binary, diffs of binary files will just contain a summary statement such as "Binary files a/file.bin and b/file.bin differ".
             # Such diffs cannot be applied, but are useful for inclusion in LLM prompts.
             command.append("--binary")
-        if staged:
+        if only_staged:
             command.append("--staged")
         if commit_hash:
             command.append(commit_hash)
diff --git a/vet/repo_utils.py b/vet/repo_utils.py
@@ -13,13 +13,42 @@ from vet.imbue_core.async_monkey_patches import log_exception
 VET_MAX_PROMPT_TOKENS = 10000
 
 
-def get_code_to_check(relative_to: str, repo_path: Path) -> tuple[str, str, str]:
+def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = False) -> tuple[str, str, str]:
     """
     Returns:
-    - The commit hash to use as the base commit for the diff.
-    - The combined diff including staged, unstaged, and untracked changes. (compatible with `git apply`)
-    - The combined diff but with binary diffs shortened. (cannot be applied if binary changes are present)
+    - The commit hash to use as the base commit for the diff. When `only_staged` is True
+      this will be the current HEAD (staged-only mode ignores `relative_to`).
+    - The combined diff. When `only_staged` is False this includes staged, unstaged,
+      and untracked changes (compatible with `git apply`). When `only_staged` is True
+      this includes only staged changes.
+    - The combined diff with binary diffs shortened (cannot be applied if binary
+      changes are present). When `only_staged` is True this is generated from staged
+      changes only.
+    Note: When `only_staged` is True the `relative_to` argument is ignored; staged-only
+    analysis does not attempt to resolve or use the configured base commit.
     """
+    repo = SyncLocalGitRepo(repo_path)
+
+    if only_staged:
+        # In staged mode we ignore `relative_to` entirely. Avoid resolving the
+        # configured base commit since it may refer to a branch/ref that doesn't
+        # exist in the current working copy (e.g., config sets `main`). This
+        # prevents unnecessary git errors when the user explicitly requested
+        # staged-only analysis.
+        try:
+            combined_diff = repo.get_git_diff(only_staged=True)
+            combined_diff_no_binary = repo.get_git_diff(only_staged=True, include_binary=False)
+            # No untracked files in staged mode. Use HEAD as the base commit for
+            # consistency with non-staged behavior.
+            base_commit = repo.run_git(["rev-parse", "HEAD"])
+        except RunCommandError as e:
+            # If either obtaining the staged diff or resolving HEAD fails,
+            # surface a wrapped GitCommandError so callers receive uniform
+            # error information.
+            raise GitCommandError(e, "get staged diff or determine HEAD commit", repo_path) from e
+
+        return base_commit, combined_diff, combined_diff_no_binary
+
     try:
         base_commit = find_relative_to_commit_hash(relative_to, repo_path=repo_path)
     except RunCommandError as e:
@@ -29,8 +58,6 @@ def get_code_to_check(relative_to: str, repo_path: Path) -> tuple[str, str, str]
             repo_path,
         ) from e
 
-    repo = SyncLocalGitRepo(repo_path)
-
     # Get the combined diff which includes all changes; staged, unstaged, and untracked.
     try:
         combined_diff = repo.get_git_diff(commit_hash=base_commit)
diff --git a/vet/repo_utils_test.py b/vet/repo_utils_test.py
@@ -95,3 +95,44 @@ def test_build_context(simple_test_git_repo: Path, snapshot: SnapshotAssertion) 
     )
     project_context_without_repo_path = chill(project_context_evolver)
     assert project_context_without_repo_path == snapshot
+
+
+def test_get_code_to_check_staged_only(simple_test_git_repo: Path) -> None:
+    """When `only_staged=True`, only staged changes should be returned (no unstaged/untracked),
+    and resolving a configured `relative_to` (like 'main') should not error.
+    """
+    repo_path = simple_test_git_repo
+
+    # Record current HEAD
+    head = subprocess.run(
+        ["git", "rev-parse", "HEAD"],
+        cwd=repo_path,
+        capture_output=True,
+        text=True,
+        check=True,
+    ).stdout.strip()
+
+    # Create an untracked file
+    (repo_path / "untracked.txt").write_text("untracked content")
+
+    # Create a staged change
+    (repo_path / "file1.txt").write_text("staged content\n")
+    subprocess.run(["git", "add", "file1.txt"], cwd=repo_path, check=True)
+
+    # Create an unstaged change
+    with open((repo_path / "file1.txt"), "a+") as f:
+        f.write("\nunstaged content")
+
+    # Use a relative_to that likely doesn't exist (e.g., 'main') to ensure we don't try to resolve it
+    git_hash, diff, diff_no_binary = get_code_to_check("main", repo_path=repo_path, only_staged=True)
+
+    # In staged mode we return HEAD as the base commit
+    assert git_hash == head
+
+    # Staged change should be present
+    assert "staged content" in diff
+    assert "staged content" in diff_no_binary
+
+    # Unstaged and untracked changes should NOT be present
+    assert "unstaged content" not in diff
+    assert "untracked.txt" not in diff

	vet Unnamed repository; edit this file 'description' to name the repository.
	Log | Files | Refs | README | LICENSE

M vet/api.py  | 33 +++++++++++++++++++++++----------
A vet/cli/config/cli_config_consistency_test.py  | 41 +++++++++++++++++++++++++++++++++++++++++
M vet/cli/config/cli_config_schema.py  | 2 ++
M vet/cli/main.py  | 49 +++++++++++++++++++++++++++++++++++++++++++++----
M vet/git.py  | 4 ++--
M vet/repo_utils.py  | 39 +++++++++++++++++++++++++++++++++------
M vet/repo_utils_test.py  | 41 +++++++++++++++++++++++++++++++++++++++++