commit 5942508b8dc7330551edeea6519674d055e41317
parent 0e5a25d0b52640200025059a48e929824cfa2b89
Author: Andrew Laack <andrew@laack.co>
Date: Fri, 6 Mar 2026 19:14:36 +0000
Merge pull request #169 from yashdive/vet-against-staged-changes-only
added feature to run vet against staged changes only
Diffstat:
7 files changed, 187 insertions(+), 22 deletions(-)
diff --git a/vet/api.py b/vet/api.py
@@ -103,21 +103,34 @@ def find_issues(
config: VetConfig,
conversation_history: tuple[ConversationMessageUnion, ...] | None = None,
extra_context: str | None = None,
+ only_staged: bool = False,
) -> tuple[IdentifiedVerifyIssue, ...]:
- logger.debug(
- "Finding issues in {repo_path} relative to {relative_to}",
- repo_path=repo_path,
- relative_to=relative_to,
- )
-
- base_commit, diff, diff_no_binary = get_code_to_check(relative_to, repo_path)
- if not diff.strip():
+ if only_staged:
logger.debug(
- "No code changes detected in repo {repo_path} since the specified relative_to commit {relative_to}, skipping issue identification",
+ "Finding issues in {repo_path} (staged changes)",
+ repo_path=repo_path,
+ )
+ else:
+ logger.debug(
+ "Finding issues in {repo_path} relative to {relative_to}",
repo_path=repo_path,
relative_to=relative_to,
)
- # No code changes detected since the specified relative_to commit, so no issues to find.
+
+ base_commit, diff, diff_no_binary = get_code_to_check(relative_to, repo_path, only_staged=only_staged)
+ if not diff.strip():
+ if only_staged:
+ logger.debug(
+ "No code changes detected in repo {repo_path} for staged changes, skipping issue identification",
+ repo_path=repo_path,
+ )
+ else:
+ logger.debug(
+ "No code changes detected in repo {repo_path} since the specified relative_to commit {relative_to}, skipping issue identification",
+ repo_path=repo_path,
+ relative_to=relative_to,
+ )
+ # No code changes detected, so no issues to find.
return tuple()
issues, _, _ = get_issues_with_raw_responses(
diff --git a/vet/cli/config/cli_config_consistency_test.py b/vet/cli/config/cli_config_consistency_test.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from vet.cli.config.cli_config_schema import CliConfigPreset
+from vet.cli.config.cli_config_schema import CliDefaults
+from vet.cli.main import create_parser
+
+IGNORED_ARGS = {
+ "help",
+ "version",
+ # CLI-only flags that select behavior rather than configure defaults
+ "config",
+ "list_configs",
+ "list_models",
+ "list_issue_codes",
+ "list_fields",
+ # agent-mode flags are intentionally CLI-only
+ "agentic",
+ "agent_harness",
+ "update_models",
+}
+
+
+def _extract_cli_arg_dests() -> set[str]:
+ parser = create_parser()
+ return {action.dest for action in parser._actions if action.dest and action.dest not in IGNORED_ARGS}
+
+
+def test_cli_args_present_in_cli_defaults_and_presets() -> None:
+ """Ensure every CLI argument that is meant to be configurable
+ appears in both `CliDefaults` and `CliConfigPreset`.
+ """
+ cli_args = _extract_cli_arg_dests()
+
+ defaults_fields = set(CliDefaults.model_fields.keys())
+ preset_fields = set(CliConfigPreset.model_fields.keys())
+
+ missing_in_defaults = cli_args - defaults_fields
+ missing_in_presets = cli_args - preset_fields
+
+ assert not missing_in_defaults, f"CLI args missing from CliDefaults: {sorted(missing_in_defaults)}"
+ assert not missing_in_presets, f"CLI args missing from CliConfigPreset: {sorted(missing_in_presets)}"
diff --git a/vet/cli/config/cli_config_schema.py b/vet/cli/config/cli_config_schema.py
@@ -26,6 +26,7 @@ class CliConfigPreset(BaseModel):
goal: str | None = None
repo: str | None = None
base_commit: str | None = None
+ staged: bool | None = None
history_loader: str | None = None
extra_context: list[str] | None = None
enabled_issue_codes: list[str] | None = None
@@ -51,6 +52,7 @@ class CliDefaults(BaseModel):
goal: str | None = None
repo: str | None = None
base_commit: str = "HEAD"
+ staged: bool = False
history_loader: str | None = None
extra_context: list[str] | None = None
enabled_issue_codes: list[str] | None = None
diff --git a/vet/cli/main.py b/vet/cli/main.py
@@ -44,6 +44,7 @@ def create_parser() -> argparse.ArgumentParser:
prog="vet",
description="Identify issues in code changes using LLM-based analysis.",
formatter_class=argparse.RawDescriptionHelpFormatter,
+ allow_abbrev=False,
)
parser.add_argument(
@@ -96,6 +97,8 @@ def create_parser() -> argparse.ArgumentParser:
metavar="REF",
help=f"Git commit, branch, or ref to use as the base for computing the diff (default: {CLI_DEFAULTS.base_commit})",
)
+ # By default, vet includes all changes (staged, unstaged, and untracked). With --staged, only staged changes are included.
+ diff_group.add_argument("--staged", action="store_true", help="Only analyze staged changes")
context_group = parser.add_argument_group("context options")
context_group.add_argument(
@@ -347,6 +350,25 @@ def list_configs(cli_configs: dict[str, CliConfigPreset], repo_path: Path) -> No
print()
+def _validate_staged_related_options(args: argparse.Namespace, base_commit_cli_specified: bool) -> str | None:
+ """Validate options related to staged analysis.
+
+ Returns an error message string when validation fails (caller should print
+ it to stderr and return an exit code of 2), otherwise returns None.
+ """
+ if args.staged and base_commit_cli_specified:
+ # Only treat --base-commit as conflicting if explicitly provided on the CLI.
+ # Config/default values (e.g. "main") should not trigger an error because
+ # staged mode intentionally ignores base commits.
+ return "vet: --staged and --base-commit are mutually exclusive"
+
+ if args.staged and args.agentic:
+ # Sanity check to prevent users from accidentally combining incompatible modes.
+ return "vet: --staged and --agentic are mutually exclusive"
+
+ return None
+
+
_DEFAULT_LOG_FILE = Path(os.environ.get("XDG_STATE_HOME", Path.home() / ".local" / "state")) / "vet" / "vet.log"
@@ -433,6 +455,13 @@ def main(argv: list[str] | None = None) -> int:
parser = create_parser()
args = parser.parse_args(argv)
+ # Determine whether the user explicitly provided `--base-commit` on the
+ # command line. `CLI_DEFAULTS.base_commit` may be non-empty (e.g. "main")
+ # because it can come from config or defaults, so only an explicit CLI
+ # `--base-commit` must be treated as conflicting with staged mode.
+ raw_argv = argv if argv is not None else sys.argv[1:]
+ base_commit_cli_specified = any(a == "--base-commit" or a.startswith("--base-commit=") for a in raw_argv)
+
# Handle subcommands that don't need config loading.
if args.update_models:
try:
@@ -521,6 +550,11 @@ def main(argv: list[str] | None = None) -> int:
)
return 2
+ staged_err = _validate_staged_related_options(args, base_commit_cli_specified)
+ if staged_err is not None:
+ print(staged_err, file=sys.stderr)
+ return 2
+
if args.verbose and args.quiet:
print(
"vet: --verbose and --quiet are mutually exclusive",
@@ -651,10 +685,16 @@ def main(argv: list[str] | None = None) -> int:
)
if not args.quiet:
- print(
- f"analyzing {repo_path} (relative to {args.base_commit})",
- file=sys.stderr,
- )
+ if args.staged:
+ print(
+ f"analyzing {repo_path} (staged changes)",
+ file=sys.stderr,
+ )
+ else:
+ print(
+ f"analyzing {repo_path} (relative to {args.base_commit})",
+ file=sys.stderr,
+ )
try:
issues = find_issues(
@@ -664,6 +704,7 @@ def main(argv: list[str] | None = None) -> int:
config=config,
conversation_history=conversation_history,
extra_context=extra_context,
+ only_staged=args.staged,
)
except AgentCLINotFoundError as e:
print(f"vet: {e}", file=sys.stderr)
diff --git a/vet/git.py b/vet/git.py
@@ -113,7 +113,7 @@ class SyncLocalGitRepo:
def get_git_diff(
self,
commit_hash: str | None = None,
- staged: bool = False,
+ only_staged: bool = False,
is_error_logged: bool = True,
include_binary: bool = True,
) -> str:
@@ -124,7 +124,7 @@ class SyncLocalGitRepo:
# Without --binary, diffs of binary files will just contain a summary statement such as "Binary files a/file.bin and b/file.bin differ".
# Such diffs cannot be applied, but are useful for inclusion in LLM prompts.
command.append("--binary")
- if staged:
+ if only_staged:
command.append("--staged")
if commit_hash:
command.append(commit_hash)
diff --git a/vet/repo_utils.py b/vet/repo_utils.py
@@ -13,13 +13,42 @@ from vet.imbue_core.async_monkey_patches import log_exception
VET_MAX_PROMPT_TOKENS = 10000
-def get_code_to_check(relative_to: str, repo_path: Path) -> tuple[str, str, str]:
+def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = False) -> tuple[str, str, str]:
"""
Returns:
- - The commit hash to use as the base commit for the diff.
- - The combined diff including staged, unstaged, and untracked changes. (compatible with `git apply`)
- - The combined diff but with binary diffs shortened. (cannot be applied if binary changes are present)
+ - The commit hash to use as the base commit for the diff. When `only_staged` is True
+ this will be the current HEAD (staged-only mode ignores `relative_to`).
+ - The combined diff. When `only_staged` is False this includes staged, unstaged,
+ and untracked changes (compatible with `git apply`). When `only_staged` is True
+ this includes only staged changes.
+ - The combined diff with binary diffs shortened (cannot be applied if binary
+ changes are present). When `only_staged` is True this is generated from staged
+ changes only.
+ Note: When `only_staged` is True the `relative_to` argument is ignored; staged-only
+ analysis does not attempt to resolve or use the configured base commit.
"""
+ repo = SyncLocalGitRepo(repo_path)
+
+ if only_staged:
+ # In staged mode we ignore `relative_to` entirely. Avoid resolving the
+ # configured base commit since it may refer to a branch/ref that doesn't
+ # exist in the current working copy (e.g., config sets `main`). This
+ # prevents unnecessary git errors when the user explicitly requested
+ # staged-only analysis.
+ try:
+ combined_diff = repo.get_git_diff(only_staged=True)
+ combined_diff_no_binary = repo.get_git_diff(only_staged=True, include_binary=False)
+ # Untracked files are never part of a staged diff. Use HEAD as the base
+ # commit: `git diff --staged` compares the index against HEAD, and this
+ # keeps the return value consistent with non-staged behavior.
+ base_commit = repo.run_git(["rev-parse", "HEAD"])
+ except RunCommandError as e:
+ # If either obtaining the staged diff or resolving HEAD fails,
+ # surface a wrapped GitCommandError so callers receive uniform
+ # error information.
+ raise GitCommandError(e, "get staged diff or determine HEAD commit", repo_path) from e
+
+ return base_commit, combined_diff, combined_diff_no_binary
+
try:
base_commit = find_relative_to_commit_hash(relative_to, repo_path=repo_path)
except RunCommandError as e:
@@ -29,8 +58,6 @@ def get_code_to_check(relative_to: str, repo_path: Path) -> tuple[str, str, str]
repo_path,
) from e
- repo = SyncLocalGitRepo(repo_path)
-
# Get the combined diff which includes all changes; staged, unstaged, and untracked.
try:
combined_diff = repo.get_git_diff(commit_hash=base_commit)
diff --git a/vet/repo_utils_test.py b/vet/repo_utils_test.py
@@ -95,3 +95,44 @@ def test_build_context(simple_test_git_repo: Path, snapshot: SnapshotAssertion)
)
project_context_without_repo_path = chill(project_context_evolver)
assert project_context_without_repo_path == snapshot
+
+
+def test_get_code_to_check_staged_only(simple_test_git_repo: Path) -> None:
+ """When `only_staged=True`, only staged changes should be returned (no unstaged/untracked),
+ and a configured `relative_to` that cannot be resolved (like 'main') should not cause an error.
+ """
+ repo_path = simple_test_git_repo
+
+ # Record current HEAD
+ head = subprocess.run(
+ ["git", "rev-parse", "HEAD"],
+ cwd=repo_path,
+ capture_output=True,
+ text=True,
+ check=True,
+ ).stdout.strip()
+
+ # Create an untracked file
+ (repo_path / "untracked.txt").write_text("untracked content")
+
+ # Create a staged change
+ (repo_path / "file1.txt").write_text("staged content\n")
+ subprocess.run(["git", "add", "file1.txt"], cwd=repo_path, check=True)
+
+ # Create an unstaged change
+ with open((repo_path / "file1.txt"), "a+") as f:
+ f.write("\nunstaged content")
+
+ # Use a relative_to that likely doesn't exist (e.g., 'main') to ensure we don't try to resolve it
+ git_hash, diff, diff_no_binary = get_code_to_check("main", repo_path=repo_path, only_staged=True)
+
+ # In staged mode we return HEAD as the base commit
+ assert git_hash == head
+
+ # Staged change should be present
+ assert "staged content" in diff
+ assert "staged content" in diff_no_binary
+
+ # Unstaged and untracked changes should NOT be present
+ assert "unstaged content" not in diff
+ assert "untracked.txt" not in diff