vet

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit c00c9163ea52fbeefc80126da72456002fff9c41
parent 5942508b8dc7330551edeea6519674d055e41317
Author: Andrew Laack <andrew@laack.co>
Date:   Fri,  6 Mar 2026 23:34:09 +0000

Merge pull request #176 from imbue-ai/andrew/fix-submodule-diff-application

Fix DiffApplicationError crash when diffs contain submodule changes
Diffstat:
Mvet/repo_utils.py | 36++++++++++++++++++++++++++++++++++--
Mvet/repo_utils_test.py | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/vet/repo_utils.py b/vet/repo_utils.py @@ -1,3 +1,4 @@ +import re from pathlib import Path from vet.errors import GitCommandError @@ -13,6 +14,29 @@ from vet.imbue_core.async_monkey_patches import log_exception VET_MAX_PROMPT_TOKENS = 10000 +def strip_submodule_diffs(diff_string: str) -> str: + if not diff_string: + return diff_string + + sections = re.split(r"(?=^diff --git )", diff_string, flags=re.MULTILINE) + + filtered: list[str] = [] + for section in sections: + if not section.startswith("diff --git "): + filtered.append(section) + continue + + hunk_start = section.find("\n@@ ") + header = section[:hunk_start] if hunk_start != -1 else section + + if " 160000" in header: + continue + + filtered.append(section) + + return "".join(filtered) + + def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = False) -> tuple[str, str, str]: """ Returns: @@ -47,7 +71,11 @@ def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = Fal # error information. raise GitCommandError(e, "get staged diff or determine HEAD commit", repo_path) from e - return base_commit, combined_diff, combined_diff_no_binary + return ( + base_commit, + strip_submodule_diffs(combined_diff), + strip_submodule_diffs(combined_diff_no_binary), + ) try: base_commit = find_relative_to_commit_hash(relative_to, repo_path=repo_path) @@ -106,4 +134,8 @@ def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = Fal if untracked_diffs_no_binary: combined_diff_no_binary += "\n" + "\n".join(untracked_diffs_no_binary) - return base_commit, combined_diff, combined_diff_no_binary + return ( + base_commit, + strip_submodule_diffs(combined_diff), + strip_submodule_diffs(combined_diff_no_binary), + ) diff --git a/vet/repo_utils_test.py b/vet/repo_utils_test.py @@ -9,6 +9,7 @@ from vet.imbue_core.nested_evolver import chill from vet.imbue_core.nested_evolver import evolver from vet.imbue_tools.repo_utils.project_context import LazyProjectContext from vet.repo_utils import get_code_to_check +from vet.repo_utils import strip_submodule_diffs def test_get_code_to_check(simple_test_git_repo: Path) -> None: @@ -136,3 +137,99 @@ def test_get_code_to_check_staged_only(simple_test_git_repo: Path) -> None: # Unstaged and untracked changes should NOT be present assert "unstaged content" not in diff assert "untracked.txt" not in diff + + +_REGULAR_FILE_DIFF = """\ +diff --git a/src/main.py b/src/main.py +index abc1234..def5678 100644 +--- a/src/main.py ++++ b/src/main.py +@@ -1,3 +1,4 @@ + import os ++import sys + + def main(): +""" + +_NEW_SUBMODULE_DIFF = """\ +diff --git a/libs/external b/libs/external +new file mode 160000 +index 0000000..abc1234 +--- /dev/null ++++ b/libs/external +@@ -0,0 +1 @@ ++Subproject commit abc1234567890abcdef1234567890abcdef123456 +""" + +_DELETED_SUBMODULE_DIFF = """\ +diff --git a/vendor/old b/vendor/old +deleted file mode 160000 +index abc1234..0000000 +--- a/vendor/old ++++ /dev/null +@@ -1 +0,0 @@ +-Subproject commit abc1234567890abcdef1234567890abcdef123456 +""" + +_UPDATED_SUBMODULE_DIFF = """\ +diff --git a/libs/shared b/libs/shared +index abc1234..def5678 160000 +--- a/libs/shared ++++ b/libs/shared +@@ -1 +1 @@ +-Subproject commit abc1234567890abcdef1234567890abcdef123456 ++Subproject commit def567890abcdef1234567890abcdef1234567890 +""" + + +def test_strip_submodule_diffs_empty() -> None: + assert strip_submodule_diffs("") == "" + + +def test_strip_submodule_diffs_no_submodules() -> None: + assert strip_submodule_diffs(_REGULAR_FILE_DIFF) == _REGULAR_FILE_DIFF + + +def test_strip_submodule_diffs_removes_new_submodule() -> None: + combined = _REGULAR_FILE_DIFF + _NEW_SUBMODULE_DIFF + assert strip_submodule_diffs(combined) == _REGULAR_FILE_DIFF + + +def test_strip_submodule_diffs_removes_deleted_submodule() -> None: + combined = _DELETED_SUBMODULE_DIFF + _REGULAR_FILE_DIFF + assert strip_submodule_diffs(combined) == _REGULAR_FILE_DIFF + + +def test_strip_submodule_diffs_removes_updated_submodule() -> None: + combined = _REGULAR_FILE_DIFF + _UPDATED_SUBMODULE_DIFF + assert strip_submodule_diffs(combined) == _REGULAR_FILE_DIFF + + +def test_strip_submodule_diffs_only_submodules() -> None: + combined = _NEW_SUBMODULE_DIFF + _DELETED_SUBMODULE_DIFF + _UPDATED_SUBMODULE_DIFF + assert strip_submodule_diffs(combined) == "" + + +def test_strip_submodule_diffs_preserves_files_in_submodule_path() -> None: + file_in_submodule_dir = """\ +diff --git a/libs/external/.gitignore b/libs/external/.gitignore +deleted file mode 100644 +index abc1234..0000000 +--- a/libs/external/.gitignore ++++ /dev/null +@@ -1,2 +0,0 @@ +-target/ +-.cache/ +""" + combined = _NEW_SUBMODULE_DIFF + file_in_submodule_dir + _REGULAR_FILE_DIFF + result = strip_submodule_diffs(combined) + assert file_in_submodule_dir in result + assert _REGULAR_FILE_DIFF in result + assert _NEW_SUBMODULE_DIFF not in result + + +def test_strip_submodule_diffs_preserves_preamble() -> None: + preamble = "some preamble text\n" + combined = preamble + _NEW_SUBMODULE_DIFF + _REGULAR_FILE_DIFF + result = strip_submodule_diffs(combined) + assert result == preamble + _REGULAR_FILE_DIFF