commit c00c9163ea52fbeefc80126da72456002fff9c41
parent 5942508b8dc7330551edeea6519674d055e41317
Author: Andrew Laack <andrew@laack.co>
Date: Fri, 6 Mar 2026 23:34:09 +0000
Merge pull request #176 from imbue-ai/andrew/fix-submodule-diff-application
Fix DiffApplicationError crash when diffs contain submodule changes
Diffstat:
2 files changed, 131 insertions(+), 2 deletions(-)
diff --git a/vet/repo_utils.py b/vet/repo_utils.py
@@ -1,3 +1,4 @@
+import re
from pathlib import Path
from vet.errors import GitCommandError
@@ -13,6 +14,29 @@ from vet.imbue_core.async_monkey_patches import log_exception
VET_MAX_PROMPT_TOKENS = 10000
+# Matches one extended-header line that records the gitlink (submodule) mode
+# 160000: either a mode line ("new file mode 160000", "deleted file mode 160000",
+# "old mode 160000", "new mode 160000") or an "index <old>..<new> 160000" line.
+# Anchoring to whole lines keeps file paths that merely contain " 160000"
+# (which appear on the "diff --git", "---", "+++" and rename lines) from matching.
+_SUBMODULE_HEADER_LINE = re.compile(
+    r"^(?:(?:new file|deleted file|old|new) mode 160000"
+    r"|index [0-9a-fA-F]+\.\.[0-9a-fA-F]+ 160000)[ \t\r]*$",
+    re.MULTILINE,
+)
+
+
+def strip_submodule_diffs(diff_string: str) -> str:
+    """Return ``diff_string`` with every submodule (mode 160000) section removed.
+
+    The diff is split into per-file sections at each line that starts with
+    ``diff --git ``. A section is dropped when its header (everything before the
+    first ``@@ `` hunk line, or the whole section if it has no hunk) contains a
+    mode or index line carrying mode 160000. Text that precedes the first
+    ``diff --git `` line is kept verbatim, as are all non-submodule sections.
+
+    Args:
+        diff_string: Unified diff text as produced by ``git diff``; may be empty.
+
+    Returns:
+        The diff with submodule sections removed. An empty input is returned
+        unchanged.
+    """
+    if not diff_string:
+        return diff_string
+
+    # The zero-width lookahead splits *before* each "diff --git " line, so the
+    # sections concatenate back to the exact original text.
+    sections = re.split(r"(?=^diff --git )", diff_string, flags=re.MULTILINE)
+
+    kept: list[str] = []
+    for section in sections:
+        if section.startswith("diff --git "):
+            # Only inspect the header: hunk bodies may legitimately contain
+            # the text "160000" (e.g. a diff of a file that embeds diffs).
+            hunk_start = section.find("\n@@ ")
+            header = section if hunk_start == -1 else section[:hunk_start]
+            if _SUBMODULE_HEADER_LINE.search(header):
+                continue
+        kept.append(section)
+
+    return "".join(kept)
+
+
def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = False) -> tuple[str, str, str]:
"""
Returns:
@@ -47,7 +71,11 @@ def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = Fal
# error information.
raise GitCommandError(e, "get staged diff or determine HEAD commit", repo_path) from e
- return base_commit, combined_diff, combined_diff_no_binary
+ return (
+ base_commit,
+ strip_submodule_diffs(combined_diff),
+ strip_submodule_diffs(combined_diff_no_binary),
+ )
try:
base_commit = find_relative_to_commit_hash(relative_to, repo_path=repo_path)
@@ -106,4 +134,8 @@ def get_code_to_check(relative_to: str, repo_path: Path, only_staged: bool = Fal
if untracked_diffs_no_binary:
combined_diff_no_binary += "\n" + "\n".join(untracked_diffs_no_binary)
- return base_commit, combined_diff, combined_diff_no_binary
+ return (
+ base_commit,
+ strip_submodule_diffs(combined_diff),
+ strip_submodule_diffs(combined_diff_no_binary),
+ )
diff --git a/vet/repo_utils_test.py b/vet/repo_utils_test.py
@@ -9,6 +9,7 @@ from vet.imbue_core.nested_evolver import chill
from vet.imbue_core.nested_evolver import evolver
from vet.imbue_tools.repo_utils.project_context import LazyProjectContext
from vet.repo_utils import get_code_to_check
+from vet.repo_utils import strip_submodule_diffs
def test_get_code_to_check(simple_test_git_repo: Path) -> None:
@@ -136,3 +137,99 @@ def test_get_code_to_check_staged_only(simple_test_git_repo: Path) -> None:
# Unstaged and untracked changes should NOT be present
assert "unstaged content" not in diff
assert "untracked.txt" not in diff
+
+
+# Fixture: diff section for an ordinary file (index line carries mode 100644).
+# The strip_submodule_diffs tests below expect this section to be kept.
+_REGULAR_FILE_DIFF = """\
+diff --git a/src/main.py b/src/main.py
+index abc1234..def5678 100644
+--- a/src/main.py
++++ b/src/main.py
+@@ -1,3 +1,4 @@
+ import os
++import sys
+
+ def main():
+"""
+
+# Fixture: diff section for a newly added submodule; the header carries
+# "new file mode 160000", so the tests expect this section to be removed.
+_NEW_SUBMODULE_DIFF = """\
+diff --git a/libs/external b/libs/external
+new file mode 160000
+index 0000000..abc1234
+--- /dev/null
++++ b/libs/external
+@@ -0,0 +1 @@
++Subproject commit abc1234567890abcdef1234567890abcdef123456
+"""
+
+# Fixture: diff section for a removed submodule; the header carries
+# "deleted file mode 160000", so the tests expect this section to be removed.
+_DELETED_SUBMODULE_DIFF = """\
+diff --git a/vendor/old b/vendor/old
+deleted file mode 160000
+index abc1234..0000000
+--- a/vendor/old
++++ /dev/null
+@@ -1 +0,0 @@
+-Subproject commit abc1234567890abcdef1234567890abcdef123456
+"""
+
+# Fixture: diff section for a submodule whose pinned commit changed. There is
+# no separate mode line here; mode 160000 appears only at the end of the
+# "index" line, and the tests expect this section to be removed.
+_UPDATED_SUBMODULE_DIFF = """\
+diff --git a/libs/shared b/libs/shared
+index abc1234..def5678 160000
+--- a/libs/shared
++++ b/libs/shared
+@@ -1 +1 @@
+-Subproject commit abc1234567890abcdef1234567890abcdef123456
++Subproject commit def567890abcdef1234567890abcdef1234567890
+"""
+
+
+def test_strip_submodule_diffs_empty() -> None:
+    """An empty diff comes back as an empty string."""
+    stripped = strip_submodule_diffs("")
+    assert stripped == ""
+
+
+def test_strip_submodule_diffs_no_submodules() -> None:
+    """A diff without submodule sections is returned unchanged."""
+    stripped = strip_submodule_diffs(_REGULAR_FILE_DIFF)
+    assert stripped == _REGULAR_FILE_DIFF
+
+
+def test_strip_submodule_diffs_removes_new_submodule() -> None:
+    """A trailing new-submodule section is dropped; the regular file stays."""
+    diff_input = "".join([_REGULAR_FILE_DIFF, _NEW_SUBMODULE_DIFF])
+    stripped = strip_submodule_diffs(diff_input)
+    assert stripped == _REGULAR_FILE_DIFF
+
+
+def test_strip_submodule_diffs_removes_deleted_submodule() -> None:
+    """A leading deleted-submodule section is dropped; the regular file stays."""
+    diff_input = "".join([_DELETED_SUBMODULE_DIFF, _REGULAR_FILE_DIFF])
+    stripped = strip_submodule_diffs(diff_input)
+    assert stripped == _REGULAR_FILE_DIFF
+
+
+def test_strip_submodule_diffs_removes_updated_submodule() -> None:
+    """A submodule pointer update is dropped; the regular file stays."""
+    diff_input = "".join([_REGULAR_FILE_DIFF, _UPDATED_SUBMODULE_DIFF])
+    stripped = strip_submodule_diffs(diff_input)
+    assert stripped == _REGULAR_FILE_DIFF
+
+
+def test_strip_submodule_diffs_only_submodules() -> None:
+    """A diff made up solely of submodule sections strips down to nothing."""
+    submodule_sections = [
+        _NEW_SUBMODULE_DIFF,
+        _DELETED_SUBMODULE_DIFF,
+        _UPDATED_SUBMODULE_DIFF,
+    ]
+    stripped = strip_submodule_diffs("".join(submodule_sections))
+    assert stripped == ""
+
+
+def test_strip_submodule_diffs_preserves_files_in_submodule_path() -> None:
+    """A regular file under the submodule's path is kept when the submodule goes."""
+    # Mode 100644 file that lives at the same path prefix as the new submodule.
+    nested_file_diff = """\
+diff --git a/libs/external/.gitignore b/libs/external/.gitignore
+deleted file mode 100644
+index abc1234..0000000
+--- a/libs/external/.gitignore
++++ /dev/null
+@@ -1,2 +0,0 @@
+-target/
+-.cache/
+"""
+    diff_input = "".join([_NEW_SUBMODULE_DIFF, nested_file_diff, _REGULAR_FILE_DIFF])
+    stripped = strip_submodule_diffs(diff_input)
+    assert nested_file_diff in stripped
+    assert _REGULAR_FILE_DIFF in stripped
+    assert _NEW_SUBMODULE_DIFF not in stripped
+
+
+def test_strip_submodule_diffs_preserves_preamble() -> None:
+    """Text before the first ``diff --git`` line survives the stripping."""
+    leading_text = "some preamble text\n"
+    diff_input = "".join([leading_text, _NEW_SUBMODULE_DIFF, _REGULAR_FILE_DIFF])
+    expected = leading_text + _REGULAR_FILE_DIFF
+    assert strip_submodule_diffs(diff_input) == expected