Updated logging levels, added guidance about logging choices (#116) - vet - Unnamed repository; edit this file 'description' to name the repository.

commit c5379ba57a0bff38395d9ce257a8a447ca466217
parent 730a9ad72b93434e1df6157a675310180548657c
Author: andrewlaack-collab <andrew.laack@imbue.com>
Date:   Fri, 20 Feb 2026 09:31:00 +0000

Updated logging levels, added guidance about logging choices (#116)

* Updated logging levels, added guidance about logging choices

* Updated messaging

* Restate

* Updated wording

---------

Co-authored-by: Andrew Laack <andrew@laack.co>
Diffstat:
M DEVELOPMENT.md  | 14 ++++++++------
M vet/api.py  | 10 +++++-----
M vet/cli/main.py  | 8 ++++----
M vet/imbue_core/agents/llm_apis/anthropic_api.py  | 10 +++++-----
M vet/imbue_core/agents/llm_apis/gemini_api.py  | 18 +++++++++---------
M vet/imbue_core/agents/llm_apis/groq_api.py  | 10 +++++-----
M vet/imbue_core/agents/llm_apis/language_model_api.py  | 2 +-
M vet/imbue_core/agents/llm_apis/mock_api.py  | 2 +-
M vet/imbue_core/serialization.py  | 2 +-
M vet/imbue_tools/get_conversation_history/get_conversation_history.py  | 2 +-
M vet/imbue_tools/repo_utils/context_prefix.py  | 4 ++--
M vet/imbue_tools/repo_utils/context_retrieval.py  | 2 +-
M vet/issue_identifiers/common.py  | 8 ++++----
M vet/issue_identifiers/registry.py  | 2 +-

14 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -147,15 +147,17 @@ Vet is published to PyPI via the `publish-pypi.yml` GitHub Actions workflow. Dep
 
 ## Development Notes
 
-### Logging Configuration
+### Logging
 
-When creating a new entrypoint into vet, you must call `ensure_core_log_levels_configured()` to register the custom log levels used throughout the codebase.
+When creating a new entry point into vet, you must call `configure_logging()` from `vet.cli.main`.
 
-```python
-from vet.imbue_core.log_utils import ensure_core_log_levels_configured
+Log level heuristics:
 
-ensure_core_log_levels_configured()
-```
+- **TRACE** - API payloads, token counts, dollar costs, agent subprocess messages.
+- **DEBUG** - Everything internal: API exceptions before re-raise, retries, fallbacks, identifier selection, history loading, context assembly. All LLM provider exception handlers must log at DEBUG before raising (see `_openai_exception_manager` for the pattern).
+- **INFO** - Top-level run lifecycle only. Do not add new INFO messages without team discussion.
+- **WARNING** - Degraded conditions: LLM content blocked/flagged, unrecognized config values, malformed user data, spend limit warnings.
+- **ERROR** - Failures that prevent producing results. Use `log_exception()` from `vet.imbue_core.async_monkey_patches` for tracebacks.
 
 ### README links
 
diff --git a/vet/api.py b/vet/api.py
@@ -34,20 +34,20 @@ def get_issues_with_raw_responses(
     extra_context: str | None = None,
 ) -> tuple[tuple[IdentifiedVerifyIssue, ...], IssueIdentificationDebugInfo, ProjectContext]:
     if not goal or not goal.strip():
-        logger.info("No goal was provided, generating one from conversation history")
+        logger.debug("No goal was provided, generating one from conversation history")
         # should be not None and not empty
         if conversation_history:
             try:
                 # TODO: we use the config here, but we may want to configure this separately
                 goal = get_goal_from_conversation(conversation_history, config.language_model_generation_config)
-                logger.info("Generated goal from conversation history: {}", goal)
+                logger.debug("Generated goal from conversation history: {}", goal)
             except Exception as e:
                 raise ConversationLoadingError(
                     f"No goal was provided and generating one from conversation history failed: {e}"
                 )
         else:
             # TODO: Consider which CLI options we should show this for (quiet, normal, verbose).
-            logger.info("No goal or conversation history provided, only goal-independent identifiers will run")
+            logger.debug("No goal or conversation history provided, only goal-independent identifiers will run")
             goal = ""
 
     lm_config = config.language_model_generation_config
@@ -68,7 +68,7 @@ def get_issues_with_raw_responses(
     )
 
     if conversation_history:
-        logger.info(
+        logger.debug(
             "Passing {} conversation history messages to identifier inputs",
             len(conversation_history),
         )
@@ -105,7 +105,7 @@ def find_issues(
     extra_context: str | None = None,
 ) -> tuple[IdentifiedVerifyIssue, ...]:
     logger.info(
-        "Finding issues in {repo_path} relative to commit hash {relative_to}",
+        "Finding issues in {repo_path} relative to {relative_to}",
         repo_path=repo_path,
         relative_to=relative_to,
     )
diff --git a/vet/cli/main.py b/vet/cli/main.py
@@ -316,16 +316,16 @@ def configure_logging(verbose: bool, quiet: bool) -> None:
 def load_conversation_from_command(command: str, cwd: Path) -> tuple:
     from vet.imbue_tools.get_conversation_history.get_conversation_history import parse_conversation_history
 
-    logger.info("Running history loader command: {}", command)
+    logger.debug("Running history loader command: {}", command)
     result = subprocess.run(command, shell=True, capture_output=True, text=True, cwd=cwd)
     if result.returncode != 0:
         logger.warning(f"History loader command failed with exit code {result.returncode}: {result.stderr}")
         return ()
     if not result.stdout.strip():
-        logger.info("History loader command returned empty output, no conversation history loaded")
+        logger.debug("History loader command returned empty output, no conversation history loaded")
         return ()
     messages = parse_conversation_history(result.stdout)
-    logger.info(
+    logger.debug(
         "Loaded {} conversation history messages from history loader command",
         len(messages),
     )
@@ -482,7 +482,7 @@ def main(argv: list[str] | None = None) -> int:
     if args.history_loader is not None:
         conversation_history = load_conversation_from_command(args.history_loader, repo_path)
     else:
-        logger.info("No history loader provided, skipping conversation history loading")
+        logger.debug("No history loader provided, skipping conversation history loading")
     extra_context = None
     if args.extra_context:
         extra_context_parts = []
diff --git a/vet/imbue_core/agents/llm_apis/anthropic_api.py b/vet/imbue_core/agents/llm_apis/anthropic_api.py
@@ -422,10 +422,10 @@ def _anthropic_exception_manager() -> Iterator[None]:
         # this can be caused by either malformed requests or transient errors, so play it safe and retry
         raise TransientLanguageModelError(str(e)) from e
     except anthropic.BadRequestError as e:
-        logger.info("BadAPIRequestError {e}", e=e)
+        logger.debug("BadAPIRequestError {e}", e=e)
         raise BadAPIRequestError(str(e)) from e
     except TypeError as e:
-        logger.info("Type error calling Anthropic API: {e}", e=e)
+        logger.debug("Type error calling Anthropic API: {e}", e=e)
         raise BadAPIRequestError(str(e)) from e
     except anthropic.APIConnectionError as e:
         raise TransientLanguageModelError(str(e)) from e
@@ -443,12 +443,12 @@ def _anthropic_exception_manager() -> Iterator[None]:
         # anthropic.APIStatusError: {'type': 'error', 'error': {'details': None, 'type': 'invalid_request_error', 'message': 'Output blocked by content filtering policy'}}
         if e.message == "Output blocked by content filtering policy":
             raise NewSeedRetriableLanguageModelError(e)
-        logger.info(str(e))
+        logger.debug(str(e))
         if e.status_code == 409 or e.status_code >= 500:
             raise TransientLanguageModelError(str(e)) from e
         raise
     except httpx.RemoteProtocolError as e:
-        logger.info(str(e))
+        logger.debug(str(e))
         raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
     except (BadAPIRequestError, TransientLanguageModelError, MissingAPIKeyError):
         # we already raised this error ourselves earlier, so we don't need to mark it as unknown
@@ -809,7 +809,7 @@ class AnthropicAPI(LanguageModelAPI):
             return input_cost + output_cost
 
         except MissingCachingInfoError as e:
-            logger.info("{}; using basic cost model", e)
+            logger.debug("{}; using basic cost model", e)
             return self.basic_calculate_cost(prompt_tokens, completion_tokens)
 
 
diff --git a/vet/imbue_core/agents/llm_apis/gemini_api.py b/vet/imbue_core/agents/llm_apis/gemini_api.py
@@ -248,19 +248,19 @@ def _gemini_exception_manager() -> Iterator[None]:
     try:
         yield
     except AssertionError as e:
-        logger.info("The Gemini prompt is invalid.")
+        logger.debug("The Gemini prompt is invalid.")
         raise BadAPIRequestError(str(e)) from e
     except APIError as e:
-        logger.info("Gemini failed to generate content.")
+        logger.debug("Gemini failed to generate content.")
         raise BadAPIRequestError(str(e)) from e
     except ValueError as e:
-        logger.info("Gemini did not return a simple text response.")
+        logger.debug("Gemini did not return a simple text response.")
         raise BadAPIRequestError(str(e)) from e
     except AttributeError as e:
-        logger.info("There is an error with the Gemini prompt or processing code: {}.", str(e))
+        logger.debug("There is an error with the Gemini prompt or processing code: {}.", str(e))
         raise BadAPIRequestError(str(e)) from e
     except httpx.RemoteProtocolError as e:
-        logger.info(str(e))
+        logger.debug(str(e))
         raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
     except (BadAPIRequestError, TransientLanguageModelError, MissingAPIKeyError):
         # we already raised this error ourselves earlier, so we don't need to mark it as unknown
@@ -345,7 +345,7 @@ class GeminiAPI(LanguageModelAPI):
                 and api_result.prompt_feedback.block_reason is not None
                 and api_result.prompt_feedback.block_reason != BlockedReason.BLOCKED_REASON_UNSPECIFIED
             ):
-                logger.info(
+                logger.warning(
                     f"Gemini blocked output: {messages=}, {api_result.prompt_feedback.block_reason=}, {api_result.prompt_feedback.safety_ratings=}"
                 )
                 return create_costed_language_model_response_for_single_result(
@@ -362,7 +362,7 @@ class GeminiAPI(LanguageModelAPI):
                 safety_ratings = (
                     api_result.prompt_feedback.safety_ratings if api_result.prompt_feedback is not None else None
                 )
-                logger.info(
+                logger.warning(
                     "Gemini flagged output: block_reason={block_reason}, safety_ratings={safety_ratings}",
                     block_reason=block_reason,
                     safety_ratings=safety_ratings,
@@ -387,7 +387,7 @@ class GeminiAPI(LanguageModelAPI):
             if finish_reason not in [FinishReason.MAX_TOKENS, FinishReason.STOP]:
                 block_reason = fmap(lambda x: x.block_reason, api_result.prompt_feedback)
                 safety_ratings = fmap(lambda x: x.safety_ratings, api_result.prompt_feedback)
-                logger.info(
+                logger.warning(
                     f"Gemini did not return a simple text response, {block_reason=}, {safety_ratings=}, {finish_reason=}, {candidate.safety_ratings=}"
                 )
                 return create_costed_language_model_response_for_single_result(
@@ -418,7 +418,7 @@ class GeminiAPI(LanguageModelAPI):
                     raise BadAPIRequestError(
                         "Gemini ran out of tokens while thinking and did not return a text response"
                     )
-                logger.info("Non-simple-text response: {}", api_result)
+                logger.warning("Non-simple-text response: {}", api_result)
                 raise BadAPIRequestError("Gemini did not return a simple text response (text is None)")
 
             prompt_tokens = (
diff --git a/vet/imbue_core/agents/llm_apis/groq_api.py b/vet/imbue_core/agents/llm_apis/groq_api.py
@@ -181,16 +181,16 @@ def _groq_exception_manager() -> Iterator[None]:
     try:
         yield
     except BadRequestError as e:
-        logger.info("BadAPIRequestError {}", e)
+        logger.debug("BadAPIRequestError {}", e)
         raise BadAPIRequestError(str(e)) from e
     except APIConnectionError as e:
-        logger.info("Rate limited? Received APIConnectionError {}", e)
+        logger.debug("Rate limited? Received APIConnectionError {}", e)
         raise TransientLanguageModelError("APIConnectionError") from e
     except RateLimitError as e:
-        logger.info("Rate limited? {}", e)
+        logger.debug("Rate limited? {}", e)
         raise TransientLanguageModelError("RateLimitError") from e
     except httpx.RemoteProtocolError as e:
-        logger.info("{}", e)
+        logger.debug("{}", e)
         raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
     except APIError as e:
         if e.body["code"] == "context_length_exceeded":  # type: ignore
@@ -315,7 +315,7 @@ class GroqChatAPI(LanguageModelAPI):
                     presence_penalty=self.presence_penalty,
                 )
             assert isinstance(api_result, AsyncStream)
-            logger.info("API response status code: {}", api_result.response.status_code)
+            logger.debug("API response status code: {}", api_result.response.status_code)
 
             yield LanguageModelStreamStartEvent()
 
diff --git a/vet/imbue_core/agents/llm_apis/language_model_api.py b/vet/imbue_core/agents/llm_apis/language_model_api.py
@@ -517,7 +517,7 @@ class LanguageModelAPI(abc.ABC, MutableModel):
         caching_info: CachingInfo | None = None,
     ) -> float:
         """Overridden by subclasses which have more complex cost calculations, such as if caching is used."""
-        logger.info(
+        logger.debug(
             f"no calculate_cost implemented for {self.model_name}; using basic_calculate_cost",
             model_name=self.model_name,
         )
diff --git a/vet/imbue_core/agents/llm_apis/mock_api.py b/vet/imbue_core/agents/llm_apis/mock_api.py
@@ -132,7 +132,7 @@ class FileBasedLanguageModelMock(LanguageModelMock):
         # TODO: currently the identifier is the last user message, because the entire prompt is really long
         #  if we need to support the same user message with different responses, expand this, maybe chat history?
         identifier = self._get_user_message_from_prompt(prompt)
-        logger.info("Getting response for identifier: {} from {}", identifier, toml_dict)
+        logger.debug("Getting response for identifier: {} from {}", identifier, toml_dict)
         toml_item = toml_dict.get(identifier, None)
         if toml_item is None:
             for toml_key, response in toml_dict.items():
diff --git a/vet/imbue_core/serialization.py b/vet/imbue_core/serialization.py
@@ -178,7 +178,7 @@ class FrozenSerializer(Serializer):
     ) -> list[object]:
         if isinstance(obj, list):
             if self._allow_unsafe_list_serialization:
-                logger.info("Converting list to tuple for serialization: {}", obj)
+                logger.debug("Converting list to tuple for serialization: {}", obj)
                 obj = tuple(obj)
             else:
                 raise Exception(f"Lists are not allowed for serialization. Use tuples instead. Current iterable: {obj}")
diff --git a/vet/imbue_tools/get_conversation_history/get_conversation_history.py b/vet/imbue_tools/get_conversation_history/get_conversation_history.py
@@ -64,7 +64,7 @@ def parse_conversation_history(
             # deserialize the message with pydantic
             message: ConversationMessageUnion = TypeAdapter(ConversationMessageUnion).validate_json(line)
         except ValidationError:
-            logger.info("Skipping malformed history line {}", line)
+            logger.warning("Skipping malformed history line {}", line)
             continue
         messages.append(message)
     return tuple(messages)
diff --git a/vet/imbue_tools/repo_utils/context_prefix.py b/vet/imbue_tools/repo_utils/context_prefix.py
@@ -563,11 +563,11 @@ def get_repo_context(
                 tokens_to_reserve=tokens_to_reserve,
                 template=template,
             )
-            logger.info("Selected subrepo context strategy: {}", subrepo_context_strategy.label)
+            logger.debug("Selected subrepo context strategy: {}", subrepo_context_strategy.label)
 
             if subrepo_context_strategy.label == SubrepoContextStrategyType.NOTHING:
                 # log a message if we have to use the NOTHING strategy, but still proceed with the call
-                logger.info("Selected NOTHING subrepo context strategy; hopefully this doesn't happen too often!")
+                logger.debug("Selected NOTHING subrepo context strategy; hopefully this doesn't happen too often!")
 
             return SubrepoContextWithFormattedContext(
                 formatted_repo_context=repo_context_str,
diff --git a/vet/imbue_tools/repo_utils/context_retrieval.py b/vet/imbue_tools/repo_utils/context_retrieval.py
@@ -115,7 +115,7 @@ class RepoContextManager:
 
             elif entry.type == ObjectType.COMMIT:
                 # A COMMIT object indicates a submodule, which we do not traverse for the time being.
-                logger.info("Skipping submodule in repo context: {}", entry.name)
+                logger.debug("Skipping submodule in repo context: {}", entry.name)
 
             else:
                 raise ValueError(f"Unexpected entry type in git tree: {entry.type}")
diff --git a/vet/issue_identifiers/common.py b/vet/issue_identifiers/common.py
@@ -205,7 +205,7 @@ def get_agent_options(cwd: Path | None, model_name: str, agent_harness_type: Age
     # but if we do, this should be refactored into a registry or factory pattern.
     if agent_harness_type == AgentHarnessType.CODEX:
         if model_name in _ANTHROPIC_MODEL_NAMES:
-            logger.info(
+            logger.debug(
                 "Config model_name {config_model_name} is an Anthropic model, using default Codex model ({model_name}).",
                 config_model_name=model_name,
                 model_name=_DEFAULT_CODEX_MODEL,
@@ -217,14 +217,14 @@ def get_agent_options(cwd: Path | None, model_name: str, agent_harness_type: Age
             sandbox_mode="read-only",
         )
     if model_name in _OPENAI_MODEL_NAMES:
-        logger.info(
+        logger.debug(
             "Config model_name {config_model_name} is an OpenAI model, using default Claude model ({model_name}).",
             config_model_name=model_name,
             model_name=_DEFAULT_CLAUDE_MODEL,
         )
         model_name = _DEFAULT_CLAUDE_MODEL
     elif model_name not in _ANTHROPIC_MODEL_NAMES:
-        logger.info(
+        logger.warning(
             "Config model_name {config_model_name} is not a valid Anthropic model, using default ({model_name}).",
             config_model_name=model_name,
             model_name=_DEFAULT_CLAUDE_MODEL,
@@ -287,7 +287,7 @@ def extract_invocation_info_from_costed_response(
                     caching_info.provider_specific_data.written_5m + caching_info.provider_specific_data.written_1h
                 )
             else:
-                logger.info(
+                logger.debug(
                     "Not recording caching info for provider specific data type {}",
                     type(caching_info.provider_specific_data),
                 )
diff --git a/vet/issue_identifiers/registry.py b/vet/issue_identifiers/registry.py
@@ -232,7 +232,7 @@ def run(
 
         issue_generators.append(_generate_with_name_in_debug_info(identifier_name, filtered_results_generator))
 
-    logger.info(
+    logger.debug(
         "Using the following issue identifiers compatible with the input: {}",
         ", ".join([n for n in compatible_enabled_identifier_names]),
     )

	vet Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE

M	DEVELOPMENT.md	\|	14	++++++++------
M	vet/api.py	\|	10	+++++-----
M	vet/cli/main.py	\|	8	++++----
M	vet/imbue_core/agents/llm_apis/anthropic_api.py	\|	10	+++++-----
M	vet/imbue_core/agents/llm_apis/gemini_api.py	\|	18	+++++++++---------
M	vet/imbue_core/agents/llm_apis/groq_api.py	\|	10	+++++-----
M	vet/imbue_core/agents/llm_apis/language_model_api.py	\|	2	+-
M	vet/imbue_core/agents/llm_apis/mock_api.py	\|	2	+-
M	vet/imbue_core/serialization.py	\|	2	+-
M	vet/imbue_tools/get_conversation_history/get_conversation_history.py	\|	2	+-
M	vet/imbue_tools/repo_utils/context_prefix.py	\|	4	++--
M	vet/imbue_tools/repo_utils/context_retrieval.py	\|	2	+-
M	vet/issue_identifiers/common.py	\|	8	++++----
M	vet/issue_identifiers/registry.py	\|	2	+-