commit c5379ba57a0bff38395d9ce257a8a447ca466217
parent 730a9ad72b93434e1df6157a675310180548657c
Author: andrewlaack-collab <andrew.laack@imbue.com>
Date: Fri, 20 Feb 2026 09:31:00 +0000
Updated logging levels, added guidance about logging choices (#116)
* Updated logging levels, added guidance about logging choices
* Updated messaging
* Restate
* Updated wording
---------
Co-authored-by: Andrew Laack <andrew@laack.co>
Diffstat:
14 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -147,15 +147,17 @@ Vet is published to PyPI via the `publish-pypi.yml` GitHub Actions workflow. Dep
## Development Notes
-### Logging Configuration
+### Logging
-When creating a new entrypoint into vet, you must call `ensure_core_log_levels_configured()` to register the custom log levels used throughout the codebase.
+When creating a new entry point into vet, you must call `configure_logging()` from `vet.cli.main`.
-```python
-from vet.imbue_core.log_utils import ensure_core_log_levels_configured
+Log level heuristics:
-ensure_core_log_levels_configured()
-```
+- **TRACE** - API payloads, token counts, dollar costs, agent subprocess messages.
+- **DEBUG** - Everything internal: API exceptions before re-raise, retries, fallbacks, identifier selection, history loading, context assembly. All LLM provider exception handlers must log at DEBUG before raising (see `_openai_exception_manager` for the pattern).
+- **INFO** - Top-level run lifecycle only. Do not add new INFO messages without team discussion.
+- **WARNING** - Degraded conditions: LLM content blocked/flagged, unrecognized config values, malformed user data, spend limit warnings.
+- **ERROR** - Failures that prevent producing results. Use `log_exception()` from `vet.imbue_core.async_monkey_patches` for tracebacks.
### README links
diff --git a/vet/api.py b/vet/api.py
@@ -34,20 +34,20 @@ def get_issues_with_raw_responses(
extra_context: str | None = None,
) -> tuple[tuple[IdentifiedVerifyIssue, ...], IssueIdentificationDebugInfo, ProjectContext]:
if not goal or not goal.strip():
- logger.info("No goal was provided, generating one from conversation history")
+ logger.debug("No goal was provided, generating one from conversation history")
# should be not None and not empty
if conversation_history:
try:
# TODO: we use the config here, but we may want to configure this separately
goal = get_goal_from_conversation(conversation_history, config.language_model_generation_config)
- logger.info("Generated goal from conversation history: {}", goal)
+ logger.debug("Generated goal from conversation history: {}", goal)
except Exception as e:
raise ConversationLoadingError(
f"No goal was provided and generating one from conversation history failed: {e}"
)
else:
# TODO: Consider which CLI options we should show this for (quiet, normal, verbose).
- logger.info("No goal or conversation history provided, only goal-independent identifiers will run")
+ logger.debug("No goal or conversation history provided, only goal-independent identifiers will run")
goal = ""
lm_config = config.language_model_generation_config
@@ -68,7 +68,7 @@ def get_issues_with_raw_responses(
)
if conversation_history:
- logger.info(
+ logger.debug(
"Passing {} conversation history messages to identifier inputs",
len(conversation_history),
)
@@ -105,7 +105,7 @@ def find_issues(
extra_context: str | None = None,
) -> tuple[IdentifiedVerifyIssue, ...]:
logger.info(
- "Finding issues in {repo_path} relative to commit hash {relative_to}",
+ "Finding issues in {repo_path} relative to {relative_to}",
repo_path=repo_path,
relative_to=relative_to,
)
diff --git a/vet/cli/main.py b/vet/cli/main.py
@@ -316,16 +316,16 @@ def configure_logging(verbose: bool, quiet: bool) -> None:
def load_conversation_from_command(command: str, cwd: Path) -> tuple:
from vet.imbue_tools.get_conversation_history.get_conversation_history import parse_conversation_history
- logger.info("Running history loader command: {}", command)
+ logger.debug("Running history loader command: {}", command)
result = subprocess.run(command, shell=True, capture_output=True, text=True, cwd=cwd)
if result.returncode != 0:
logger.warning(f"History loader command failed with exit code {result.returncode}: {result.stderr}")
return ()
if not result.stdout.strip():
- logger.info("History loader command returned empty output, no conversation history loaded")
+ logger.debug("History loader command returned empty output, no conversation history loaded")
return ()
messages = parse_conversation_history(result.stdout)
- logger.info(
+ logger.debug(
"Loaded {} conversation history messages from history loader command",
len(messages),
)
@@ -482,7 +482,7 @@ def main(argv: list[str] | None = None) -> int:
if args.history_loader is not None:
conversation_history = load_conversation_from_command(args.history_loader, repo_path)
else:
- logger.info("No history loader provided, skipping conversation history loading")
+ logger.debug("No history loader provided, skipping conversation history loading")
extra_context = None
if args.extra_context:
extra_context_parts = []
diff --git a/vet/imbue_core/agents/llm_apis/anthropic_api.py b/vet/imbue_core/agents/llm_apis/anthropic_api.py
@@ -422,10 +422,10 @@ def _anthropic_exception_manager() -> Iterator[None]:
# this can be caused by either malformed requests or transient errors, so play it safe and retry
raise TransientLanguageModelError(str(e)) from e
except anthropic.BadRequestError as e:
- logger.info("BadAPIRequestError {e}", e=e)
+ logger.debug("BadAPIRequestError {e}", e=e)
raise BadAPIRequestError(str(e)) from e
except TypeError as e:
- logger.info("Type error calling Anthropic API: {e}", e=e)
+ logger.debug("Type error calling Anthropic API: {e}", e=e)
raise BadAPIRequestError(str(e)) from e
except anthropic.APIConnectionError as e:
raise TransientLanguageModelError(str(e)) from e
@@ -443,12 +443,12 @@ def _anthropic_exception_manager() -> Iterator[None]:
# anthropic.APIStatusError: {'type': 'error', 'error': {'details': None, 'type': 'invalid_request_error', 'message': 'Output blocked by content filtering policy'}}
if e.message == "Output blocked by content filtering policy":
raise NewSeedRetriableLanguageModelError(e)
- logger.info(str(e))
+ logger.debug(str(e))
if e.status_code == 409 or e.status_code >= 500:
raise TransientLanguageModelError(str(e)) from e
raise
except httpx.RemoteProtocolError as e:
- logger.info(str(e))
+ logger.debug(str(e))
raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
except (BadAPIRequestError, TransientLanguageModelError, MissingAPIKeyError):
# we already raised this error ourselves earlier, so we don't need to mark it as unknown
@@ -809,7 +809,7 @@ class AnthropicAPI(LanguageModelAPI):
return input_cost + output_cost
except MissingCachingInfoError as e:
- logger.info("{}; using basic cost model", e)
+ logger.debug("{}; using basic cost model", e)
return self.basic_calculate_cost(prompt_tokens, completion_tokens)
diff --git a/vet/imbue_core/agents/llm_apis/gemini_api.py b/vet/imbue_core/agents/llm_apis/gemini_api.py
@@ -248,19 +248,19 @@ def _gemini_exception_manager() -> Iterator[None]:
try:
yield
except AssertionError as e:
- logger.info("The Gemini prompt is invalid.")
+ logger.debug("The Gemini prompt is invalid.")
raise BadAPIRequestError(str(e)) from e
except APIError as e:
- logger.info("Gemini failed to generate content.")
+ logger.debug("Gemini failed to generate content.")
raise BadAPIRequestError(str(e)) from e
except ValueError as e:
- logger.info("Gemini did not return a simple text response.")
+ logger.debug("Gemini did not return a simple text response.")
raise BadAPIRequestError(str(e)) from e
except AttributeError as e:
- logger.info("There is an error with the Gemini prompt or processing code: {}.", str(e))
+ logger.debug("There is an error with the Gemini prompt or processing code: {}.", str(e))
raise BadAPIRequestError(str(e)) from e
except httpx.RemoteProtocolError as e:
- logger.info(str(e))
+ logger.debug(str(e))
raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
except (BadAPIRequestError, TransientLanguageModelError, MissingAPIKeyError):
# we already raised this error ourselves earlier, so we don't need to mark it as unknown
@@ -345,7 +345,7 @@ class GeminiAPI(LanguageModelAPI):
and api_result.prompt_feedback.block_reason is not None
and api_result.prompt_feedback.block_reason != BlockedReason.BLOCKED_REASON_UNSPECIFIED
):
- logger.info(
+ logger.warning(
f"Gemini blocked output: {messages=}, {api_result.prompt_feedback.block_reason=}, {api_result.prompt_feedback.safety_ratings=}"
)
return create_costed_language_model_response_for_single_result(
@@ -362,7 +362,7 @@ class GeminiAPI(LanguageModelAPI):
safety_ratings = (
api_result.prompt_feedback.safety_ratings if api_result.prompt_feedback is not None else None
)
- logger.info(
+ logger.warning(
"Gemini flagged output: block_reason={block_reason}, safety_ratings={safety_ratings}",
block_reason=block_reason,
safety_ratings=safety_ratings,
@@ -387,7 +387,7 @@ class GeminiAPI(LanguageModelAPI):
if finish_reason not in [FinishReason.MAX_TOKENS, FinishReason.STOP]:
block_reason = fmap(lambda x: x.block_reason, api_result.prompt_feedback)
safety_ratings = fmap(lambda x: x.safety_ratings, api_result.prompt_feedback)
- logger.info(
+ logger.warning(
f"Gemini did not return a simple text response, {block_reason=}, {safety_ratings=}, {finish_reason=}, {candidate.safety_ratings=}"
)
return create_costed_language_model_response_for_single_result(
@@ -418,7 +418,7 @@ class GeminiAPI(LanguageModelAPI):
raise BadAPIRequestError(
"Gemini ran out of tokens while thinking and did not return a text response"
)
- logger.info("Non-simple-text response: {}", api_result)
+ logger.warning("Non-simple-text response: {}", api_result)
raise BadAPIRequestError("Gemini did not return a simple text response (text is None)")
prompt_tokens = (
diff --git a/vet/imbue_core/agents/llm_apis/groq_api.py b/vet/imbue_core/agents/llm_apis/groq_api.py
@@ -181,16 +181,16 @@ def _groq_exception_manager() -> Iterator[None]:
try:
yield
except BadRequestError as e:
- logger.info("BadAPIRequestError {}", e)
+ logger.debug("BadAPIRequestError {}", e)
raise BadAPIRequestError(str(e)) from e
except APIConnectionError as e:
- logger.info("Rate limited? Received APIConnectionError {}", e)
+ logger.debug("Rate limited? Received APIConnectionError {}", e)
raise TransientLanguageModelError("APIConnectionError") from e
except RateLimitError as e:
- logger.info("Rate limited? {}", e)
+ logger.debug("Rate limited? {}", e)
raise TransientLanguageModelError("RateLimitError") from e
except httpx.RemoteProtocolError as e:
- logger.info("{}", e)
+ logger.debug("{}", e)
raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
except APIError as e:
if e.body["code"] == "context_length_exceeded": # type: ignore
@@ -315,7 +315,7 @@ class GroqChatAPI(LanguageModelAPI):
presence_penalty=self.presence_penalty,
)
assert isinstance(api_result, AsyncStream)
- logger.info("API response status code: {}", api_result.response.status_code)
+ logger.debug("API response status code: {}", api_result.response.status_code)
yield LanguageModelStreamStartEvent()
diff --git a/vet/imbue_core/agents/llm_apis/language_model_api.py b/vet/imbue_core/agents/llm_apis/language_model_api.py
@@ -517,7 +517,7 @@ class LanguageModelAPI(abc.ABC, MutableModel):
caching_info: CachingInfo | None = None,
) -> float:
"""Overridden by subclasses which have more complex cost calculations, such as if caching is used."""
- logger.info(
+ logger.debug(
f"no calculate_cost implemented for {self.model_name}; using basic_calculate_cost",
model_name=self.model_name,
)
diff --git a/vet/imbue_core/agents/llm_apis/mock_api.py b/vet/imbue_core/agents/llm_apis/mock_api.py
@@ -132,7 +132,7 @@ class FileBasedLanguageModelMock(LanguageModelMock):
# TODO: currently the identifier is the last user message, because the entire prompt is really long
# if we need to support the same user message with different responses, expand this, maybe chat history?
identifier = self._get_user_message_from_prompt(prompt)
- logger.info("Getting response for identifier: {} from {}", identifier, toml_dict)
+ logger.debug("Getting response for identifier: {} from {}", identifier, toml_dict)
toml_item = toml_dict.get(identifier, None)
if toml_item is None:
for toml_key, response in toml_dict.items():
diff --git a/vet/imbue_core/serialization.py b/vet/imbue_core/serialization.py
@@ -178,7 +178,7 @@ class FrozenSerializer(Serializer):
) -> list[object]:
if isinstance(obj, list):
if self._allow_unsafe_list_serialization:
- logger.info("Converting list to tuple for serialization: {}", obj)
+ logger.debug("Converting list to tuple for serialization: {}", obj)
obj = tuple(obj)
else:
raise Exception(f"Lists are not allowed for serialization. Use tuples instead. Current iterable: {obj}")
diff --git a/vet/imbue_tools/get_conversation_history/get_conversation_history.py b/vet/imbue_tools/get_conversation_history/get_conversation_history.py
@@ -64,7 +64,7 @@ def parse_conversation_history(
# deserialize the message with pydantic
message: ConversationMessageUnion = TypeAdapter(ConversationMessageUnion).validate_json(line)
except ValidationError:
- logger.info("Skipping malformed history line {}", line)
+ logger.warning("Skipping malformed history line {}", line)
continue
messages.append(message)
return tuple(messages)
diff --git a/vet/imbue_tools/repo_utils/context_prefix.py b/vet/imbue_tools/repo_utils/context_prefix.py
@@ -563,11 +563,11 @@ def get_repo_context(
tokens_to_reserve=tokens_to_reserve,
template=template,
)
- logger.info("Selected subrepo context strategy: {}", subrepo_context_strategy.label)
+ logger.debug("Selected subrepo context strategy: {}", subrepo_context_strategy.label)
if subrepo_context_strategy.label == SubrepoContextStrategyType.NOTHING:
# log a message if we have to use the NOTHING strategy, but still proceed with the call
- logger.info("Selected NOTHING subrepo context strategy; hopefully this doesn't happen too often!")
+ logger.debug("Selected NOTHING subrepo context strategy; hopefully this doesn't happen too often!")
return SubrepoContextWithFormattedContext(
formatted_repo_context=repo_context_str,
diff --git a/vet/imbue_tools/repo_utils/context_retrieval.py b/vet/imbue_tools/repo_utils/context_retrieval.py
@@ -115,7 +115,7 @@ class RepoContextManager:
elif entry.type == ObjectType.COMMIT:
# A COMMIT object indicates a submodule, which we do not traverse for the time being.
- logger.info("Skipping submodule in repo context: {}", entry.name)
+ logger.debug("Skipping submodule in repo context: {}", entry.name)
else:
raise ValueError(f"Unexpected entry type in git tree: {entry.type}")
diff --git a/vet/issue_identifiers/common.py b/vet/issue_identifiers/common.py
@@ -205,7 +205,7 @@ def get_agent_options(cwd: Path | None, model_name: str, agent_harness_type: Age
# but if we do, this should be refactored into a registry or factory pattern.
if agent_harness_type == AgentHarnessType.CODEX:
if model_name in _ANTHROPIC_MODEL_NAMES:
- logger.info(
+ logger.debug(
"Config model_name {config_model_name} is an Anthropic model, using default Codex model ({model_name}).",
config_model_name=model_name,
model_name=_DEFAULT_CODEX_MODEL,
@@ -217,14 +217,14 @@ def get_agent_options(cwd: Path | None, model_name: str, agent_harness_type: Age
sandbox_mode="read-only",
)
if model_name in _OPENAI_MODEL_NAMES:
- logger.info(
+ logger.debug(
"Config model_name {config_model_name} is an OpenAI model, using default Claude model ({model_name}).",
config_model_name=model_name,
model_name=_DEFAULT_CLAUDE_MODEL,
)
model_name = _DEFAULT_CLAUDE_MODEL
elif model_name not in _ANTHROPIC_MODEL_NAMES:
- logger.info(
+ logger.warning(
"Config model_name {config_model_name} is not a valid Anthropic model, using default ({model_name}).",
config_model_name=model_name,
model_name=_DEFAULT_CLAUDE_MODEL,
@@ -287,7 +287,7 @@ def extract_invocation_info_from_costed_response(
caching_info.provider_specific_data.written_5m + caching_info.provider_specific_data.written_1h
)
else:
- logger.info(
+ logger.debug(
"Not recording caching info for provider specific data type {}",
type(caching_info.provider_specific_data),
)
diff --git a/vet/issue_identifiers/registry.py b/vet/issue_identifiers/registry.py
@@ -232,7 +232,7 @@ def run(
issue_generators.append(_generate_with_name_in_debug_info(identifier_name, filtered_results_generator))
- logger.info(
+ logger.debug(
"Using the following issue identifiers compatible with the input: {}",
", ".join([n for n in compatible_enabled_identifier_names]),
)