Updated gemini definitions (#145) - vet - Unnamed repository; edit this file 'description' to name the repository.

commit 27acbd08842dc4307bbf483f9263fd082fce7d4a
parent 964334a018aab364859b092a31b557da0705f51c
Author: andrewlaack-collab <andrew.laack@imbue.com>
Date:   Wed, 25 Feb 2026 18:16:49 -0600

Updated gemini definitions (#145)

* Updated gemini definitions

* Removed models

* formatting

* Verified that all defined models currently work correctly.

* Tests

* Fix vet-identified issue

---------

Co-authored-by: Andrew Laack <andrew@laack.co>
Diffstat:
M vet/imbue_core/agents/llm_apis/common.py  | 3 ++-
M vet/imbue_core/agents/llm_apis/gemini_api.py  | 117 +++++++++++++++++++++++++++++--------------------------------------------------

2 files changed, 45 insertions(+), 75 deletions(-)
diff --git a/vet/imbue_core/agents/llm_apis/common.py b/vet/imbue_core/agents/llm_apis/common.py
@@ -50,7 +50,8 @@ def get_formatted_model_name(model_name: str) -> str:
 
         Some examples:
 
-        - `models/gemini-1.5-flash-001` -> `gemini-1.5-flash-001`
+    - 'models/gemini-2.5-flash' -> 'gemini-2.5-flash'
+    - 'groq/llama-3.3-70b-versatile' -> 'groq-llama-3.3-70b-versatile'
     - 'claude-opus-4-6' -> 'claude-opus-4-6'
 
     """
diff --git a/vet/imbue_core/agents/llm_apis/gemini_api.py b/vet/imbue_core/agents/llm_apis/gemini_api.py
@@ -53,95 +53,64 @@ from vet.imbue_core.secrets_utils import get_secret
 
 
 class GeminiModelName(enum.StrEnum):
-    GEMINI_1_0_PRO = "models/gemini-1.0-pro-001"
-    GEMINI_1_5_FLASH = "models/gemini-1.5-flash-001"
-    GEMINI_1_5_PRO = "models/gemini-1.5-pro-001"
-    GEMINI_1_5_PRO_2 = "models/gemini-1.5-pro-002"
-    GEMINI_1_5_FLASH_2 = "models/gemini-1.5-flash-002"
-    GEMINI_2_0_FLASH = "models/gemini-2.0-flash-001"
-    GEMINI_2_5_FLASH = "models/gemini-2.5-flash"
-    GEMINI_2_5_FLASH_LITE_PREVIEW = "models/gemini-2.5-flash-lite-preview-06-17"
+    # GA models
+    GEMINI_2_5_FLASH = "gemini-2.5-flash"
+    GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite"
+    # Preview models
+    GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview"
+    GEMINI_3_1_PRO_PREVIEW = "gemini-3.1-pro-preview"
 
 
 # Rate limits for Google Gemini models based on published API documentation
 # Reference: https://ai.google.dev/gemini-api/docs/rate-limits#tier-3
 # Using Tier 3 rate limits
+#
+# Pricing references:
+# - https://cloud.google.com/vertex-ai/generative-ai/pricing
+# - https://ai.google.dev/pricing
+# For pricing there are different rates depending on context/prompt size, so below we use the most
+# expensive value (the >200K token tier for 2.5+ models).
 
 GEMINI_MODEL_INFO_BY_NAME: FrozenMapping[GeminiModelName, ModelInfo] = FrozenDict(
     {
-        # https://ai.google.dev/gemini-api/docs/models/gemini
-        # https://ai.google.dev/pricing
-        # For pricing there are different rates depending on context/prompt size, so below we use the most
-        # expensive value. Note that this only kicks in at 128k tokens, the cost for most prompts is 2x lower
-        GeminiModelName.GEMINI_1_0_PRO: ModelInfo(
-            model_name="models/gemini-1.0-pro-001",
-            cost_per_input_token=0.5 / 1_000_000,
-            cost_per_output_token=1.5 / 1_000_000,
-            max_input_tokens=30_720,
-            max_output_tokens=2048,
-            rate_limit_req=2000 / 60,  # 2000 RPM = 33.33 RPS
-        ),
-        GeminiModelName.GEMINI_1_5_FLASH: ModelInfo(
-            model_name="models/gemini-1.5-flash-001",
-            cost_per_input_token=0.15 / 1_000_000,
-            cost_per_output_token=0.60 / 1_000_000,
-            max_input_tokens=1_048_576,
-            max_output_tokens=8192,
-            rate_limit_req=30000 / 60,  # 30000 RPM = 500.00 RPS
-        ),
-        GeminiModelName.GEMINI_1_5_FLASH_2: ModelInfo(
-            model_name="models/gemini-1.5-flash-002",
-            cost_per_input_token=0.15 / 1_000_000,
-            cost_per_output_token=0.60 / 1_000_000,
+        GeminiModelName.GEMINI_2_5_FLASH: ModelInfo(
+            model_name="gemini-2.5-flash",
+            cost_per_input_token=0.30 / 1_000_000,
+            cost_per_output_token=2.50 / 1_000_000,
             max_input_tokens=1_048_576,
-            max_output_tokens=8192,
-            rate_limit_req=30000 / 60,  # 30000 RPM = 500.00 RPS
-        ),
-        GeminiModelName.GEMINI_1_5_PRO: ModelInfo(
-            model_name="models/gemini-1.5-pro-001",
-            cost_per_input_token=2.5 / 1_000_000,
-            cost_per_output_token=10.0 / 1_000_000,
-            max_input_tokens=2_097_152,
-            max_output_tokens=8192,
-            rate_limit_req=4000 / 60,  # 4000 RPM = 66.67 RPS
-        ),
-        GeminiModelName.GEMINI_1_5_PRO_2: ModelInfo(
-            model_name="models/gemini-1.5-pro-002",
-            cost_per_input_token=2.5 / 1_000_000,
-            cost_per_output_token=10.0 / 1_000_000,
-            max_input_tokens=2_097_152,
-            max_output_tokens=8192,
-            rate_limit_req=4000 / 60,  # 4000 RPM = 66.67 RPS
+            max_output_tokens=65_536,
+            rate_limit_req=10_000 / 60,  # 10000 RPM = 166.67 RPS
+            rate_limit_tok=8_000_000 / 60,  # 8,000,000 TPM = 133,333.33 TPS
+            max_thinking_budget=24_576,
         ),
-        GeminiModelName.GEMINI_2_0_FLASH: ModelInfo(
-            model_name="models/gemini-2.0-flash-001",
-            cost_per_input_token=0.1 / 1_000_000,
-            cost_per_output_token=0.4 / 1_000_000,
+        GeminiModelName.GEMINI_2_5_FLASH_LITE: ModelInfo(
+            model_name="gemini-2.5-flash-lite",
+            cost_per_input_token=0.10 / 1_000_000,
+            cost_per_output_token=0.40 / 1_000_000,
             max_input_tokens=1_048_576,
-            max_output_tokens=8192,
-            rate_limit_req=30000 / 60,  # 30000 RPM = 500.00 RPS
+            max_output_tokens=65_535,
+            rate_limit_req=10_000 / 60,
+            rate_limit_tok=10_000_000 / 60,
+            max_thinking_budget=24_576,
         ),
-        GeminiModelName.GEMINI_2_5_FLASH: ModelInfo(
-            model_name="models/gemini-2.5-flash",
-            cost_per_input_token=0.3 / 1_000_000,
-            cost_per_output_token=2.5 / 1_000_000,
+        GeminiModelName.GEMINI_3_FLASH_PREVIEW: ModelInfo(
+            model_name="gemini-3-flash-preview",
+            cost_per_input_token=0.50 / 1_000_000,
+            cost_per_output_token=3.0 / 1_000_000,
             max_input_tokens=1_048_576,
-            max_output_tokens=65536,
+            max_output_tokens=65_536,
             rate_limit_req=10_000 / 60,  # 10000 RPM = 166.67 RPS
             rate_limit_tok=8_000_000 / 60,  # 8,000,000 TPM = 133,333.33 TPS
-            max_thinking_budget=24576,
+            max_thinking_budget=24_576,
         ),
-        GeminiModelName.GEMINI_2_5_FLASH_LITE_PREVIEW: ModelInfo(
-            model_name="models/gemini-2.5-flash-lite-preview-06-17",
-            cost_per_input_token=0.1 / 1_000_000,
-            cost_per_output_token=0.4 / 1_000_000,
-            max_input_tokens=1_000_000,
-            max_output_tokens=64_000,
-            # these are the tier 2 rate limits. the above claims that we're on tier 3, but i've never actually seen that
-            rate_limit_req=10_000 / 60,
-            rate_limit_tok=10_000_000 / 60,
-            # rate_limit_req=30_000 / 60,  # 30000 RPM = 500.00 RPS
-            # rate_limit_tok=30_000_000 / 60,  # 30,000,000 TPM = 500,000 TPS
+        GeminiModelName.GEMINI_3_1_PRO_PREVIEW: ModelInfo(
+            model_name="gemini-3.1-pro-preview",
+            cost_per_input_token=4.0 / 1_000_000,
+            cost_per_output_token=18.0 / 1_000_000,
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+            rate_limit_req=4_000 / 60,  # 4000 RPM = 66.67 RPS
+            rate_limit_tok=8_000_000 / 60,  # 8,000,000 TPM = 133,333.33 TPS
             max_thinking_budget=24_576,
         ),
     }
@@ -285,7 +254,7 @@ def fmap(fn: Callable[[T], R], values: T | None) -> R | None:
 
 
 class GeminiAPI(LanguageModelAPI):
-    model_name: GeminiModelName = GeminiModelName.GEMINI_1_5_FLASH
+    model_name: GeminiModelName = GeminiModelName.GEMINI_2_5_FLASH
     is_conversational: bool = True
 
     count_tokens_cache_path: Path | None = None

	vet Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs \| README \| LICENSE

M	vet/imbue_core/agents/llm_apis/common.py	\|	3	++-
M	vet/imbue_core/agents/llm_apis/gemini_api.py	\|	117	+++++++++++++++++++++++++++++--------------------------------------------------