commit 21780745c11f773cbe8f7046a012762ae0fc812e
parent 2976119e26eee3a46919b4e95e26afa8fad155c0
Author: andrewlaack-collab <andrew.laack@imbue.com>
Date: Wed, 25 Feb 2026 16:15:45 -0600
Remove first-party Groq support (#149)
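* Removed the first-party Groq integration (the `groq` dependency, `groq_api.py`, and its model registrations) to decrease maintenance burden.
* Applied formatter changes.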
---------
Co-authored-by: Andrew Laack <andrew@laack.co>
Diffstat:
7 files changed, 2 insertions(+), 399 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
@@ -47,7 +47,6 @@ dependencies = [
"anthropic~=0.54",
"openai>=1.79.0",
"tiktoken",
- "groq>=0.18.0",
"google-genai>=1.26.0",
# From imbue_tools
diff --git a/uv.lock b/uv.lock
@@ -426,23 +426,6 @@ wheels = [
]
[[package]]
-name = "groq"
-version = "1.0.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "anyio" },
- { name = "distro" },
- { name = "httpx" },
- { name = "pydantic" },
- { name = "sniffio" },
- { name = "typing-extensions" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/3f/12/f4099a141677fcd2ed79dcc1fcec431e60c52e0e90c9c5d935f0ffaf8c0e/groq-1.0.0.tar.gz", hash = "sha256:66cb7bb729e6eb644daac7ce8efe945e99e4eb33657f733ee6f13059ef0c25a9", size = 146068, upload-time = "2025-12-17T23:34:23.115Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/4a/88/3175759d2ef30406ea721f4d837bfa1ba4339fde3b81ba8c5640a96ed231/groq-1.0.0-py3-none-any.whl", hash = "sha256:6e22bf92ffad988f01d2d4df7729add66b8fd5dbfb2154b5bbf3af245b72c731", size = 138292, upload-time = "2025-12-17T23:34:21.957Z" },
-]
-
-[[package]]
name = "h11"
version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
@@ -1522,7 +1505,6 @@ dependencies = [
{ name = "cattrs" },
{ name = "diskcache" },
{ name = "google-genai" },
- { name = "groq" },
{ name = "httpx" },
{ name = "jinja2" },
{ name = "libcst" },
@@ -1557,7 +1539,6 @@ requires-dist = [
{ name = "cattrs" },
{ name = "diskcache", specifier = ">=5.6.3" },
{ name = "google-genai", specifier = ">=1.26.0" },
- { name = "groq", specifier = ">=0.18.0" },
{ name = "httpx" },
{ name = "jinja2" },
{ name = "libcst" },
diff --git a/vet/cli/models.py b/vet/cli/models.py
@@ -6,7 +6,6 @@ from vet.cli.config.schema import ModelsConfig
from vet.imbue_core.agents.llm_apis.anthropic_api import AnthropicModelName
from vet.imbue_core.agents.llm_apis.common import get_all_model_names
from vet.imbue_core.agents.llm_apis.gemini_api import GeminiModelName
-from vet.imbue_core.agents.llm_apis.groq_api import GroqSupportedModelName
from vet.imbue_core.agents.llm_apis.openai_api import OpenAIModelName
DEFAULT_MODEL_ID = AnthropicModelName.CLAUDE_4_6_OPUS.value
@@ -46,7 +45,6 @@ def get_builtin_models_by_provider() -> dict[str, list[str]]:
"anthropic": [m.value for m in AnthropicModelName],
"openai": [m.value for m in OpenAIModelName],
"gemini": [m.value for m in GeminiModelName],
- "groq": [m.value for m in GroqSupportedModelName],
}
diff --git a/vet/cli/models_test.py b/vet/cli/models_test.py
@@ -123,7 +123,7 @@ def test_get_builtin_models_by_provider_returns_dict_with_expected_providers() -
assert "anthropic" in providers
assert "openai" in providers
assert "gemini" in providers
- assert "groq" in providers
+ assert "groq" not in providers
def test_get_builtin_models_by_provider_all_values_are_lists_of_strings() -> None:
diff --git a/vet/imbue_core/agents/llm_apis/build_apis.py b/vet/imbue_core/agents/llm_apis/build_apis.py
@@ -8,8 +8,6 @@ from vet.imbue_core.agents.llm_apis.anthropic_api import AnthropicModelName
from vet.imbue_core.agents.llm_apis.constants import approximate_token_count
from vet.imbue_core.agents.llm_apis.gemini_api import GeminiAPI
from vet.imbue_core.agents.llm_apis.gemini_api import GeminiModelName
-from vet.imbue_core.agents.llm_apis.groq_api import GroqChatAPI
-from vet.imbue_core.agents.llm_apis.groq_api import GroqSupportedModelName
from vet.imbue_core.agents.llm_apis.language_model_api import LanguageModelAPI
from vet.imbue_core.agents.llm_apis.mock_api import FileBasedLanguageModelMock
from vet.imbue_core.agents.llm_apis.mock_api import MockModelName
@@ -51,16 +49,6 @@ def build_language_model_from_config(
is_using_logprobs=config.is_using_logprobs,
retry_jitter_factor=config.retry_jitter_factor,
)
- if config.model_name in (v for v in GroqSupportedModelName):
- return GroqChatAPI(
- model_name=config.model_name,
- cache_path=config.cache_path,
- is_caching_inputs=config.is_caching_inputs,
- is_running_offline=config.is_running_offline,
- is_conversational=True,
- is_using_logprobs=config.is_using_logprobs,
- retry_jitter_factor=config.retry_jitter_factor,
- )
if config.model_name in (v for v in AnthropicModelName):
return AnthropicAPI(
model_name=config.model_name,
diff --git a/vet/imbue_core/agents/llm_apis/common.py b/vet/imbue_core/agents/llm_apis/common.py
@@ -2,14 +2,12 @@ from vet.imbue_core.agents.llm_apis.anthropic_api import ANTHROPIC_MODEL_INFO_BY
from vet.imbue_core.agents.llm_apis.anthropic_api import AnthropicModelName
from vet.imbue_core.agents.llm_apis.gemini_api import GEMINI_MODEL_INFO_BY_NAME
from vet.imbue_core.agents.llm_apis.gemini_api import GeminiModelName
-from vet.imbue_core.agents.llm_apis.groq_api import GroqSupportedModelName
-from vet.imbue_core.agents.llm_apis.groq_api import get_model_info as get_groq_model_info
from vet.imbue_core.agents.llm_apis.mock_api import MY_MOCK_MODEL_INFO
from vet.imbue_core.agents.llm_apis.models import ModelInfo
from vet.imbue_core.agents.llm_apis.openai_api import OpenAIModelName
from vet.imbue_core.agents.llm_apis.openai_api import get_model_info as get_openai_model_info
-ModelName = AnthropicModelName | OpenAIModelName | GroqSupportedModelName | GeminiModelName
+ModelName = AnthropicModelName | OpenAIModelName | GeminiModelName
def get_model_info_from_name(model_name: str) -> ModelInfo:
@@ -19,8 +17,6 @@ def get_model_info_from_name(model_name: str) -> ModelInfo:
return ANTHROPIC_MODEL_INFO_BY_NAME[AnthropicModelName(model_name)]
elif model_name in (v for v in OpenAIModelName):
return get_openai_model_info(OpenAIModelName(model_name))
- elif model_name in (v for v in GroqSupportedModelName):
- return get_groq_model_info(GroqSupportedModelName(model_name))
elif model_name in (v for v in GeminiModelName):
return GEMINI_MODEL_INFO_BY_NAME[GeminiModelName(model_name)]
else:
@@ -43,7 +39,6 @@ def get_all_model_names() -> list[str]:
names = []
names.extend(list(v for v in AnthropicModelName))
names.extend(list(v for v in OpenAIModelName))
- names.extend(list(v for v in GroqSupportedModelName))
names.extend(list(v for v in GeminiModelName))
return names
@@ -56,7 +51,6 @@ def get_formatted_model_name(model_name: str) -> str:
Some examples:
- `models/gemini-1.5-flash-001` -> `gemini-1.5-flash-001`
- - 'groq/llama-3.3-70b-versatile' -> 'groq-llama-3.3-70b-versatile'
- 'claude-3-5-haiku-20241022' -> 'claude-3-5-haiku-20241022'
"""
diff --git a/vet/imbue_core/agents/llm_apis/groq_api.py b/vet/imbue_core/agents/llm_apis/groq_api.py
@@ -1,357 +0,0 @@
-import asyncio
-import enum
-import math
-from contextlib import contextmanager
-from typing import AsyncGenerator
-from typing import Final
-from typing import Iterator
-from typing import Mapping
-
-import httpx
-from groq import APIConnectionError
-from groq import APIError
-from groq import AsyncGroq
-from groq import AsyncStream
-from groq import BadRequestError
-from groq import RateLimitError
-from groq.types.chat import ChatCompletion
-from loguru import logger
-from pydantic.functional_validators import field_validator
-
-from vet.imbue_core.agents.llm_apis.api_utils import convert_prompt_to_openai_messages
-from vet.imbue_core.agents.llm_apis.data_types import CostedLanguageModelResponse
-from vet.imbue_core.agents.llm_apis.data_types import LanguageModelGenerationParams
-from vet.imbue_core.agents.llm_apis.data_types import LanguageModelResponse
-from vet.imbue_core.agents.llm_apis.data_types import LanguageModelResponseUsage
-from vet.imbue_core.agents.llm_apis.data_types import ResponseStopReason
-from vet.imbue_core.agents.llm_apis.errors import BadAPIRequestError
-from vet.imbue_core.agents.llm_apis.errors import LanguageModelInvalidModelNameError
-from vet.imbue_core.agents.llm_apis.errors import MissingAPIKeyError
-from vet.imbue_core.agents.llm_apis.errors import PromptTooLongError
-from vet.imbue_core.agents.llm_apis.errors import TransientLanguageModelError
-from vet.imbue_core.agents.llm_apis.language_model_api import LanguageModelAPI
-from vet.imbue_core.agents.llm_apis.models import ModelInfo
-from vet.imbue_core.agents.llm_apis.stream import LanguageModelStreamDeltaEvent
-from vet.imbue_core.agents.llm_apis.stream import LanguageModelStreamEndEvent
-from vet.imbue_core.agents.llm_apis.stream import LanguageModelStreamEvent
-from vet.imbue_core.agents.llm_apis.stream import LanguageModelStreamStartEvent
-from vet.imbue_core.frozen_utils import FrozenDict
-from vet.imbue_core.frozen_utils import FrozenMapping
-from vet.imbue_core.itertools import only
-from vet.imbue_core.secrets_utils import get_secret
-
-# note: we require that these model versions are explicit, just like the rest of our dependencies
-# the reason is that these models are actually now mostly deterministic, and it is much easier to debug if we know what model was used
-# also, there's no need to troll yourself by wondering why results have improved (or gotten worse) when you dont realized that the version has shifted under you
-# if you want to use an upgraded model, just upgrade the model to the key displayed on the website
-# please do NOT set these back to the generic model names!
-
-
-# TODO: there are likely more models to add
-class GroqSupportedModelName(enum.StrEnum):
- GROQ_GEMMA2_9B_IT = "groq/gemma2-9b-it"
- GROQ_LLAMA3_70B_8192 = "groq/llama3-70b-8192"
- GROQ_LLAMA3_8B_8192 = "groq/llama3-8b-8192"
- GROQ_LLAMA_3_3_70B_SPECDEC = "groq/llama-3.3-70b-specdec"
- GROQ_MIXTRAL_8X7B_32768 = "groq/mixtral-8x7b-32768"
- GROQ_LLAMA_3_3_70B_VERSATILE = "groq/llama-3.3-70b-versatile"
- GROQ_LLAMA_3_1_8B_INSTANT = "groq/llama-3.1-8b-instant"
- GROQ_LLAMA_3_2_1B_PREVIEW = "groq/llama-3.2-1b-preview"
- GROQ_LLAMA_3_2_3B_PREVIEW = "groq/llama-3.2-3b-preview"
-
-
-# Rate limits for Groq models based on custom rate limits for our organization.
-# See here https://console.groq.com/dashboard/limits (requires login, use your Google account)
-
-GROQ_MODEL_INFO_BY_NAME: FrozenMapping[GroqSupportedModelName, ModelInfo] = FrozenDict(
- {
- GroqSupportedModelName.GROQ_GEMMA2_9B_IT: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_GEMMA2_9B_IT),
- cost_per_input_token=0.20 / 1_000_000,
- cost_per_output_token=0.20 / 1_000_000,
- max_input_tokens=8192,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA3_70B_8192: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA3_70B_8192),
- cost_per_input_token=0.59 / 1_000_000,
- cost_per_output_token=0.79 / 1_000_000,
- max_input_tokens=8192,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA3_8B_8192: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA3_8B_8192),
- cost_per_input_token=0.05 / 1_000_000,
- cost_per_output_token=0.08 / 1_000_000,
- max_input_tokens=8192,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA_3_3_70B_SPECDEC: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA_3_3_70B_SPECDEC),
- cost_per_input_token=0.59 / 1_000_000,
- cost_per_output_token=0.99 / 1_000_000,
- max_input_tokens=8192,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_MIXTRAL_8X7B_32768: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_MIXTRAL_8X7B_32768),
- cost_per_input_token=0.24 / 1_000_000,
- cost_per_output_token=0.24 / 1_000_000,
- max_input_tokens=32768,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA_3_3_70B_VERSATILE: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA_3_3_70B_VERSATILE),
- cost_per_input_token=0.59 / 1_000_000,
- cost_per_output_token=0.79 / 1_000_000,
- max_input_tokens=128_000,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA_3_1_8B_INSTANT: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA_3_1_8B_INSTANT),
- cost_per_input_token=0.05 / 1_000_000,
- cost_per_output_token=0.08 / 1_000_000,
- max_input_tokens=128_000,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA_3_2_1B_PREVIEW: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA_3_2_1B_PREVIEW),
- cost_per_input_token=0.04 / 1_000_000,
- cost_per_output_token=0.04 / 1_000_000,
- max_input_tokens=128_000,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- GroqSupportedModelName.GROQ_LLAMA_3_2_3B_PREVIEW: ModelInfo(
- model_name=str(GroqSupportedModelName.GROQ_LLAMA_3_2_3B_PREVIEW),
- cost_per_input_token=0.06 / 1_000_000,
- cost_per_output_token=0.06 / 1_000_000,
- max_input_tokens=128_000,
- max_output_tokens=None,
- rate_limit_req=30 / 60, # 30 RPM = 0.50 RPS
- ),
- }
-)
-
-
-def get_model_info(model_name: GroqSupportedModelName) -> ModelInfo:
- return GROQ_MODEL_INFO_BY_NAME[model_name]
-
-
-_CAPACITY_SEMAPHOR_BY_MODEL_NAME: Mapping[str, asyncio.Semaphore] = {
- GroqSupportedModelName.GROQ_GEMMA2_9B_IT: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA3_70B_8192: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA3_8B_8192: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA_3_3_70B_SPECDEC: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_MIXTRAL_8X7B_32768: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA_3_3_70B_VERSATILE: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA_3_1_8B_INSTANT: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA_3_2_1B_PREVIEW: asyncio.Semaphore(100),
- GroqSupportedModelName.GROQ_LLAMA_3_2_3B_PREVIEW: asyncio.Semaphore(100),
-}
-
-
-def _get_capacity_semaphor(model_name: str) -> asyncio.Semaphore:
- return _CAPACITY_SEMAPHOR_BY_MODEL_NAME[model_name]
-
-
-# ref: https://github.com/groq/groq-python/blob/b74ce9e301115520c744e18425653a4c783cb6f5/src/groq/types/chat/chat_completion_chunk.py#L86
-_GROQ_STOP_REASON_TO_STOP_REASON: Final[FrozenMapping[str, ResponseStopReason]] = FrozenDict(
- {
- # Groq copies OpenAI and treats stop due to natural stop point and provided stop sequence the same
- "stop": ResponseStopReason.END_TURN,
- "length": ResponseStopReason.MAX_TOKENS,
- "tool_calls": ResponseStopReason.TOOL_CALLS,
- "function_call": ResponseStopReason.FUNCTION_CALL,
- "content_filter": ResponseStopReason.CONTENT_FILTER,
- }
-)
-
-
-@contextmanager
-def _groq_exception_manager() -> Iterator[None]:
- """Simple context manager for parsing groq exceptions mostly based on how we parse OpenAI API exceptions."""
- try:
- yield
- except BadRequestError as e:
- logger.debug("BadAPIRequestError {}", e)
- raise BadAPIRequestError(str(e)) from e
- except APIConnectionError as e:
- logger.debug("Rate limited? Received APIConnectionError {}", e)
- raise TransientLanguageModelError("APIConnectionError") from e
- except RateLimitError as e:
- logger.debug("Rate limited? {}", e)
- raise TransientLanguageModelError("RateLimitError") from e
- except httpx.RemoteProtocolError as e:
- logger.debug("{}", e)
- raise TransientLanguageModelError("httpx.RemoteProtocolError") from e
- except APIError as e:
- if e.body["code"] == "context_length_exceeded": # type: ignore
- # TODO: eventually fix elsewhere, since this doesn't actually give you any information in the body...
- raise PromptTooLongError(prompt_len=1, max_prompt_len=1)
- raise TransientLanguageModelError("APIError") from e
-
-
-class GroqChatAPI(LanguageModelAPI):
- model_name: GroqSupportedModelName = GroqSupportedModelName.GROQ_LLAMA3_8B_8192
- is_conversational: bool = True
- presence_penalty: float = 0.0
- # this shouldn't really ever even be used, but just in case
- stop_token_log_probability: float = math.log(0.9999)
-
- @field_validator("model_name") # pyre-ignore[56]: pyre doesn't understand pydantic
- @classmethod
- def validate_model_name(cls, v: str) -> str:
- if v not in GROQ_MODEL_INFO_BY_NAME:
- raise LanguageModelInvalidModelNameError(v, cls.__name__, list(GROQ_MODEL_INFO_BY_NAME))
- return v
-
- @property
- def model_info(self) -> ModelInfo:
- return GROQ_MODEL_INFO_BY_NAME[self.model_name]
-
- @property
- def external_model_name(self) -> str:
- return self.model_name.replace("groq/", "")
-
- def _get_client(self) -> AsyncGroq:
- api_key = get_secret("GROQ_API_KEY")
- if not api_key:
- raise MissingAPIKeyError("GROQ_API_KEY environment variable is not set")
- return AsyncGroq(api_key=api_key)
-
- async def _call_api(
- self,
- prompt: str,
- params: LanguageModelGenerationParams,
- network_failure_count: int = 0,
- ) -> CostedLanguageModelResponse:
- with _groq_exception_manager():
- messages = convert_prompt_to_openai_messages(prompt)
- client = self._get_client()
- async with _get_capacity_semaphor(self.model_name):
- # logger.info("Open requests: {}", semaphor._value)
- api_result = await client.chat.completions.create(
- model=self.external_model_name,
- messages=messages, # type: ignore
- max_tokens=params.max_tokens,
- n=params.count,
- temperature=params.temperature,
- stop=params.stop,
- logprobs=False,
- seed=params.seed,
- stream=False,
- presence_penalty=self.presence_penalty,
- )
- assert isinstance(api_result, ChatCompletion)
-
- results = []
- for data in api_result.choices:
- assert data.message.content is not None
-
- assert data.logprobs is not None and data.logprobs.content is not None
- text = data.message.content
-
- stop_reason = _GROQ_STOP_REASON_TO_STOP_REASON[str(data.finish_reason)]
-
- # Note, like OpenAI, Groq treats end turn and stop sequence the same
- # Here we assume it is stop sequence if user has specified a stop sequence
- if params.stop is not None and stop_reason == ResponseStopReason.END_TURN:
- text += params.stop
- result = LanguageModelResponse(
- text=text,
- token_count=0,
- stop_reason=stop_reason,
- network_failure_count=network_failure_count,
- )
- results.append(result)
-
- logger.trace("text: " + results[0].text)
- if api_result.usage is not None:
- completion_tokens = api_result.usage.completion_tokens
- prompt_tokens = api_result.usage.prompt_tokens
- else:
- completion_tokens = 0
- prompt_tokens = 0
- dollars_used = self.calculate_cost(prompt_tokens, completion_tokens)
- logger.trace("dollars used: {}", dollars_used)
- return CostedLanguageModelResponse(
- usage=LanguageModelResponseUsage(
- prompt_tokens_used=prompt_tokens,
- completion_tokens_used=completion_tokens,
- dollars_used=dollars_used,
- ),
- responses=tuple(results),
- )
-
- async def _get_api_stream(
- self,
- prompt: str,
- params: LanguageModelGenerationParams,
- ) -> AsyncGenerator[LanguageModelStreamEvent, None]:
- with _groq_exception_manager():
- messages = convert_prompt_to_openai_messages(prompt)
- client = self._get_client()
- async with _get_capacity_semaphor(self.model_name):
- api_result = await client.chat.completions.create(
- model=self.external_model_name,
- messages=messages, # type: ignore
- max_tokens=params.max_tokens,
- n=1,
- temperature=params.temperature,
- stop=params.stop,
- logprobs=False,
- seed=params.seed,
- stream=True,
- # This field is currently unsupported by the groq API
- # stream_options={"include_usage": True},
- presence_penalty=self.presence_penalty,
- )
- assert isinstance(api_result, AsyncStream)
- logger.debug("API response status code: {}", api_result.response.status_code)
-
- yield LanguageModelStreamStartEvent()
-
- usage = None
- finish_reason: str | None = None
- async for chunk in api_result:
- if chunk.choices:
- assert len(chunk.choices) == 1, "Currently only count=1 supported for streaming API."
- data = only(chunk.choices)
- delta = data.delta.content
- if delta is not None:
- yield LanguageModelStreamDeltaEvent(delta=delta)
- if data.finish_reason:
- finish_reason = str(data.finish_reason)
-
- stop_reason = _GROQ_STOP_REASON_TO_STOP_REASON[str(finish_reason)]
- # Note, Open API treats end turn and stop sequence the same TODO: check if groq is the same
- # Here we assume it is stop sequence if user has specified a stop sequence
- if params.stop is not None and stop_reason == ResponseStopReason.END_TURN:
- yield LanguageModelStreamDeltaEvent(delta=params.stop)
-
- if usage is not None:
- completion_tokens = usage.completion_tokens
- prompt_tokens = usage.prompt_tokens
- dollars_used = self.calculate_cost(prompt_tokens, completion_tokens)
- else:
- completion_tokens = -1
- prompt_tokens = -1
- dollars_used = -1
- logger.trace("dollars used: {}", dollars_used)
-
- yield LanguageModelStreamEndEvent(
- usage=LanguageModelResponseUsage(
- prompt_tokens_used=prompt_tokens,
- completion_tokens_used=completion_tokens,
- dollars_used=dollars_used,
- ),
- stop_reason=stop_reason,
- )