fix(#5818): Force the string serializer for DeepSeek function calling (#5824)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
Xingyao Wang 2024-12-26 15:45:39 -05:00 committed by GitHub
parent ad45f8dab0
commit a021045dce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 13 additions and 1 deletions

View File

@ -62,6 +62,8 @@ class Message(BaseModel):
# - tool execution result (to LLM)
tool_call_id: str | None = None
name: str | None = None # name of the tool
# force the string serializer even when caching, vision, or function calling is enabled
force_string_serializer: bool = False
@property
def contains_image(self) -> bool:
@ -73,7 +75,9 @@ class Message(BaseModel):
# - into a single string: for providers that don't support a list of content items (e.g. no vision, no tool calls)
# - into a list of content items: the new APIs of providers with vision/prompt caching/tool calls
# NOTE: remove this when litellm or providers support the new API
if self.cache_enabled or self.vision_enabled or self.function_calling_enabled:
if not self.force_string_serializer and (
self.cache_enabled or self.vision_enabled or self.function_calling_enabled
):
return self._list_serializer()
# some providers, like HF and Groq/llama, don't support a list here, but a single string
return self._string_serializer()

View File

@ -122,6 +122,12 @@ class LLM(RetryMixin, DebugMixin):
if self.is_function_calling_active():
logger.debug('LLM: model supports function calling')
# Compatibility flag: use string serializer for DeepSeek models
# See this issue: https://github.com/All-Hands-AI/OpenHands/issues/5818
self._use_string_serializer = False
if 'deepseek' in self.config.model:
self._use_string_serializer = True
# if using a custom tokenizer, make sure it's loaded and accessible in the format expected by litellm
if self.config.custom_tokenizer is not None:
self.tokenizer = create_pretrained_tokenizer(self.config.custom_tokenizer)
@ -618,6 +624,8 @@ class LLM(RetryMixin, DebugMixin):
message.cache_enabled = self.is_caching_prompt_active()
message.vision_enabled = self.vision_is_active()
message.function_calling_enabled = self.is_function_calling_active()
if 'deepseek' in self.config.model:
message.force_string_serializer = True
# let pydantic handle the serialization
return [message.model_dump() for message in messages]