mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 13:47:19 +08:00
(fix) CodeActAgent: fix issues with vision support in prompts (#3665)
* CodeActAgent: fix message prep if prompt caching is not supported * fix python version in regen tests workflow * fix in conftest "mock_completion" method * add disable_vision to LLMConfig; revert change in message parsing in llm.py * format messages in several files for completion * refactored message(s) formatting (llm.py); added vision_is_active() * fix a unit test * regenerate: added LOG_TO_FILE and FORCE_REGENERATE env flags * try to fix path to logs folder in workflow * llm: prevent index error * try FORCE_USE_LLM in regenerate * tweaks everywhere... * fix 2 random unit test errors :( * added FORCE_REGENERATE_TESTS=true to regenerate CLI * fix test_lint_file_fail_typescript again * double-quotes for env vars in workflow; llm logger set to debug * fix typo in regenerate * regenerate iterations now 20; applied iteration counter fix by Li * regenerate: pass FORCE_REGENERATE flag into env * fixes for int tests. several mock files updated. * browsing_agent: fix response_parser.py adding ) to empty response * test_browse_internet: fix skipif and revert obsolete mock files * regenerate: fix bracketing for http server start/kill conditions * disable test_browse_internet for CodeAct*Agents; mock files updated after merge * missed to include more mock files earlier * reverts after review feedback from Li * forgot one * browsing agent test, partial fixes and updated mock files * test_browse_internet works in my WSL now! * adapt unit test test_prompt_caching.py * add DEBUG to regenerate workflow command * convert regenerate workflow params to inputs * more integration test mock files updated * more files * test_prompt_caching: restored test_prompt_caching_headers purpose * file_ops: fix potential exception, like "cross device copy"; fixed mock files accordingly * reverts/changes wrt feedback from xingyao * updated docs and config template * code cleanup wrt review feedback
This commit is contained in:
@@ -1,8 +1,11 @@
|
||||
from enum import Enum
|
||||
from typing import Union
|
||||
|
||||
from pydantic import BaseModel, Field, model_serializer
|
||||
from typing_extensions import Literal
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
class ContentType(Enum):
|
||||
TEXT = 'text'
|
||||
@@ -10,7 +13,7 @@ class ContentType(Enum):
|
||||
|
||||
|
||||
class Content(BaseModel):
|
||||
type: ContentType
|
||||
type: str
|
||||
cache_prompt: bool = False
|
||||
|
||||
@model_serializer
|
||||
@@ -19,13 +22,13 @@ class Content(BaseModel):
|
||||
|
||||
|
||||
class TextContent(Content):
|
||||
type: ContentType = ContentType.TEXT
|
||||
type: str = ContentType.TEXT.value
|
||||
text: str
|
||||
|
||||
@model_serializer
|
||||
def serialize_model(self):
|
||||
data: dict[str, str | dict[str, str]] = {
|
||||
'type': self.type.value,
|
||||
'type': self.type,
|
||||
'text': self.text,
|
||||
}
|
||||
if self.cache_prompt:
|
||||
@@ -34,14 +37,14 @@ class TextContent(Content):
|
||||
|
||||
|
||||
class ImageContent(Content):
    # The diff artifact left both the old enum-typed declaration and the new
    # string one; keep only the post-change version: a plain string
    # discriminator matching the provider wire format.
    type: str = ContentType.IMAGE_URL.value
    # One or more image URLs (or data URIs) attached to this content part.
    image_urls: list[str]

    @model_serializer
    def serialize_model(self):
        """Serialize to a list of `image_url` parts in the vision message format.

        Returns one dict per URL, e.g.
        ``{'type': 'image_url', 'image_url': {'url': ...}}``. If prompt
        caching is requested, only the last part carries the
        ``cache_control`` breakpoint marker.
        """
        images: list[dict[str, str | dict[str, str]]] = []
        for url in self.image_urls:
            images.append({'type': self.type, 'image_url': {'url': url}})
        # Guard on `images` so an empty URL list cannot index images[-1].
        if self.cache_prompt and images:
            images[-1]['cache_control'] = {'type': 'ephemeral'}
        return images
|
||||
@@ -65,4 +68,50 @@ class Message(BaseModel):
|
||||
elif isinstance(item, ImageContent):
|
||||
content.extend(item.model_dump())
|
||||
|
||||
return {'role': self.role, 'content': content}
|
||||
return {'content': content, 'role': self.role}
|
||||
|
||||
|
||||
def format_messages(
    messages: Union[Message, list[Message]], with_images: bool
) -> list[dict]:
    """Convert message(s) into the list-of-dicts shape expected by completion.

    Args:
        messages: A single message or a list. Despite the annotation, plain
            strings and pre-built dicts are tolerated for backward
            compatibility with older call sites.
        with_images: True when the model supports vision; structured content
            (text + image parts) is passed through via ``model_dump()``.

    Returns:
        A list of ``{'role': ..., 'content': ...}`` dicts. Without vision
        support, each message's content is flattened to a single string.
    """
    if not isinstance(messages, list):
        messages = [messages]

    if with_images:
        # Vision-capable models receive the full structured content parts.
        return [message.model_dump() for message in messages]

    # Text-only models: flatten every message's content into one string.
    converted_messages = []
    for message in messages:
        content_str = ''
        role = 'user'

        # BUG FIX: the original branch did `continue` after accumulating the
        # string, discarding it before the append below. Fold it into the
        # elif chain instead so it falls through to the common append.
        if isinstance(message, str):
            content_str = message + '\n'
        elif isinstance(message, dict):
            if 'role' in message:
                role = message['role']
            if 'content' in message:
                content_str = content_str + message['content'] + '\n'
        elif isinstance(message, Message):
            role = message.role
            for content in message.content:
                if isinstance(content, list):
                    for item in content:
                        if isinstance(item, TextContent):
                            content_str = content_str + item.text + '\n'
                elif isinstance(content, TextContent):
                    content_str = content_str + content.text + '\n'
        else:
            logger.error(
                f'>>> `message` is not a string, dict, or Message: {type(message)}'
            )

        # Skip messages that produced no text (e.g. image-only content).
        if content_str:
            converted_messages.append(
                {
                    'role': role,
                    'content': content_str,
                }
            )
    return converted_messages
|
||||
|
||||
Reference in New Issue
Block a user