(fix) CodeActAgent: fix issues with vision support in prompts (#3665)

* CodeActAgent: fix message prep if prompt caching is not supported

* fix python version in regen tests workflow

* fix in conftest "mock_completion" method

* add disable_vision to LLMConfig; revert change in message parsing in llm.py

* format messages in several files for completion

* refactored message(s) formatting (llm.py); added vision_is_active()

* fix a unit test

* regenerate: added LOG_TO_FILE and FORCE_REGENERATE env flags

* try to fix path to logs folder in workflow

* llm: prevent index error

* try FORCE_USE_LLM in regenerate

* tweaks everywhere...

* fix 2 random unit test errors :(

* added FORCE_REGENERATE_TESTS=true to regenerate CLI

* fix test_lint_file_fail_typescript again

* double-quotes for env vars in workflow; llm logger set to debug

* fix typo in regenerate

* regenerate iterations now 20; applied iteration counter fix by Li

* regenerate: pass FORCE_REGENERATE flag into env

* fixes for int tests. several mock files updated.

* browsing_agent: fix response_parser.py adding ) to empty response

* test_browse_internet: fix skipif and revert obsolete mock files

* regenerate: fix bracketing for http server start/kill conditions

* disable test_browse_internet for CodeAct*Agents; mock files updated after merge

* include more mock files that were missed earlier

* reverts after review feedback from Li

* forgot one

* browsing agent test, partial fixes and updated mock files

* test_browse_internet works in my WSL now!

* adapt unit test test_prompt_caching.py

* add DEBUG to regenerate workflow command

* convert regenerate workflow params to inputs

* more integration test mock files updated

* more files

* test_prompt_caching: restored test_prompt_caching_headers purpose

* file_ops: fix potential exception, like "cross device copy"; fixed mock files accordingly

* reverts/changes wrt feedback from xingyao

* updated docs and config template

* code cleanup wrt review feedback
This commit is contained in:
tobitege
2024-09-04 17:58:30 +02:00
committed by GitHub
parent 1b66f2e777
commit bc31fb15fe
106 changed files with 8858 additions and 2144 deletions

View File

@@ -1,8 +1,11 @@
from enum import Enum
from typing import Union
from pydantic import BaseModel, Field, model_serializer
from typing_extensions import Literal
from openhands.core.logger import openhands_logger as logger
class ContentType(Enum):
TEXT = 'text'
@@ -10,7 +13,7 @@ class ContentType(Enum):
class Content(BaseModel):
type: ContentType
type: str
cache_prompt: bool = False
@model_serializer
@@ -19,13 +22,13 @@ class Content(BaseModel):
class TextContent(Content):
type: ContentType = ContentType.TEXT
type: str = ContentType.TEXT.value
text: str
@model_serializer
def serialize_model(self):
data: dict[str, str | dict[str, str]] = {
'type': self.type.value,
'type': self.type,
'text': self.text,
}
if self.cache_prompt:
@@ -34,14 +37,14 @@ class TextContent(Content):
class ImageContent(Content):
type: ContentType = ContentType.IMAGE_URL
type: str = ContentType.IMAGE_URL.value
image_urls: list[str]
@model_serializer
def serialize_model(self):
images: list[dict[str, str | dict[str, str]]] = []
for url in self.image_urls:
images.append({'type': self.type.value, 'image_url': {'url': url}})
images.append({'type': self.type, 'image_url': {'url': url}})
if self.cache_prompt and images:
images[-1]['cache_control'] = {'type': 'ephemeral'}
return images
@@ -65,4 +68,50 @@ class Message(BaseModel):
elif isinstance(item, ImageContent):
content.extend(item.model_dump())
return {'role': self.role, 'content': content}
return {'content': content, 'role': self.role}
def format_messages(
    messages: Union[Message, list[Message]], with_images: bool
) -> list[dict]:
    """Convert one or more messages into a list of plain dicts.

    Args:
        messages: A single message or a list of messages. Besides `Message`
            instances, plain strings and dicts (with optional 'role' and
            'content' keys) are tolerated when `with_images` is False.
        with_images: When True, every message is serialized through its own
            `model_dump()` and returned unchanged otherwise. When False, each
            message is flattened to a single text string and only
            `TextContent` items of a `Message` are kept.

    Returns:
        A list of dicts. In the flattened (`with_images=False`) form each dict
        has a 'role' and a 'content' key, where 'content' is the message's
        text pieces, each terminated by a newline. Messages that yield no
        text at all are omitted.
    """
    if not isinstance(messages, list):
        messages = [messages]

    if with_images:
        return [message.model_dump() for message in messages]

    converted_messages = []
    for message in messages:
        role = 'user'
        text_parts: list[str] = []

        # Dispatch on the concrete type *before* touching the value: a
        # membership test such as `'role' in message` is a substring check on
        # a str and raises TypeError on non-iterable objects.
        if isinstance(message, str):
            # Bare strings carry no role, so they are emitted as 'user' text.
            text_parts.append(message)
        elif isinstance(message, dict):
            role = message.get('role', role)
            if 'content' in message:
                text_parts.append(message['content'])
        elif isinstance(message, Message):
            role = message.role
            for content in message.content:
                # An entry may itself be a list of content items.
                items = content if isinstance(content, list) else [content]
                text_parts.extend(
                    item.text for item in items if isinstance(item, TextContent)
                )
        else:
            logger.error(
                f'>>> `message` is not a string, dict, or Message: {type(message)}'
            )

        # `part + '\n'` (rather than an f-string) keeps the TypeError for
        # non-string content instead of silently stringifying it.
        content_str = ''.join(part + '\n' for part in text_parts)
        if content_str:
            converted_messages.append(
                {
                    'role': role,
                    'content': content_str,
                }
            )

    return converted_messages