mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 13:47:19 +08:00
Fix empty image URLs in multimodal browsing causing litellm.BadRequestError (#9214)
Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
@@ -56,6 +56,18 @@ class ConversationMemory:
|
||||
self.agent_config = config
|
||||
self.prompt_manager = prompt_manager
|
||||
|
||||
@staticmethod
|
||||
def _is_valid_image_url(url: str | None) -> bool:
|
||||
"""Check if an image URL is valid and non-empty.
|
||||
|
||||
Args:
|
||||
url: The image URL to validate
|
||||
|
||||
Returns:
|
||||
True if the URL is valid, False otherwise
|
||||
"""
|
||||
return bool(url and url.strip())
|
||||
|
||||
def process_events(
|
||||
self,
|
||||
condensed_history: list[Event],
|
||||
@@ -380,7 +392,27 @@ class ConversationMemory:
|
||||
|
||||
# Add image URLs if available and vision is active
|
||||
if vision_is_active and obs.image_urls:
|
||||
content.append(ImageContent(image_urls=obs.image_urls))
|
||||
# Filter out empty or invalid image URLs
|
||||
valid_image_urls = [
|
||||
url for url in obs.image_urls if self._is_valid_image_url(url)
|
||||
]
|
||||
invalid_count = len(obs.image_urls) - len(valid_image_urls)
|
||||
|
||||
if valid_image_urls:
|
||||
content.append(ImageContent(image_urls=valid_image_urls))
|
||||
if invalid_count > 0:
|
||||
# Add text indicating some images were filtered
|
||||
content[
|
||||
0
|
||||
].text += f'\n\nNote: {invalid_count} invalid or empty image(s) were filtered from this output. The agent may need to use alternative methods to access visual information.'
|
||||
else:
|
||||
logger.debug(
|
||||
'IPython observation has image URLs but none are valid'
|
||||
)
|
||||
# Add text indicating all images were filtered
|
||||
content[
|
||||
0
|
||||
].text += f'\n\nNote: All {len(obs.image_urls)} image(s) in this output were invalid or empty and have been filtered. The agent should use alternative methods to access visual information.'
|
||||
|
||||
message = Message(role='user', content=content)
|
||||
elif isinstance(obs, FileEditObservation):
|
||||
@@ -398,25 +430,42 @@ class ConversationMemory:
|
||||
and vision_is_active
|
||||
):
|
||||
text += 'Image: Current webpage screenshot (Note that only visible portion of webpage is present in the screenshot. You may need to scroll to view the remaining portion of the web-page.)\n'
|
||||
message = Message(
|
||||
role='user',
|
||||
content=[
|
||||
TextContent(text=text),
|
||||
ImageContent(
|
||||
image_urls=[
|
||||
# show set of marks if it exists
|
||||
# otherwise, show raw screenshot when using vision-supported model
|
||||
obs.set_of_marks
|
||||
if obs.set_of_marks is not None
|
||||
and len(obs.set_of_marks) > 0
|
||||
else obs.screenshot
|
||||
]
|
||||
),
|
||||
],
|
||||
)
|
||||
logger.debug(
|
||||
f'Vision enabled for browsing, showing {"set of marks" if obs.set_of_marks and len(obs.set_of_marks) > 0 else "screenshot"}'
|
||||
)
|
||||
|
||||
# Determine which image to use and validate it
|
||||
image_url = None
|
||||
if obs.set_of_marks is not None and len(obs.set_of_marks) > 0:
|
||||
image_url = obs.set_of_marks
|
||||
image_type = 'set of marks'
|
||||
elif obs.screenshot is not None and len(obs.screenshot) > 0:
|
||||
image_url = obs.screenshot
|
||||
image_type = 'screenshot'
|
||||
|
||||
# Create message content with text
|
||||
content = [TextContent(text=text)]
|
||||
|
||||
# Only add ImageContent if we have a valid image URL
|
||||
if self._is_valid_image_url(image_url):
|
||||
content.append(ImageContent(image_urls=[image_url]))
|
||||
logger.debug(f'Vision enabled for browsing, showing {image_type}')
|
||||
else:
|
||||
if image_url:
|
||||
logger.warning(
|
||||
f'Invalid image URL format for {image_type}: {image_url[:50]}...'
|
||||
)
|
||||
# Add text indicating the image was filtered
|
||||
content[
|
||||
0
|
||||
].text += f'\n\nNote: The {image_type} for this webpage was invalid or empty and has been filtered. The agent should use alternative methods to access visual information about the webpage.'
|
||||
else:
|
||||
logger.debug(
|
||||
'Vision enabled for browsing, but no valid image available'
|
||||
)
|
||||
# Add text indicating no image was available
|
||||
content[
|
||||
0
|
||||
].text += '\n\nNote: No visual information (screenshot or set of marks) is available for this webpage. The agent should rely on the text content above.'
|
||||
|
||||
message = Message(role='user', content=content)
|
||||
else:
|
||||
message = Message(
|
||||
role='user',
|
||||
|
||||
Reference in New Issue
Block a user