From a234f0ca7e51640732eabdd88e8559c5397bfd98 Mon Sep 17 00:00:00 2001 From: vvincent1234 Date: Sat, 11 Jan 2025 16:58:41 +0800 Subject: [PATCH] add generate gif --- src/agent/custom_agent.py | 120 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index 3bf5496..f4c1df5 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -9,6 +9,10 @@ import logging import pdb import traceback from typing import Optional, Type +from PIL import Image, ImageDraw, ImageFont +import os +import base64 +import io from browser_use.agent.prompts import SystemPrompt from browser_use.agent.service import Agent @@ -227,6 +231,119 @@ class CustomAgent(Agent): ) if state: self._make_history_item(model_output, state, result) + def create_history_gif( + self, + output_path: str = 'agent_history.gif', + duration: int = 3000, + show_goals: bool = True, + show_task: bool = True, + show_logo: bool = False, + font_size: int = 40, + title_font_size: int = 56, + goal_font_size: int = 44, + margin: int = 40, + line_spacing: float = 1.5, + ) -> None: + """Create a GIF from the agent's history with overlaid task and goal text.""" + if not self.history.history: + logger.warning('No history to create GIF from') + return + + images = [] + # if history is empty or first screenshot is None, we can't create a gif + if not self.history.history or not self.history.history[0].state.screenshot: + logger.warning('No history or first screenshot to create GIF from') + return + + # Try to load nicer fonts + try: + # Try different font options in order of preference + font_options = ['Helvetica', 'Arial', 'DejaVuSans', 'Verdana'] + font_loaded = False + + for font_name in font_options: + try: + import platform + if platform.system() == "Windows": + # Need to specify the abs font path on Windows + font_name = os.path.join(os.getenv("WIN_FONT_DIR", "C:\\Windows\\Fonts"), font_name + ".ttf") + regular_font = ImageFont.truetype(font_name, font_size) + title_font = ImageFont.truetype(font_name, title_font_size) + goal_font = ImageFont.truetype(font_name, goal_font_size) + font_loaded = True + break + except OSError: + continue + + if not font_loaded: + raise OSError('No preferred fonts found') + + except OSError: + regular_font = ImageFont.load_default() + title_font = ImageFont.load_default() + + goal_font = regular_font + + # Load logo if requested + logo = None + if show_logo: + try: + logo = Image.open('./static/browser-use.png') + # Resize logo to be small (e.g., 40px height) + logo_height = 150 + aspect_ratio = logo.width / logo.height + logo_width = int(logo_height * aspect_ratio) + logo = logo.resize((logo_width, logo_height), Image.Resampling.LANCZOS) + except Exception as e: + logger.warning(f'Could not load logo: {e}') + + # Create task frame if requested + if show_task and self.task: + task_frame = self._create_task_frame( + self.task, + self.history.history[0].state.screenshot, + title_font, + regular_font, + logo, + line_spacing, + ) + images.append(task_frame) + + # Process each history item + for i, item in enumerate(self.history.history, 1): + if not item.state.screenshot: + continue + + # Convert base64 screenshot to PIL Image + img_data = base64.b64decode(item.state.screenshot) + image = Image.open(io.BytesIO(img_data)) + + if show_goals and item.model_output: + image = self._add_overlay_to_image( + image=image, + step_number=i, + goal_text=item.model_output.current_state.thought, + regular_font=regular_font, + title_font=title_font, + margin=margin, + logo=logo, + ) + + images.append(image) + + if images: + # Save the GIF + images[0].save( + output_path, + save_all=True, + append_images=images[1:], + duration=duration, + loop=0, + optimize=False, + ) + logger.info(f'Created GIF at {output_path}') + else: + logger.warning('No images found in history to create GIF') async def run(self, max_steps: int = 100) -> AgentHistoryList: """Execute the task with maximum number of steps""" @@ -283,3 +400,6 @@ class CustomAgent(Agent): if not self.injected_browser and self.browser: await self.browser.close() + + if self.generate_gif: + self.create_history_gif()