Update web_toolkit.py

This commit is contained in:
Sun Tao
2025-03-09 17:11:16 +08:00
committed by GitHub
parent 1456356415
commit 5263fe5615

View File

@@ -106,26 +106,60 @@ def _get_bool(d: Any, k: str) -> bool:
def _parse_json_output(text: str) -> Dict[str, Any]:
# judge if text is markdown format (```json ````)
if "```json" in text and "```" in text:
# Extract content between ```json and the last ```
start_idx = text.find("```json") + len("```json")
end_idx = text.rfind("```")
if start_idx > -1 and end_idx > start_idx:
text = text[start_idx:end_idx].strip()
"""Extract JSON output from a string."""
markdown_pattern = r'```(?:json)?\s*(.*?)\s*```'
markdown_match = re.search(markdown_pattern, text, re.DOTALL)
if markdown_match:
text = markdown_match.group(1).strip()
triple_quotes_pattern = r'"""(?:json)?\s*(.*?)\s*"""'
triple_quotes_match = re.search(triple_quotes_pattern, text, re.DOTALL)
if triple_quotes_match:
text = triple_quotes_match.group(1).strip()
text = text.replace("`", '"')
try:
return json.loads(text)
except json.JSONDecodeError:
logger.warning(f"Failed to parse JSON output: {text}")
# Try to clean the text further and attempt parsing again
try:
# Remove any extra whitespace or control characters
cleaned_text = text.strip()
return json.loads(cleaned_text)
fixed_text = re.sub(r'`([^`]*)`', r'"\1"', text)
return json.loads(fixed_text)
except json.JSONDecodeError:
logger.error(f"Failed to parse JSON even after cleaning: {text}")
return {}
result = {}
try:
bool_pattern = r'"(\w+)"\s*:\s*(true|false)'
for match in re.finditer(bool_pattern, text, re.IGNORECASE):
key, value = match.groups()
result[key] = value.lower() == "true"
str_pattern = r'"(\w+)"\s*:\s*"([^"]*)"'
for match in re.finditer(str_pattern, text):
key, value = match.groups()
result[key] = value
num_pattern = r'"(\w+)"\s*:\s*(-?\d+(?:\.\d+)?)'
for match in re.finditer(num_pattern, text):
key, value = match.groups()
try:
result[key] = int(value)
except ValueError:
result[key] = float(value)
empty_str_pattern = r'"(\w+)"\s*:\s*""'
for match in re.finditer(empty_str_pattern, text):
key = match.group(1)
result[key] = ""
if result:
return result
logger.warning(f"Failed to parse JSON output: {text}")
return {}
except Exception as e:
logger.warning(f"Error while extracting fields from JSON: {e}")
return {}
def _reload_image(image: Image.Image):
@@ -379,7 +413,9 @@ class BaseBrowser:
file_path = None
if save_image:
# get url name to form a file name
url_name = self.page_url.split("/")[-1].replace(".", "_").replace(":", "_")
url_name = self.page_url.split("/")[-1]
for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
url_name = url_name.replace(char, "_")
# get formatted time: mmddhhmmss
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
@@ -486,14 +522,16 @@ class BaseBrowser:
file_path = None
comp, visible_rects, rects_above, rects_below = add_set_of_mark(screenshot, rects)
if save_image:
url_name = self.page_url.split("/")[-1].replace(".", "_").replace(":", "_")
url_name = self.page_url.split("/")[-1]
for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '.']:
url_name = url_name.replace(char, "_")
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
file_path = os.path.join(self.cache_dir, f"{url_name}_{timestamp}.png")
with open(file_path, "wb") as f:
comp.save(f, "PNG")
f.close()
return comp, file_path
return comp, file_path
def scroll_up(self) -> None:
@@ -738,6 +776,7 @@ class WebToolkit(BaseToolkit):
history_window: int = 5,
web_agent_model: Optional[BaseModelBackend] = None,
planning_agent_model: Optional[BaseModelBackend] = None,
output_language: str = "en"
):
self.browser = BaseBrowser(
@@ -749,6 +788,7 @@ class WebToolkit(BaseToolkit):
self.history_window = history_window
self.web_agent_model = web_agent_model
self.planning_agent_model = planning_agent_model
self.output_language = output_language
self.history = []
# self.search_toolkit = SearchToolkit()
@@ -789,6 +829,7 @@ Given a high-level task, you can leverage predefined browser tools to help users
web_agent = ChatAgent(
system_message=system_prompt,
model=web_agent_model,
output_language=self.output_language
)
planning_system_prompt = """
@@ -797,7 +838,8 @@ You are a helpful planning agent that can assist users in planning complex tasks
planning_agent = ChatAgent(
system_message=planning_system_prompt,
model=planning_model
model=planning_model,
output_language=self.output_language
)
return web_agent, planning_agent