mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
A few fixes for TAC evaluation harness (#6586)
This commit is contained in:
parent
efbff2e655
commit
4443417c75
@@ -267,7 +267,9 @@ def pre_login(
|
||||
obs: BrowserOutputObservation = runtime.run_action(browser_action)
|
||||
logger.debug(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
if save_screenshots:
|
||||
image_data = base64.b64decode(obs.screenshot)
|
||||
image_data = base64.b64decode(
|
||||
obs.screenshot.replace('data:image/png;base64,', '')
|
||||
)
|
||||
with open(os.path.join(directory, f'{image_id}.png'), 'wb') as file:
|
||||
file.write(image_data)
|
||||
image_id += 1
|
||||
|
||||
@@ -36,7 +36,7 @@ def get_config(
|
||||
task_short_name: str,
|
||||
mount_path_on_host: str,
|
||||
llm_config: LLMConfig,
|
||||
agent_config: AgentConfig,
|
||||
agent_config: AgentConfig | None,
|
||||
) -> AppConfig:
|
||||
config = AppConfig(
|
||||
run_as_openhands=False,
|
||||
@@ -159,11 +159,21 @@ def run_solver(
|
||||
os.makedirs(screenshots_dir, exist_ok=True)
|
||||
for image_id, obs in enumerate(state.history):
|
||||
if isinstance(obs, BrowserOutputObservation):
|
||||
image_data = base64.b64decode(obs.screenshot)
|
||||
image_data = base64.b64decode(
|
||||
obs.screenshot.replace('data:image/png;base64,', '')
|
||||
)
|
||||
with open(
|
||||
os.path.join(screenshots_dir, f'{image_id}.png'), 'wb'
|
||||
) as file:
|
||||
file.write(image_data)
|
||||
if obs.set_of_marks:
|
||||
som_image_data = base64.b64decode(
|
||||
obs.set_of_marks.replace('data:image/png;base64,', '')
|
||||
)
|
||||
with open(
|
||||
os.path.join(screenshots_dir, f'{image_id}_som.png'), 'wb'
|
||||
) as file:
|
||||
file.write(som_image_data)
|
||||
|
||||
if save_final_state:
|
||||
os.makedirs(state_dir, exist_ok=True)
|
||||
|
||||
@@ -129,8 +129,6 @@ temp_file="tasks_${START_PERCENTILE}_${END_PERCENTILE}.md"
|
||||
sed -n "${start_line},${end_line}p" tasks.md > "$temp_file"
|
||||
|
||||
while IFS= read -r task_image; do
|
||||
docker pull $task_image
|
||||
|
||||
# Remove prefix using ## to remove longest matching pattern from start
|
||||
task_name=${task_image##ghcr.io/theagentcompany/}
|
||||
|
||||
@@ -144,6 +142,8 @@ while IFS= read -r task_image; do
|
||||
continue
|
||||
fi
|
||||
|
||||
docker pull $task_image
|
||||
|
||||
# Build the Python command
|
||||
COMMAND="poetry run python run_infer.py \
|
||||
--agent-llm-config \"$AGENT_LLM_CONFIG\" \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user