mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
[eval] stop set sid in eval (#4311)
This commit is contained in:
parent
a6993b7bf5
commit
b23c7aab5a
@ -136,7 +136,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
|
||||
```python
|
||||
def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput:
|
||||
config = get_config(instance, metadata)
|
||||
runtime = create_runtime(config, sid=instance.instance_id)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
instruction = get_instruction(instance, metadata)
|
||||
|
||||
@ -118,7 +118,7 @@ def process_instance(
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
runtime = create_runtime(config, sid=instance['text'].strip())
|
||||
runtime = create_runtime(config)
|
||||
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
|
||||
@ -209,7 +209,7 @@ def process_instance(
|
||||
# create sandbox and run the agent
|
||||
# =============================================
|
||||
|
||||
runtime: Runtime = create_runtime(config, sid=instance.instance_id)
|
||||
runtime: Runtime = create_runtime(config)
|
||||
|
||||
initialize_runtime(runtime, instance=instance)
|
||||
|
||||
|
||||
@ -203,7 +203,7 @@ def process_instance(
|
||||
# create sandbox and run the agent
|
||||
# =============================================
|
||||
|
||||
runtime: Runtime = create_runtime(config, sid=str(instance.instance_id))
|
||||
runtime: Runtime = create_runtime(config)
|
||||
|
||||
initialize_runtime(runtime, instance=instance)
|
||||
|
||||
|
||||
@ -274,10 +274,7 @@ def process_instance(
|
||||
# NOTE: You can actually set slightly different instruction for different agents
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
|
||||
# use a session id for concurrent evaluation
|
||||
sid = instance.instance_id.replace('/', '__')
|
||||
|
||||
runtime = create_runtime(config, sid=sid)
|
||||
runtime = create_runtime(config)
|
||||
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
|
||||
@ -402,7 +402,7 @@ def process_instance(
|
||||
# NOTE: You can actually set slightly different instruction for different agents
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
|
||||
runtime = create_runtime(config, sid=instance_id)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
|
||||
@ -72,7 +72,7 @@ def process_instance(
|
||||
f'NOTE: You should copy the "query" as is into the <execute_browse> tag. DO NOT change ANYTHING in the query.'
|
||||
)
|
||||
|
||||
runtime = create_runtime(config, sid=instance.instance_id)
|
||||
runtime = create_runtime(config)
|
||||
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
|
||||
@ -141,7 +141,7 @@ def process_instance(
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX.get(metadata.agent_class, '')
|
||||
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
runtime = create_runtime(config, sid=instance['instance_id'])
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
|
||||
@ -80,7 +80,7 @@ def process_instance(
|
||||
# logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
runtime = create_runtime(config, sid=instance_id)
|
||||
runtime = create_runtime(config)
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
config=config,
|
||||
|
||||
@ -214,7 +214,7 @@ Again do not quit without reporting the answer first.
|
||||
Ok now its time to start solving the question. Good luck!
|
||||
"""
|
||||
|
||||
runtime = create_runtime(config, sid=f'gptq_{str(instance.instance_id)}')
|
||||
runtime = create_runtime(config)
|
||||
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
|
||||
@ -232,7 +232,7 @@ def process_instance(
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
runtime = create_runtime(config, sid=sid)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime, instance)
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
|
||||
@ -201,10 +201,7 @@ def process_instance(
|
||||
# NOTE: You can actually set slightly different instruction for different agents
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
|
||||
# use a session id for concurrent evaluation
|
||||
sid = instance['instance_id']
|
||||
|
||||
runtime = create_runtime(config, sid=sid)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
|
||||
@ -126,7 +126,7 @@ def process_instance(
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {env_id}.')
|
||||
|
||||
runtime = create_runtime(config, sid=env_id)
|
||||
runtime = create_runtime(config)
|
||||
task_str = initialize_runtime(runtime)
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
|
||||
@ -175,7 +175,7 @@ def process_instance(
|
||||
},
|
||||
)
|
||||
|
||||
runtime = create_runtime(config, sid=instance.instance_id)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime)
|
||||
|
||||
state: State | None = asyncio.run(
|
||||
|
||||
@ -211,9 +211,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
|
||||
|
||||
# Create a sandbox, using the instance ID and PID as the session ID to avoid conflicts
|
||||
sid = str(instance['instance_id'])
|
||||
|
||||
repo_url = instance['github']
|
||||
repo_name = repo_url.split('/')[-1]
|
||||
task_path = os.path.join('/workspace', repo_name, instance['path'][2:])
|
||||
@ -235,7 +232,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
)
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
|
||||
runtime = create_runtime(config, sid=sid)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
# Run the agent
|
||||
|
||||
@ -127,7 +127,7 @@ def process_instance(
|
||||
test_result=instance['test_result'],
|
||||
)
|
||||
|
||||
runtime = create_runtime(config, sid=instance_id)
|
||||
runtime = create_runtime(config)
|
||||
|
||||
# Get patch and save it to /tmp/patch.diff
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
|
||||
@ -365,7 +365,7 @@ def process_instance(
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
|
||||
|
||||
runtime = create_runtime(config, sid=instance.instance_id)
|
||||
runtime = create_runtime(config)
|
||||
|
||||
try:
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
@ -102,7 +102,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
runtime = create_runtime(config, sid=qid)
|
||||
runtime = create_runtime(config)
|
||||
initialize_runtime(runtime)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
|
||||
@ -142,7 +142,7 @@ def process_instance(
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {env_id}.')
|
||||
|
||||
runtime = create_runtime(config, sid=env_id)
|
||||
runtime = create_runtime(config)
|
||||
task_str = initialize_runtime(runtime)
|
||||
|
||||
state: State | None = asyncio.run(
|
||||
|
||||
@ -211,7 +211,7 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
|
||||
jwt_secret = config.jwt_secret
|
||||
|
||||
hash_str = hashlib.sha256(f'{session_name}{jwt_secret}'.encode('utf-8')).hexdigest()
|
||||
return f'{session_name}_{hash_str[:16]}'
|
||||
return f'{session_name}-{hash_str[:16]}'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@ -126,7 +126,7 @@ class RemoteRuntime(Runtime):
|
||||
timeout=5,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f'Error while looking for remote runtime: {e}')
|
||||
logger.debug(f'Error while looking for remote runtime: {e}')
|
||||
return False
|
||||
|
||||
if response.status_code == 200:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user