[eval] stop setting sid in eval (#4311)

This commit is contained in:
Xingyao Wang 2024-10-09 22:47:27 -05:00 committed by GitHub
parent a6993b7bf5
commit b23c7aab5a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 21 additions and 30 deletions

View File

@ -136,7 +136,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
```python
def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput:
config = get_config(instance, metadata)
runtime = create_runtime(config, sid=instance.instance_id)
runtime = create_runtime(config)
initialize_runtime(runtime, instance)
instruction = get_instruction(instance, metadata)

View File

@ -118,7 +118,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config, sid=instance['text'].strip())
runtime = create_runtime(config)
state: State | None = asyncio.run(
run_controller(

View File

@ -209,7 +209,7 @@ def process_instance(
# create sandbox and run the agent
# =============================================
runtime: Runtime = create_runtime(config, sid=instance.instance_id)
runtime: Runtime = create_runtime(config)
initialize_runtime(runtime, instance=instance)

View File

@ -203,7 +203,7 @@ def process_instance(
# create sandbox and run the agent
# =============================================
runtime: Runtime = create_runtime(config, sid=str(instance.instance_id))
runtime: Runtime = create_runtime(config)
initialize_runtime(runtime, instance=instance)

View File

@ -274,10 +274,7 @@ def process_instance(
# NOTE: You can actually set slightly different instruction for different agents
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
# use a session id for concurrent evaluation
sid = instance.instance_id.replace('/', '__')
runtime = create_runtime(config, sid=sid)
runtime = create_runtime(config)
initialize_runtime(runtime, instance)

View File

@ -402,7 +402,7 @@ def process_instance(
# NOTE: You can actually set slightly different instruction for different agents
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config, sid=instance_id)
runtime = create_runtime(config)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -72,7 +72,7 @@ def process_instance(
f'NOTE: You should copy the "query" as is into the <execute_browse> tag. DO NOT change ANYTHING in the query.'
)
runtime = create_runtime(config, sid=instance.instance_id)
runtime = create_runtime(config)
state: State | None = asyncio.run(
run_controller(

View File

@ -141,7 +141,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX.get(metadata.agent_class, '')
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config, sid=instance['instance_id'])
runtime = create_runtime(config)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -80,7 +80,7 @@ def process_instance(
# logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config, sid=instance_id)
runtime = create_runtime(config)
state: State | None = asyncio.run(
run_controller(
config=config,

View File

@ -214,7 +214,7 @@ Again do not quit without reporting the answer first.
Ok now its time to start solving the question. Good luck!
"""
runtime = create_runtime(config, sid=f'gptq_{str(instance.instance_id)}')
runtime = create_runtime(config)
state: State | None = asyncio.run(
run_controller(

View File

@ -232,7 +232,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config, sid=sid)
runtime = create_runtime(config)
initialize_runtime(runtime, instance)
state: State | None = asyncio.run(
run_controller(

View File

@ -201,10 +201,7 @@ def process_instance(
# NOTE: You can actually set slightly different instruction for different agents
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
# use a session id for concurrent evaluation
sid = instance['instance_id']
runtime = create_runtime(config, sid=sid)
runtime = create_runtime(config)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -126,7 +126,7 @@ def process_instance(
else:
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config, sid=env_id)
runtime = create_runtime(config)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(
run_controller(

View File

@ -175,7 +175,7 @@ def process_instance(
},
)
runtime = create_runtime(config, sid=instance.instance_id)
runtime = create_runtime(config)
initialize_runtime(runtime)
state: State | None = asyncio.run(

View File

@ -211,9 +211,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
else:
logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
# Create a sandbox, using the instance ID and PID as the session ID to avoid conflicts
sid = str(instance['instance_id'])
repo_url = instance['github']
repo_name = repo_url.split('/')[-1]
task_path = os.path.join('/workspace', repo_name, instance['path'][2:])
@ -235,7 +232,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
)
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config, sid=sid)
runtime = create_runtime(config)
initialize_runtime(runtime, instance)
# Run the agent

View File

@ -127,7 +127,7 @@ def process_instance(
test_result=instance['test_result'],
)
runtime = create_runtime(config, sid=instance_id)
runtime = create_runtime(config)
# Get patch and save it to /tmp/patch.diff
with tempfile.TemporaryDirectory() as temp_dir:

View File

@ -365,7 +365,7 @@ def process_instance(
else:
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
runtime = create_runtime(config, sid=instance.instance_id)
runtime = create_runtime(config)
try:
initialize_runtime(runtime, instance)

View File

@ -102,7 +102,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config, sid=qid)
runtime = create_runtime(config)
initialize_runtime(runtime)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@ -142,7 +142,7 @@ def process_instance(
else:
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config, sid=env_id)
runtime = create_runtime(config)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(

View File

@ -211,7 +211,7 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
jwt_secret = config.jwt_secret
hash_str = hashlib.sha256(f'{session_name}{jwt_secret}'.encode('utf-8')).hexdigest()
return f'{session_name}_{hash_str[:16]}'
return f'{session_name}-{hash_str[:16]}'
if __name__ == '__main__':

View File

@ -126,7 +126,7 @@ class RemoteRuntime(Runtime):
timeout=5,
)
except Exception as e:
logger.error(f'Error while looking for remote runtime: {e}')
logger.debug(f'Error while looking for remote runtime: {e}')
return False
if response.status_code == 200: