diff --git a/docs/modules/usage/how-to/evaluation-harness.md b/docs/modules/usage/how-to/evaluation-harness.md
index 7209e9a567..32717675e3 100644
--- a/docs/modules/usage/how-to/evaluation-harness.md
+++ b/docs/modules/usage/how-to/evaluation-harness.md
@@ -136,7 +136,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
 ```python
 def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput:
     config = get_config(instance, metadata)
-    runtime = create_runtime(config, sid=instance.instance_id)
+    runtime = create_runtime(config)
     initialize_runtime(runtime, instance)
 
     instruction = get_instruction(instance, metadata)
diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py
index 2607ba10f8..81c7455e00 100644
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -118,7 +118,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
-    runtime = create_runtime(config, sid=instance['text'].strip())
+    runtime = create_runtime(config)
 
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py
index 3b50c526de..b851f86fa4 100644
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -209,7 +209,7 @@ def process_instance(
     # create sandbox and run the agent
     # =============================================
 
-    runtime: Runtime = create_runtime(config, sid=instance.instance_id)
+    runtime: Runtime = create_runtime(config)
 
     initialize_runtime(runtime, instance=instance)
 
diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py
index fde483d0be..b4698a7c69 100644
--- a/evaluation/aider_bench/run_infer.py
+++ b/evaluation/aider_bench/run_infer.py
@@ -203,7 +203,7 @@ def process_instance(
     # create sandbox and run the agent
     # =============================================
 
-    runtime: Runtime = create_runtime(config, sid=str(instance.instance_id))
+    runtime: Runtime = create_runtime(config)
 
     initialize_runtime(runtime, instance=instance)
 
diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py
index 5732f8ac65..3574089291 100644
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -274,10 +274,7 @@ def process_instance(
     # NOTE: You can actually set slightly different instruction for different agents
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
-    # use a session id for concurrent evaluation
-    sid = instance.instance_id.replace('/', '__')
-
-    runtime = create_runtime(config, sid=sid)
+    runtime = create_runtime(config)
 
     initialize_runtime(runtime, instance)
 
diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py
index 36d5a4a23b..aae58cc7d5 100644
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -402,7 +402,7 @@ def process_instance(
     # NOTE: You can actually set slightly different instruction for different agents
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
-    runtime = create_runtime(config, sid=instance_id)
+    runtime = create_runtime(config)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py
index c84dac7bf9..c9fe2ebd18 100644
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@@ -72,7 +72,7 @@ def process_instance(
         f'NOTE: You should copy the "query" as is into the tag. DO NOT change ANYTHING in the query.'
     )
 
-    runtime = create_runtime(config, sid=instance.instance_id)
+    runtime = create_runtime(config)
 
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py
index f460372cc8..9f6f3884f5 100644
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -141,7 +141,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX.get(metadata.agent_class, '')
     logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
-    runtime = create_runtime(config, sid=instance['instance_id'])
+    runtime = create_runtime(config)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py
index 092c58e610..ac49a8078d 100644
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -80,7 +80,7 @@ def process_instance(
     # logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
-    runtime = create_runtime(config, sid=instance_id)
+    runtime = create_runtime(config)
     state: State | None = asyncio.run(
         run_controller(
             config=config,
diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py
index dc90b88795..fe7ff4bf1c 100644
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -214,7 +214,7 @@ Again do not quit without reporting the answer first.
 
 Ok now its time to start solving the question. Good luck!
 """
-    runtime = create_runtime(config, sid=f'gptq_{str(instance.instance_id)}')
+    runtime = create_runtime(config)
 
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py
index 4bd3663c86..c6d643f94e 100644
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -232,7 +232,7 @@ def process_instance(
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
-    runtime = create_runtime(config, sid=sid)
+    runtime = create_runtime(config)
     initialize_runtime(runtime, instance)
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py
index 2f0a306154..7fa6a5bb50 100644
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -201,10 +201,7 @@ def process_instance(
     # NOTE: You can actually set slightly different instruction for different agents
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
-    # use a session id for concurrent evaluation
-    sid = instance['instance_id']
-
-    runtime = create_runtime(config, sid=sid)
+    runtime = create_runtime(config)
     initialize_runtime(runtime, instance)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py
index 63a306ac81..1df7dac028 100644
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -126,7 +126,7 @@ def process_instance(
     else:
         logger.info(f'Starting evaluation for instance {env_id}.')
 
-    runtime = create_runtime(config, sid=env_id)
+    runtime = create_runtime(config)
     task_str = initialize_runtime(runtime)
     state: State | None = asyncio.run(
         run_controller(
diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py
index fe2fd04203..481336d59d 100644
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -175,7 +175,7 @@ def process_instance(
         },
     )
 
-    runtime = create_runtime(config, sid=instance.instance_id)
+    runtime = create_runtime(config)
     initialize_runtime(runtime)
 
     state: State | None = asyncio.run(
diff --git a/evaluation/ml_bench/run_infer.py b/evaluation/ml_bench/run_infer.py
index 0c487b4ce7..671c6350a4 100644
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@@ -211,9 +211,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     else:
         logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
 
-    # Create a sandbox, using the instance ID and PID as the session ID to avoid conflicts
-    sid = str(instance['instance_id'])
-
     repo_url = instance['github']
     repo_name = repo_url.split('/')[-1]
     task_path = os.path.join('/workspace', repo_name, instance['path'][2:])
@@ -235,7 +232,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     )
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
 
-    runtime = create_runtime(config, sid=sid)
+    runtime = create_runtime(config)
     initialize_runtime(runtime, instance)
 
     # Run the agent
diff --git a/evaluation/swe_bench/eval_infer.py b/evaluation/swe_bench/eval_infer.py
index 525bc17e97..14429fc985 100644
--- a/evaluation/swe_bench/eval_infer.py
+++ b/evaluation/swe_bench/eval_infer.py
@@ -127,7 +127,7 @@ def process_instance(
         test_result=instance['test_result'],
     )
 
-    runtime = create_runtime(config, sid=instance_id)
+    runtime = create_runtime(config)
 
     # Get patch and save it to /tmp/patch.diff
     with tempfile.TemporaryDirectory() as temp_dir:
diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
index 835f80ef55..9e4f1c4165 100644
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -365,7 +365,7 @@ def process_instance(
     else:
         logger.info(f'Starting evaluation for instance {instance.instance_id}.')
 
-    runtime = create_runtime(config, sid=instance.instance_id)
+    runtime = create_runtime(config)
 
     try:
         initialize_runtime(runtime, instance)
diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py
index 2a9dba4114..8b3ebdc58b 100644
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -102,7 +102,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
     instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
     logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
 
-    runtime = create_runtime(config, sid=qid)
+    runtime = create_runtime(config)
     initialize_runtime(runtime)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py
index 159f49a800..26637e00be 100644
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -142,7 +142,7 @@ def process_instance(
     else:
         logger.info(f'Starting evaluation for instance {env_id}.')
 
-    runtime = create_runtime(config, sid=env_id)
+    runtime = create_runtime(config)
     task_str = initialize_runtime(runtime)
 
     state: State | None = asyncio.run(
diff --git a/openhands/core/main.py b/openhands/core/main.py
index adda15aeab..b0702c943b 100644
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -211,7 +211,7 @@ def generate_sid(config: AppConfig, session_name: str | None = None) -> str:
     jwt_secret = config.jwt_secret
 
     hash_str = hashlib.sha256(f'{session_name}{jwt_secret}'.encode('utf-8')).hexdigest()
-    return f'{session_name}_{hash_str[:16]}'
+    return f'{session_name}-{hash_str[:16]}'
 
 
 if __name__ == '__main__':
diff --git a/openhands/runtime/remote/runtime.py b/openhands/runtime/remote/runtime.py
index 5e2c35af8d..085a160597 100644
--- a/openhands/runtime/remote/runtime.py
+++ b/openhands/runtime/remote/runtime.py
@@ -126,7 +126,7 @@ class RemoteRuntime(Runtime):
                 timeout=5,
             )
         except Exception as e:
-            logger.error(f'Error while looking for remote runtime: {e}')
+            logger.debug(f'Error while looking for remote runtime: {e}')
             return False
 
         if response.status_code == 200: