diff --git a/evaluation/benchmarks/multi_swe_bench/README.md b/evaluation/benchmarks/multi_swe_bench/README.md
index 843343f922..88843ca9ce 100644
--- a/evaluation/benchmarks/multi_swe_bench/README.md
+++ b/evaluation/benchmarks/multi_swe_bench/README.md
@@ -41,6 +41,10 @@ default, it is set to 1.
 - `language`, the language of your evaluating dataset.
 - `dataset`, the absolute position of the dataset jsonl.
 
+**Skipping errors on build**
+
+For debugging purposes, you can set `export EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED=true` to continue evaluation even when instances reach maximum retries. After evaluation completes, check `maximum_retries_exceeded.jsonl` for a list of failed instances, fix those issues, and then run the evaluation again with `export EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED=false`.
+
 The results will be generated in evaluation/evaluation_outputs/outputs/XXX/CodeActAgent/YYY/output.jsonl, you can refer to the [example](examples/output.jsonl).
 
 ## Runing evaluation
diff --git a/evaluation/benchmarks/multi_swe_bench/run_infer.py b/evaluation/benchmarks/multi_swe_bench/run_infer.py
index df0efd339d..1881b1e422 100644
--- a/evaluation/benchmarks/multi_swe_bench/run_infer.py
+++ b/evaluation/benchmarks/multi_swe_bench/run_infer.py
@@ -17,6 +17,7 @@ from evaluation.utils.shared import (
     EvalMetadata,
     EvalOutput,
     assert_and_raise,
+    check_maximum_retries_exceeded,
     codeact_user_response,
     get_default_sandbox_config_for_eval,
     get_metrics,
@@ -843,3 +844,5 @@ if __name__ == '__main__':
         timeout_seconds=120 * 60,  # 2 hour PER instance should be more than enough
         max_retries=5,
     )
+    # Check if any instances reached maximum retries
+    check_maximum_retries_exceeded(metadata.eval_output_dir)
diff --git a/evaluation/benchmarks/swe_bench/README.md b/evaluation/benchmarks/swe_bench/README.md
index f968130211..b1858bf70a 100644
--- a/evaluation/benchmarks/swe_bench/README.md
+++ b/evaluation/benchmarks/swe_bench/README.md
@@ -38,6 +38,10 @@ Please follow instruction [here](../../README.md#setup) to setup your local deve
 > - If your LLM config has temperature=0, we will automatically use temperature=0.1 for the 2nd and 3rd attempts
 >
 > To enable this iterative protocol, set `export ITERATIVE_EVAL_MODE=true`
+>
+> **Skipping errors on build**
+>
+> For debugging purposes, you can set `export EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED=true` to continue evaluation even when instances reach maximum retries. After evaluation completes, check `maximum_retries_exceeded.jsonl` for a list of failed instances, fix those issues, and then run the evaluation again with `export EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED=false`.
 
 ### Running Locally with Docker
 
diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py
index 30a40c8e66..876645b910 100644
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -28,6 +28,7 @@ from evaluation.utils.shared import (
     EvalMetadata,
     EvalOutput,
     assert_and_raise,
+    check_maximum_retries_exceeded,
     codeact_user_response,
     get_default_sandbox_config_for_eval,
     get_metrics,
@@ -968,3 +969,5 @@ if __name__ == '__main__':
     logger.info(
         f'Done! Total {len(added_instance_ids)} instances added to {output_file}'
     )
+    # Check if any instances reached maximum retries
+    check_maximum_retries_exceeded(metadata.eval_output_dir)
diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py
index 2338277d48..aeb563240d 100644
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -311,6 +311,76 @@ def assert_and_raise(condition: bool, msg: str):
         raise EvalException(msg)
 
 
+def log_skipped_maximum_retries_exceeded(instance, metadata, error, max_retries=5):
+    """Log and skip the instance when maximum retries are exceeded.
+
+    Args:
+        instance: The instance that failed
+        metadata: The evaluation metadata
+        error: The error that occurred
+        max_retries: The maximum number of retries that were attempted
+
+    Returns:
+        EvalOutput with the error information
+    """
+    from openhands.core.logger import openhands_logger as logger
+
+    # Log the error
+    logger.exception(error)
+    logger.error(
+        f'Maximum error retries reached for instance {instance.instance_id}. '
+        f'Check maximum_retries_exceeded.jsonl, fix the issue and run evaluation again. '
+        f'Skipping this instance and continuing with others.'
+    )
+
+    # Add the instance name to maximum_retries_exceeded.jsonl in the same folder as output.jsonl
+    if metadata and metadata.eval_output_dir:
+        retries_file_path = os.path.join(
+            metadata.eval_output_dir,
+            'maximum_retries_exceeded.jsonl',
+        )
+        try:
+            # Write the instance info as a JSON line
+            with open(retries_file_path, 'a') as f:
+                import json
+
+                # No need to get Docker image as we're not including it in the error entry
+
+                error_entry = {
+                    'instance_id': instance.instance_id,
+                    'error': str(error),
+                    'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
+                }
+                f.write(json.dumps(error_entry) + '\n')
+            logger.info(f'Added instance {instance.instance_id} to {retries_file_path}')
+        except Exception as write_error:
+            logger.error(
+                f'Failed to write to maximum_retries_exceeded.jsonl: {write_error}'
+            )
+
+    return EvalOutput(
+        instance_id=instance.instance_id,
+        test_result={},
+        error=f'Maximum retries ({max_retries}) reached: {str(error)}',
+        status='error',
+    )
+
+
+def check_maximum_retries_exceeded(eval_output_dir):
+    """Check if maximum_retries_exceeded.jsonl exists and output a message."""
+    from openhands.core.logger import openhands_logger as logger
+
+    retries_file_path = os.path.join(eval_output_dir, 'maximum_retries_exceeded.jsonl')
+    if os.path.exists(retries_file_path):
+        logger.info(
+            'ATTENTION: Some instances reached maximum error retries and were skipped.'
+        )
+        logger.info(f'These instances are listed in: {retries_file_path}')
+        logger.info(
+            'Fix these instances and run evaluation again with EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED=false'
+        )
+
+
 def _process_instance_wrapper(
     process_instance_func: Callable[[pd.Series, EvalMetadata, bool], EvalOutput],
     instance: pd.Series,
@@ -363,11 +433,26 @@
                     + f'[Encountered after {max_retries} retries. Please check the logs and report the issue.]'
                     + '-' * 10
                 )
-                # Raise an error after all retries & stop the evaluation
-                logger.exception(e)
-                raise RuntimeError(
-                    f'Maximum error retries reached for instance {instance.instance_id}'
-                ) from e
+
+                # Check if EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED is set to true
+                skip_errors = (
+                    os.environ.get(
+                        'EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED', 'false'
+                    ).lower()
+                    == 'true'
+                )
+
+                if skip_errors:
+                    # Use the dedicated function to log and skip maximum retries exceeded
+                    return log_skipped_maximum_retries_exceeded(
+                        instance, metadata, e, max_retries
+                    )
+                else:
+                    # Raise an error after all retries & stop the evaluation
+                    logger.exception(e)
+                    raise RuntimeError(
+                        f'Maximum error retries reached for instance {instance.instance_id}'
+                    ) from e
             msg = (
                 '-' * 10
                 + '\n'
@@ -456,6 +541,10 @@ def run_evaluation(
     output_fp.close()
     logger.info('\nEvaluation finished.\n')
 
+    # Check if any instances reached maximum retries
+    if metadata and metadata.eval_output_dir:
+        check_maximum_retries_exceeded(metadata.eval_output_dir)
+
 
 def reset_logger_for_multiprocessing(
     logger: logging.Logger, instance_id: str, log_dir: str
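For readers who want to exercise the new skip-and-record behavior outside a full evaluation run, here is a minimal, self-contained sketch of the flow the patch introduces: when `EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED=true`, a failing instance is appended to `maximum_retries_exceeded.jsonl` instead of aborting the run, and that file can be read back after the run. The helper names (`record_failure`, `list_failed_instances`), the instance id, and the output directory below are illustrative only; the real logic lives in `log_skipped_maximum_retries_exceeded` and `check_maximum_retries_exceeded` in `evaluation/utils/shared.py`.

```python
# Minimal sketch (not part of the patch) of the skip-and-record flow.
# record_failure / list_failed_instances are hypothetical helper names.
import json
import os
import time


def record_failure(eval_output_dir: str, instance_id: str, error: Exception) -> str:
    """Append one failed instance to maximum_retries_exceeded.jsonl (one JSON object per line)."""
    path = os.path.join(eval_output_dir, 'maximum_retries_exceeded.jsonl')
    entry = {
        'instance_id': instance_id,
        'error': str(error),
        'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
    }
    with open(path, 'a') as f:
        f.write(json.dumps(entry) + '\n')
    return path


def list_failed_instances(eval_output_dir: str) -> list[str]:
    """Return the instance ids recorded by a previous run, or [] if nothing was skipped."""
    path = os.path.join(eval_output_dir, 'maximum_retries_exceeded.jsonl')
    if not os.path.exists(path):
        return []
    with open(path) as f:
        return [json.loads(line)['instance_id'] for line in f if line.strip()]


if __name__ == '__main__':
    out_dir = '/tmp/eval_demo'  # placeholder for metadata.eval_output_dir
    os.makedirs(out_dir, exist_ok=True)
    skip = os.environ.get('EVAL_SKIP_MAXIMUM_RETRIES_EXCEEDED', 'false').lower() == 'true'
    if skip:
        # In the patch this branch runs once an instance exhausts max_retries.
        record_failure(out_dir, 'example__repo-123', RuntimeError('sandbox build failed'))
    print(list_failed_instances(out_dir))
```

Appending JSON Lines rather than rewriting a single JSON document means a run that dies partway through still leaves every previously recorded failure readable, which is why the patch writes one object per line and simply re-checks for the file at the end of evaluation.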