Only regenerate integration tests for failed ones (#1661)

2026-03-22 13:47:19 +08:00 · 2024-05-09 06:32:00 -07:00
parent 06aae67fed
commit a60a6a40d6
2 changed files with 36 additions and 13 deletions
--- a/tests/integration/regenerate.sh
+++ b/tests/integration/regenerate.sh
@@ -1,24 +1,47 @@
 #!/bin/bash
 set -eo pipefail

-WORKSPACE_MOUNT_PATH=$(pwd)/workspace
+WORKSPACE_MOUNT_PATH=$(pwd)/_test_workspace
+WORKSPACE_BASE=$(pwd)/_test_workspace
+SANDBOX_TYPE="ssh"

 # FIXME: SWEAgent hangs, so it goes last
 agents=("MonologueAgent" "CodeActAgent" "PlannerAgent" "SWEAgent")

+# TODO: currently we only have one test: test_write_simple_script. We need to revisit
+# this script when we have more than one integration test.
 for agent in "${agents[@]}"; do
-  echo -e "\n\n\n\n========Generating test data for $agent========\n\n\n\n"
-  rm -rf logs
-  rm -rf _test_workspace
-  rm -rf tests/integration/mock/$agent/test_write_simple_script/*
-  mkdir _test_workspace
-  echo -e "/exit\n" | poetry run python ./opendevin/core/main.py \
-    -i 10 \
-    -t "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point." \
-    -c $agent \
-    -d "./_test_workspace"
+  echo -e "\n\n\n\n========Running integration test for $agent========\n\n\n\n"
+  rm -rf $WORKSPACE_BASE

-  mv logs/llm/**/* tests/integration/mock/$agent/test_write_simple_script/
+  # Temporarily disable 'exit on error'
+  set +e
+  SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
+    WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
+    poetry run pytest ./tests/integration
+  TEST_STATUS=$?
+  # Re-enable 'exit on error'
+  set -e
+
+  if [[ $TEST_STATUS -ne 0 ]]; then
+    echo -e "\n\n\n\n========Test failed, regenerating test data for $agent========\n\n\n\n"
+    sleep 1
+    rm -rf logs
+    rm -rf $WORKSPACE_BASE
+    rm -rf tests/integration/mock/$agent/test_write_simple_script/*
+    mkdir $WORKSPACE_BASE
+    echo -e "/exit\n" | SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
+      WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
+      poetry run python ./opendevin/core/main.py \
+      -i 10 \
+      -t "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point." \
+      -c $agent
+
+    mv logs/llm/**/* tests/integration/mock/$agent/test_write_simple_script/
+  else
+    echo -e "\n\n\n\n========Integration test for $agent PASSED========\n\n\n\n"
+    sleep 1
+  fi
 done

 rm -rf logs
--- a/tests/integration/test_agent.py
+++ b/tests/integration/test_agent.py
@@ -18,7 +18,7 @@ def test_write_simple_script():
    asyncio.run(main(task))

    # Verify the script file exists
-    script_path = 'workspace/hello.sh'
+    script_path = os.path.join(os.getenv('WORKSPACE_BASE'), 'hello.sh')
    assert os.path.exists(script_path), 'The file "hello.sh" does not exist'

    # Run the script and capture the output