From cfefc47439497c8b8c70e9b1b4717167ee4a0b47 Mon Sep 17 00:00:00 2001 From: zch-cc <136044586+zch-cc@users.noreply.github.com> Date: Fri, 22 Mar 2024 08:26:37 -0700 Subject: [PATCH] Move regression tests to evaluation/ (#86) * Move regression tests to evaluation/ * use pythnon instead of docker in the script * add model para * change python to python3 * bug fix --- .../cases/client-server/workspace/client/.env | 1 - agenthub/langchains_agent/regression/run.sh | 26 ------- .../regression/.gitignore | 0 .../regression/README.md | 0 .../outputs/langchains_agent}/logs.txt | 0 .../workspace/client/.gitignore | 0 .../workspace/client/README.md | 0 .../workspace/client/package-lock.json | 0 .../workspace/client/package.json | 0 .../workspace/client/public/favicon.ico | Bin .../workspace/client/public/index.html | 0 .../workspace/client/public/logo192.png | Bin .../workspace/client/public/logo512.png | Bin .../workspace/client/public/manifest.json | 0 .../workspace/client/public/robots.txt | 0 .../workspace/client/src/App.css | 0 .../workspace/client/src/App.js | 0 .../workspace/client/src/App.test.js | 0 .../workspace/client/src/index.css | 0 .../workspace/client/src/index.js | 0 .../workspace/client/src/logo.svg | 0 .../workspace/client/src/reportWebVitals.js | 0 .../workspace/client/src/setupTests.js | 0 .../workspace/package-lock.json | 0 .../langchains_agent}/workspace/package.json | 0 .../langchains_agent}/workspace/server.js | 0 .../regression/cases/client-server/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../langchains_agent}/workspace/index.js | 0 .../workspace/package-lock.json | 0 .../langchains_agent}/workspace/package.json | 0 .../regression/cases/express/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../workspace/hello_world.sh | 0 .../cases/hello-name/start/hello_world.sh | 0 .../regression/cases/hello-name/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../workspace/hello_world.sh | 0 
.../regression/cases/hello-world/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../langchains_agent}/workspace/cli.js | 0 .../workspace/commands/index.js | 0 .../workspace/commands/length.js | 0 .../workspace}/commands/length.py | 0 .../workspace/commands/lowercase.js | 0 .../workspace}/commands/lowercase.py | 0 .../workspace/commands/reverse.js | 0 .../workspace}/commands/reverse.py | 0 .../workspace/commands/scramble.js | 0 .../workspace}/commands/scramble.py | 0 .../workspace/commands/spongebob.js | 0 .../workspace}/commands/spongebob.py | 0 .../workspace/commands/uppercase.js | 0 .../workspace}/commands/uppercase.py | 0 .../langchains_agent/workspace}/string_cli.py | 0 .../start}/commands/length.py | 0 .../start}/commands/lowercase.py | 0 .../start}/commands/reverse.py | 0 .../start}/commands/scramble.py | 0 .../start}/commands/spongebob.py | 0 .../start}/commands/uppercase.py | 0 .../node-cli-rewrite/start}/string_cli.py | 0 .../cases/node-cli-rewrite/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../workspace}/commands/length.py | 0 .../workspace}/commands/lowercase.py | 0 .../workspace}/commands/reverse.py | 0 .../workspace}/commands/scramble.py | 0 .../workspace}/commands/spongebob.py | 0 .../workspace}/commands/uppercase.py | 0 .../langchains_agent}/workspace/string_cli.py | 0 .../python-cli-help/start}/commands/length.py | 0 .../start}/commands/lowercase.py | 0 .../start}/commands/reverse.py | 0 .../start}/commands/scramble.py | 0 .../start}/commands/spongebob.py | 0 .../start}/commands/uppercase.py | 0 .../cases/python-cli-help/start/string_cli.py | 0 .../regression/cases/python-cli-help/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../workspace/commands/length.py | 0 .../workspace/commands/lowercase.py | 0 .../workspace/commands/reverse.py | 0 .../workspace/commands/scramble.py | 0 .../workspace/commands/spongebob.py | 0 .../workspace/commands/uppercase.py | 0 .../langchains_agent}/workspace/string_cli.py | 0 
.../regression/cases/python-cli/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../workspace/todo-app/.gitignore | 0 .../workspace/todo-app/README.md | 0 .../workspace/todo-app/package-lock.json | 0 .../workspace/todo-app/package.json | 0 .../workspace/todo-app/public/favicon.ico | Bin .../workspace/todo-app/public/index.html | 0 .../workspace/todo-app/public/logo192.png | Bin .../workspace/todo-app/public/logo512.png | Bin .../workspace/todo-app/public/manifest.json | 0 .../workspace/todo-app/public/robots.txt | 0 .../workspace/todo-app/src/App.css | 0 .../workspace/todo-app/src/App.js | 0 .../workspace/todo-app/src/App.test.js | 0 .../workspace/todo-app/src/index.css | 0 .../workspace/todo-app/src/index.js | 0 .../workspace/todo-app/src/logo.svg | 0 .../workspace/todo-app/src/reportWebVitals.js | 0 .../workspace/todo-app/src/setupTests.js | 0 .../regression/cases/react-todo/task.txt | 0 .../outputs/langchains_agent}/logs.txt | 0 .../langchains_agent}/workspace/README.md | 0 .../langchains_agent}/workspace/server.py | 0 .../cases/server-test/start/server.py | 0 .../regression/cases/server-test/task.txt | 0 evaluation/regression/run.sh | 67 ++++++++++++++++++ 114 files changed, 67 insertions(+), 27 deletions(-) delete mode 100644 agenthub/langchains_agent/regression/cases/client-server/workspace/client/.env delete mode 100755 agenthub/langchains_agent/regression/run.sh rename {agenthub/langchains_agent => evaluation}/regression/.gitignore (100%) rename {agenthub/langchains_agent => evaluation}/regression/README.md (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/.gitignore (100%) rename {agenthub/langchains_agent/regression/cases/client-server => 
evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/README.md (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/package-lock.json (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/package.json (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/public/favicon.ico (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/public/index.html (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/public/logo192.png (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/public/logo512.png (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/public/manifest.json (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/public/robots.txt (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/App.css (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/App.js (100%) rename {agenthub/langchains_agent/regression/cases/client-server => 
evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/App.test.js (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/index.css (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/index.js (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/logo.svg (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/reportWebVitals.js (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/client/src/setupTests.js (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/package-lock.json (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/package.json (100%) rename {agenthub/langchains_agent/regression/cases/client-server => evaluation/regression/cases/client-server/outputs/langchains_agent}/workspace/server.js (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/client-server/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/express => evaluation/regression/cases/express/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/express => evaluation/regression/cases/express/outputs/langchains_agent}/workspace/index.js (100%) rename {agenthub/langchains_agent/regression/cases/express => 
evaluation/regression/cases/express/outputs/langchains_agent}/workspace/package-lock.json (100%) rename {agenthub/langchains_agent/regression/cases/express => evaluation/regression/cases/express/outputs/langchains_agent}/workspace/package.json (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/express/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/hello-name => evaluation/regression/cases/hello-name/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/hello-name => evaluation/regression/cases/hello-name/outputs/langchains_agent}/workspace/hello_world.sh (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/hello-name/start/hello_world.sh (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/hello-name/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/hello-world => evaluation/regression/cases/hello-world/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/hello-world => evaluation/regression/cases/hello-world/outputs/langchains_agent}/workspace/hello_world.sh (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/hello-world/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/cli.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/index.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/length.js (100%) rename 
{agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/commands/length.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/lowercase.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/commands/lowercase.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/reverse.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/commands/reverse.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/scramble.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/commands/scramble.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/spongebob.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/commands/spongebob.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent}/workspace/commands/uppercase.js (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => 
evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/commands/uppercase.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/start => evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace}/string_cli.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/commands/length.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/commands/lowercase.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/commands/reverse.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/commands/scramble.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/commands/spongebob.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/commands/uppercase.py (100%) rename {agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace => evaluation/regression/cases/node-cli-rewrite/start}/string_cli.py (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/node-cli-rewrite/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help => evaluation/regression/cases/python-cli-help/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/start => evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace}/commands/length.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/start => 
evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace}/commands/lowercase.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/start => evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace}/commands/reverse.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/start => evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace}/commands/scramble.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/start => evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace}/commands/spongebob.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/start => evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace}/commands/uppercase.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help => evaluation/regression/cases/python-cli-help/outputs/langchains_agent}/workspace/string_cli.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/workspace => evaluation/regression/cases/python-cli-help/start}/commands/length.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/workspace => evaluation/regression/cases/python-cli-help/start}/commands/lowercase.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/workspace => evaluation/regression/cases/python-cli-help/start}/commands/reverse.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/workspace => evaluation/regression/cases/python-cli-help/start}/commands/scramble.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/workspace => evaluation/regression/cases/python-cli-help/start}/commands/spongebob.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli-help/workspace => evaluation/regression/cases/python-cli-help/start}/commands/uppercase.py (100%) rename 
{agenthub/langchains_agent => evaluation}/regression/cases/python-cli-help/start/string_cli.py (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/python-cli-help/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/commands/length.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/commands/lowercase.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/commands/reverse.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/commands/scramble.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/commands/spongebob.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/commands/uppercase.py (100%) rename {agenthub/langchains_agent/regression/cases/python-cli => evaluation/regression/cases/python-cli/outputs/langchains_agent}/workspace/string_cli.py (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/python-cli/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/.gitignore (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => 
evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/README.md (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/package-lock.json (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/package.json (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/public/favicon.ico (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/public/index.html (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/public/logo192.png (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/public/logo512.png (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/public/manifest.json (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/public/robots.txt (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/App.css (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/App.js (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/App.test.js 
(100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/index.css (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/index.js (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/logo.svg (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/reportWebVitals.js (100%) rename {agenthub/langchains_agent/regression/cases/react-todo => evaluation/regression/cases/react-todo/outputs/langchains_agent}/workspace/todo-app/src/setupTests.js (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/react-todo/task.txt (100%) rename {agenthub/langchains_agent/regression/cases/server-test => evaluation/regression/cases/server-test/outputs/langchains_agent}/logs.txt (100%) rename {agenthub/langchains_agent/regression/cases/server-test => evaluation/regression/cases/server-test/outputs/langchains_agent}/workspace/README.md (100%) rename {agenthub/langchains_agent/regression/cases/server-test => evaluation/regression/cases/server-test/outputs/langchains_agent}/workspace/server.py (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/server-test/start/server.py (100%) rename {agenthub/langchains_agent => evaluation}/regression/cases/server-test/task.txt (100%) create mode 100755 evaluation/regression/run.sh diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/.env b/agenthub/langchains_agent/regression/cases/client-server/workspace/client/.env deleted file mode 100644 index a43d6bb1e4..0000000000 --- a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/.env +++ /dev/null @@ 
-1 +0,0 @@ -PORT=3001 \ No newline at end of file diff --git a/agenthub/langchains_agent/regression/run.sh b/agenthub/langchains_agent/regression/run.sh deleted file mode 100755 index 715f2a65eb..0000000000 --- a/agenthub/langchains_agent/regression/run.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -set -eo pipefail - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -CASES_DIR=$SCRIPT_DIR/cases - -docker build -t control-loop $SCRIPT_DIR/.. - -# iterate over cases dir -for case in $(ls $CASES_DIR); do - # run the case - if [[ -n $TEST_CASE && $case != $TEST_CASE ]]; then - continue - fi - echo "Running case: $case" - case_dir=$CASES_DIR/$case - task=$(cat $case_dir/task.txt) - rm -rf $case_dir/workspace - if [[ -d $case_dir/start ]]; then - cp -r $case_dir/start $case_dir/workspace - else - mkdir $case_dir/workspace - fi - docker run -e DEBUG=$DEBUG -e OPENAI_API_KEY=$OPENAI_API_KEY -v $case_dir/workspace:/workspace control-loop python /app/main.py -d /workspace -t "${task}" | tee $case_dir/logs.txt - rm -rf $case_dir/workspace/.git -done diff --git a/agenthub/langchains_agent/regression/.gitignore b/evaluation/regression/.gitignore similarity index 100% rename from agenthub/langchains_agent/regression/.gitignore rename to evaluation/regression/.gitignore diff --git a/agenthub/langchains_agent/regression/README.md b/evaluation/regression/README.md similarity index 100% rename from agenthub/langchains_agent/regression/README.md rename to evaluation/regression/README.md diff --git a/agenthub/langchains_agent/regression/cases/client-server/logs.txt b/evaluation/regression/cases/client-server/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/logs.txt rename to evaluation/regression/cases/client-server/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/.gitignore 
b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/.gitignore similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/.gitignore rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/.gitignore diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/README.md b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/README.md similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/README.md rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/README.md diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/package-lock.json b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/package-lock.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/package-lock.json rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/package-lock.json diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/package.json b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/package.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/package.json rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/package.json diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/favicon.ico b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/favicon.ico similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/favicon.ico rename to 
evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/favicon.ico diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/index.html b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/index.html similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/index.html rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/index.html diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/logo192.png b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/logo192.png similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/logo192.png rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/logo192.png diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/logo512.png b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/logo512.png similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/logo512.png rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/logo512.png diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/manifest.json b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/manifest.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/manifest.json rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/manifest.json diff --git 
a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/robots.txt b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/robots.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/public/robots.txt rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/public/robots.txt diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/App.css b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/App.css similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/App.css rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/App.css diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/App.js b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/App.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/App.js rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/App.js diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/App.test.js b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/App.test.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/App.test.js rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/App.test.js diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/index.css b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/index.css similarity index 100% rename from 
agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/index.css rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/index.css diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/index.js b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/index.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/index.js rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/index.js diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/logo.svg b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/logo.svg similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/logo.svg rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/logo.svg diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/reportWebVitals.js b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/reportWebVitals.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/reportWebVitals.js rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/reportWebVitals.js diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/setupTests.js b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/setupTests.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/client/src/setupTests.js rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/client/src/setupTests.js diff --git 
a/agenthub/langchains_agent/regression/cases/client-server/workspace/package-lock.json b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/package-lock.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/package-lock.json rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/package-lock.json diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/package.json b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/package.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/package.json rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/package.json diff --git a/agenthub/langchains_agent/regression/cases/client-server/workspace/server.js b/evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/server.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/workspace/server.js rename to evaluation/regression/cases/client-server/outputs/langchains_agent/workspace/server.js diff --git a/agenthub/langchains_agent/regression/cases/client-server/task.txt b/evaluation/regression/cases/client-server/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/client-server/task.txt rename to evaluation/regression/cases/client-server/task.txt diff --git a/agenthub/langchains_agent/regression/cases/express/logs.txt b/evaluation/regression/cases/express/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/express/logs.txt rename to evaluation/regression/cases/express/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/express/workspace/index.js b/evaluation/regression/cases/express/outputs/langchains_agent/workspace/index.js similarity index 100% rename 
from agenthub/langchains_agent/regression/cases/express/workspace/index.js rename to evaluation/regression/cases/express/outputs/langchains_agent/workspace/index.js diff --git a/agenthub/langchains_agent/regression/cases/express/workspace/package-lock.json b/evaluation/regression/cases/express/outputs/langchains_agent/workspace/package-lock.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/express/workspace/package-lock.json rename to evaluation/regression/cases/express/outputs/langchains_agent/workspace/package-lock.json diff --git a/agenthub/langchains_agent/regression/cases/express/workspace/package.json b/evaluation/regression/cases/express/outputs/langchains_agent/workspace/package.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/express/workspace/package.json rename to evaluation/regression/cases/express/outputs/langchains_agent/workspace/package.json diff --git a/agenthub/langchains_agent/regression/cases/express/task.txt b/evaluation/regression/cases/express/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/express/task.txt rename to evaluation/regression/cases/express/task.txt diff --git a/agenthub/langchains_agent/regression/cases/hello-name/logs.txt b/evaluation/regression/cases/hello-name/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-name/logs.txt rename to evaluation/regression/cases/hello-name/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/hello-name/workspace/hello_world.sh b/evaluation/regression/cases/hello-name/outputs/langchains_agent/workspace/hello_world.sh similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-name/workspace/hello_world.sh rename to evaluation/regression/cases/hello-name/outputs/langchains_agent/workspace/hello_world.sh diff --git 
a/agenthub/langchains_agent/regression/cases/hello-name/start/hello_world.sh b/evaluation/regression/cases/hello-name/start/hello_world.sh similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-name/start/hello_world.sh rename to evaluation/regression/cases/hello-name/start/hello_world.sh diff --git a/agenthub/langchains_agent/regression/cases/hello-name/task.txt b/evaluation/regression/cases/hello-name/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-name/task.txt rename to evaluation/regression/cases/hello-name/task.txt diff --git a/agenthub/langchains_agent/regression/cases/hello-world/logs.txt b/evaluation/regression/cases/hello-world/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-world/logs.txt rename to evaluation/regression/cases/hello-world/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/hello-world/workspace/hello_world.sh b/evaluation/regression/cases/hello-world/outputs/langchains_agent/workspace/hello_world.sh similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-world/workspace/hello_world.sh rename to evaluation/regression/cases/hello-world/outputs/langchains_agent/workspace/hello_world.sh diff --git a/agenthub/langchains_agent/regression/cases/hello-world/task.txt b/evaluation/regression/cases/hello-world/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/hello-world/task.txt rename to evaluation/regression/cases/hello-world/task.txt diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/logs.txt b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/logs.txt rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/logs.txt diff --git 
a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/cli.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/cli.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/cli.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/cli.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/index.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/index.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/index.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/index.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/length.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/length.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/length.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/length.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/length.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/length.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/length.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/length.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/lowercase.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/lowercase.js similarity index 100% rename from 
agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/lowercase.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/lowercase.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/lowercase.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/lowercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/lowercase.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/lowercase.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/reverse.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/reverse.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/reverse.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/reverse.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/reverse.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/reverse.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/reverse.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/reverse.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/scramble.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/scramble.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/scramble.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/scramble.js diff --git 
a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/scramble.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/scramble.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/scramble.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/scramble.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/spongebob.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/spongebob.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/spongebob.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/spongebob.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/spongebob.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/spongebob.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/spongebob.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/spongebob.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/uppercase.js b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/uppercase.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/uppercase.js rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/uppercase.js diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/uppercase.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/uppercase.py similarity index 100% rename from 
agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/commands/uppercase.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/commands/uppercase.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/string_cli.py b/evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/string_cli.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/start/string_cli.py rename to evaluation/regression/cases/node-cli-rewrite/outputs/langchains_agent/workspace/string_cli.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/length.py b/evaluation/regression/cases/node-cli-rewrite/start/commands/length.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/length.py rename to evaluation/regression/cases/node-cli-rewrite/start/commands/length.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/lowercase.py b/evaluation/regression/cases/node-cli-rewrite/start/commands/lowercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/lowercase.py rename to evaluation/regression/cases/node-cli-rewrite/start/commands/lowercase.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/reverse.py b/evaluation/regression/cases/node-cli-rewrite/start/commands/reverse.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/reverse.py rename to evaluation/regression/cases/node-cli-rewrite/start/commands/reverse.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/scramble.py b/evaluation/regression/cases/node-cli-rewrite/start/commands/scramble.py similarity index 100% rename from 
agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/scramble.py rename to evaluation/regression/cases/node-cli-rewrite/start/commands/scramble.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/spongebob.py b/evaluation/regression/cases/node-cli-rewrite/start/commands/spongebob.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/spongebob.py rename to evaluation/regression/cases/node-cli-rewrite/start/commands/spongebob.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/uppercase.py b/evaluation/regression/cases/node-cli-rewrite/start/commands/uppercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/commands/uppercase.py rename to evaluation/regression/cases/node-cli-rewrite/start/commands/uppercase.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/string_cli.py b/evaluation/regression/cases/node-cli-rewrite/start/string_cli.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/workspace/string_cli.py rename to evaluation/regression/cases/node-cli-rewrite/start/string_cli.py diff --git a/agenthub/langchains_agent/regression/cases/node-cli-rewrite/task.txt b/evaluation/regression/cases/node-cli-rewrite/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/node-cli-rewrite/task.txt rename to evaluation/regression/cases/node-cli-rewrite/task.txt diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/logs.txt b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/logs.txt rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/logs.txt diff --git 
a/agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/length.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/length.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/length.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/length.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/lowercase.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/lowercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/lowercase.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/lowercase.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/reverse.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/reverse.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/reverse.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/reverse.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/scramble.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/scramble.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/scramble.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/scramble.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/spongebob.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/spongebob.py similarity index 100% rename from 
agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/spongebob.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/spongebob.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/uppercase.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/uppercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/start/commands/uppercase.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/commands/uppercase.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/string_cli.py b/evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/string_cli.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/string_cli.py rename to evaluation/regression/cases/python-cli-help/outputs/langchains_agent/workspace/string_cli.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/length.py b/evaluation/regression/cases/python-cli-help/start/commands/length.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/length.py rename to evaluation/regression/cases/python-cli-help/start/commands/length.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/lowercase.py b/evaluation/regression/cases/python-cli-help/start/commands/lowercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/lowercase.py rename to evaluation/regression/cases/python-cli-help/start/commands/lowercase.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/reverse.py b/evaluation/regression/cases/python-cli-help/start/commands/reverse.py similarity index 100% 
rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/reverse.py rename to evaluation/regression/cases/python-cli-help/start/commands/reverse.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/scramble.py b/evaluation/regression/cases/python-cli-help/start/commands/scramble.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/scramble.py rename to evaluation/regression/cases/python-cli-help/start/commands/scramble.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/spongebob.py b/evaluation/regression/cases/python-cli-help/start/commands/spongebob.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/spongebob.py rename to evaluation/regression/cases/python-cli-help/start/commands/spongebob.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/uppercase.py b/evaluation/regression/cases/python-cli-help/start/commands/uppercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/workspace/commands/uppercase.py rename to evaluation/regression/cases/python-cli-help/start/commands/uppercase.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/start/string_cli.py b/evaluation/regression/cases/python-cli-help/start/string_cli.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/start/string_cli.py rename to evaluation/regression/cases/python-cli-help/start/string_cli.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli-help/task.txt b/evaluation/regression/cases/python-cli-help/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli-help/task.txt rename to evaluation/regression/cases/python-cli-help/task.txt diff --git 
a/agenthub/langchains_agent/regression/cases/python-cli/logs.txt b/evaluation/regression/cases/python-cli/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/logs.txt rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/length.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/length.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/length.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/length.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/lowercase.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/lowercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/lowercase.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/lowercase.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/reverse.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/reverse.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/reverse.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/reverse.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/scramble.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/scramble.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/scramble.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/scramble.py diff --git 
a/agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/spongebob.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/spongebob.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/spongebob.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/spongebob.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/uppercase.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/uppercase.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/commands/uppercase.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/commands/uppercase.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli/workspace/string_cli.py b/evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/string_cli.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/workspace/string_cli.py rename to evaluation/regression/cases/python-cli/outputs/langchains_agent/workspace/string_cli.py diff --git a/agenthub/langchains_agent/regression/cases/python-cli/task.txt b/evaluation/regression/cases/python-cli/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/python-cli/task.txt rename to evaluation/regression/cases/python-cli/task.txt diff --git a/agenthub/langchains_agent/regression/cases/react-todo/logs.txt b/evaluation/regression/cases/react-todo/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/logs.txt rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/.gitignore 
b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/.gitignore similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/.gitignore rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/.gitignore diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/README.md b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/README.md similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/README.md rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/README.md diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/package-lock.json b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/package-lock.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/package-lock.json rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/package-lock.json diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/package.json b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/package.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/package.json rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/package.json diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/favicon.ico b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/favicon.ico similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/favicon.ico rename to 
evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/favicon.ico diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/index.html b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/index.html similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/index.html rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/index.html diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/logo192.png b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/logo192.png similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/logo192.png rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/logo192.png diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/logo512.png b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/logo512.png similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/logo512.png rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/logo512.png diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/manifest.json b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/manifest.json similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/manifest.json rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/manifest.json diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/robots.txt 
b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/robots.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/public/robots.txt rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/public/robots.txt diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/App.css b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/App.css similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/App.css rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/App.css diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/App.js b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/App.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/App.js rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/App.js diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/App.test.js b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/App.test.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/App.test.js rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/App.test.js diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/index.css b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/index.css similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/index.css rename to 
evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/index.css diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/index.js b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/index.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/index.js rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/index.js diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/logo.svg b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/logo.svg similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/logo.svg rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/logo.svg diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/reportWebVitals.js b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/reportWebVitals.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/reportWebVitals.js rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/reportWebVitals.js diff --git a/agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/setupTests.js b/evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/setupTests.js similarity index 100% rename from agenthub/langchains_agent/regression/cases/react-todo/workspace/todo-app/src/setupTests.js rename to evaluation/regression/cases/react-todo/outputs/langchains_agent/workspace/todo-app/src/setupTests.js diff --git a/agenthub/langchains_agent/regression/cases/react-todo/task.txt b/evaluation/regression/cases/react-todo/task.txt similarity index 100% 
rename from agenthub/langchains_agent/regression/cases/react-todo/task.txt rename to evaluation/regression/cases/react-todo/task.txt diff --git a/agenthub/langchains_agent/regression/cases/server-test/logs.txt b/evaluation/regression/cases/server-test/outputs/langchains_agent/logs.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/server-test/logs.txt rename to evaluation/regression/cases/server-test/outputs/langchains_agent/logs.txt diff --git a/agenthub/langchains_agent/regression/cases/server-test/workspace/README.md b/evaluation/regression/cases/server-test/outputs/langchains_agent/workspace/README.md similarity index 100% rename from agenthub/langchains_agent/regression/cases/server-test/workspace/README.md rename to evaluation/regression/cases/server-test/outputs/langchains_agent/workspace/README.md diff --git a/agenthub/langchains_agent/regression/cases/server-test/workspace/server.py b/evaluation/regression/cases/server-test/outputs/langchains_agent/workspace/server.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/server-test/workspace/server.py rename to evaluation/regression/cases/server-test/outputs/langchains_agent/workspace/server.py diff --git a/agenthub/langchains_agent/regression/cases/server-test/start/server.py b/evaluation/regression/cases/server-test/start/server.py similarity index 100% rename from agenthub/langchains_agent/regression/cases/server-test/start/server.py rename to evaluation/regression/cases/server-test/start/server.py diff --git a/agenthub/langchains_agent/regression/cases/server-test/task.txt b/evaluation/regression/cases/server-test/task.txt similarity index 100% rename from agenthub/langchains_agent/regression/cases/server-test/task.txt rename to evaluation/regression/cases/server-test/task.txt diff --git a/evaluation/regression/run.sh b/evaluation/regression/run.sh new file mode 100755 index 0000000000..357611ee23 --- /dev/null +++ b/evaluation/regression/run.sh @@ 
-0,0 +1,86 @@
+#!/bin/bash
+# Regression runner: for every agent directory directly under agenthub/, run
+# each case in cases/ through opendevin/main.py, keeping the logs and the
+# resulting workspace in cases/<case>/outputs/<agent>/.
+#
+# Environment:
+#   DEBUG           prompted for when empty; exported as "0" if left blank
+#   OPENAI_API_KEY  prompted for (input hidden) when empty
+#   TEST_CASE       optional; cases not matching it are skipped
+#
+# Needs bash 4+ for the associative array below.
+set -eo pipefail
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+CASES_DIR=$SCRIPT_DIR/cases
+AGENTHUB_DIR=$SCRIPT_DIR/../../agenthub
+
+# Check if DEBUG variable is already set
+if [[ -z "${DEBUG:-}" ]]; then
+  read -rp "Enter value for DEBUG (leave blank for default): " debug_value
+  export DEBUG="${debug_value:-0}"
+fi
+
+# Check if OPENAI_API_KEY variable is already set
+if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+  read -rsp "Enter value for OPENAI_API_KEY: " openai_key
+  echo
+  export OPENAI_API_KEY="${openai_key}"
+fi
+
+# Get the MODEL variable. The model name is not a secret, so the input is
+# echoed (no -s); a blank answer selects the default model.
+read -rp "Enter value for model running agents: " model
+MODEL="${model:-gpt-4-0125-preview}"
+
+# hardcode pairs for directory to python class mapping
+declare -A directory_class_pairs=(
+  [langchains_agent]="LangchainsAgent"
+  [codeact_agent]="CodeActAgent"
+)
+
+# for each agent: only look directly inside AGENTHUB_DIR, so the result does
+# not depend on the caller's working directory and the outputs/<agent>
+# directories written below are never mistaken for agents
+for agent_path in "$AGENTHUB_DIR"/*agent/; do
+  # an unmatched glob is left as a literal pattern; skip it
+  [[ -d "$agent_path" ]] || continue
+  agent=$(basename "$agent_path")
+  agent_class="${directory_class_pairs[$agent]:-}"
+  if [[ -z "$agent_class" ]]; then
+    echo "Skipping $agent: no entry in directory_class_pairs" >&2
+    continue
+  fi
+  # iterate over cases dir
+  for case_path in "$CASES_DIR"/*/; do
+    [[ -d "$case_path" ]] || continue
+    case_name=$(basename "$case_path")
+    # run the case; TEST_CASE stays unquoted on the right so it is still
+    # matched as a pattern, as before
+    if [[ -n "${TEST_CASE:-}" && $case_name != $TEST_CASE ]]; then
+      continue
+    fi
+    echo "Running case: $case_name"
+    case_dir=$CASES_DIR/$case_name
+    task=$(<"$case_dir/task.txt")
+    outputs_dir=$case_dir/outputs
+    agent_out_dir=$outputs_dir/$agent
+    echo "agent: $agent_out_dir"
+    # create agent dir if not exist
+    mkdir -p "$agent_out_dir"
+    # start from a fresh workspace; :? aborts instead of expanding to /workspace
+    rm -rf -- "${agent_out_dir:?}/workspace"
+    if [[ -d "$case_dir/start" ]]; then
+      cp -r "$case_dir/start" "$agent_out_dir/workspace"
+    else
+      mkdir "$agent_out_dir/workspace"
+    fi
+    python3 "$SCRIPT_DIR/../../opendevin/main.py" \
+      -d "$agent_out_dir/workspace" \
+      -c "$agent_class" \
+      -t "${task}" \
+      -m "$MODEL" \
+      | tee "$agent_out_dir/logs.txt"
+    rm -rf -- "$agent_out_dir/workspace/.git"
+  done
+done