# OpenHands/.github/workflows/run-eval.yml

# Run evaluation on a PR, after releases, or manually
name: Run Eval

# Runs when a PR is labeled with one of the "run-eval-" labels, after releases, or manually triggered
on:
  # A label was added to a pull request. The job-level `if` further restricts
  # this to the "run-eval-*" labels.
  pull_request:
    types:
      - labeled
  # A release was published.
  release:
    types:
      - published
  # Manual run, with the parameters below supplied by the person triggering it.
  workflow_dispatch:
    inputs:
      # Branch whose code is evaluated.
      branch:
        description: 'Branch to evaluate'
        required: true
        default: 'main'
      # How many evaluation instances to run; restricted to the listed choices.
      eval_instances:
        description: 'Number of evaluation instances'
        required: true
        default: '50'
        type: choice
        options:
          - '1'
          - '2'
          - '50'
          - '100'
      # Optional free-text note; echoed in the Slack message and issue comment.
      reason:
        description: 'Reason for manual trigger'
        required: false
        default: ''

env:
  # Number of the master GitHub issue on which evaluation results are commented.
  # Runs that are not triggered by a pull request (release and manual runs) use
  # this number in place of a PR number.
  # Read from the MASTER_EVAL_ISSUE_NUMBER configuration variable; falls back to
  # '0' when that variable is unset or empty, so set it to the real issue number.
  MASTER_EVAL_ISSUE_NUMBER: ${{ vars.MASTER_EVAL_ISSUE_NUMBER || '0' }}

jobs:
  trigger-job:
    name: Trigger remote eval job
    # Run for every release and every manual dispatch; for pull requests, run
    # only when the label that was just added is one of the run-eval-* labels.
    if: >-
      (github.event_name == 'pull_request' &&
      (github.event.label.name == 'run-eval-1' ||
      github.event.label.name == 'run-eval-2' ||
      github.event.label.name == 'run-eval-50' ||
      github.event.label.name == 'run-eval-100')) ||
      github.event_name == 'release' ||
      github.event_name == 'workflow_dispatch'
    runs-on: blacksmith-4vcpu-ubuntu-2204

    steps:
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          # Ref to check out, by trigger:
          #   pull_request      -> the PR's head branch
          #   workflow_dispatch -> the branch given as the `branch` input
          #   anything else     -> the ref that triggered the workflow
          ref: >-
            ${{
            (github.event_name == 'pull_request' && github.head_ref)
            || (github.event_name == 'workflow_dispatch' && github.event.inputs.branch)
            || github.ref
            }}

      # Derives the repository URL, the branch/tag to evaluate and the number of
      # evaluation instances from the triggering event, and publishes them as the
      # step outputs `repo_url`, `eval_branch` and `eval_instances`.
      - name: Set evaluation parameters
        id: eval_params
        # Event data (branch names, label names, manual inputs) is user-controlled.
        # It is passed to the script through environment variables instead of
        # being expanded into the script text with `${{ }}`, so shell
        # metacharacters in those values are never interpreted as code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REPOSITORY: ${{ github.repository }}
          PR_HEAD_REF: ${{ github.head_ref }}
          RELEASE_REF_NAME: ${{ github.ref_name }}
          LABEL_NAME: ${{ github.event.label.name }}
          DISPATCH_BRANCH: ${{ github.event.inputs.branch }}
          DISPATCH_EVAL_INSTANCES: ${{ github.event.inputs.eval_instances }}
        run: |
          REPO_URL="https://github.com/${REPOSITORY}"
          echo "Repository URL: $REPO_URL"

          # Determine branch and evaluation instances based on trigger type
          case "$EVENT_NAME" in
            pull_request)
              EVAL_BRANCH="$PR_HEAD_REF"
              echo "PR Branch: $EVAL_BRANCH"
              # The label that triggered the run selects the instance count
              case "$LABEL_NAME" in
                run-eval-1)   EVAL_INSTANCES="1" ;;
                run-eval-2)   EVAL_INSTANCES="2" ;;
                run-eval-50)  EVAL_INSTANCES="50" ;;
                run-eval-100) EVAL_INSTANCES="100" ;;
                *)            EVAL_INSTANCES="" ;;
              esac
              ;;
            workflow_dispatch)
              EVAL_BRANCH="$DISPATCH_BRANCH"
              echo "Manual Branch: $EVAL_BRANCH"
              EVAL_INSTANCES="$DISPATCH_EVAL_INSTANCES"
              ;;
            *)
              # For release events, use the tag name and default to 50 instances
              EVAL_BRANCH="$RELEASE_REF_NAME"
              echo "Release Branch/Tag: $EVAL_BRANCH"
              EVAL_INSTANCES="50"
              ;;
          esac

          # Stop here rather than dispatching an evaluation with no instance
          # count (e.g. a label that passed the job filter but matched no case).
          if [[ -z "$EVAL_INSTANCES" ]]; then
            echo "::error::Could not determine the number of evaluation instances"
            exit 1
          fi

          echo "Evaluation instances: $EVAL_INSTANCES"
          {
            echo "repo_url=$REPO_URL"
            echo "eval_branch=$EVAL_BRANCH"
            echo "eval_instances=$EVAL_INSTANCES"
          } >> "$GITHUB_OUTPUT"

      # Dispatches the `create-branch.yml` workflow in OpenHands/evaluation with
      # the parameters computed by the `eval_params` step, then posts a Slack
      # message describing what triggered the evaluation.
      - name: Trigger remote job
        # Event data, step outputs and secrets are passed through environment
        # variables instead of being expanded into the script text with
        # `${{ }}`, so their contents are never interpreted as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REPOSITORY: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
          REF_NAME: ${{ github.ref_name }}
          PR_NUMBER_FROM_EVENT: ${{ github.event.pull_request.number }}
          TRIGGER_REASON: ${{ github.event.inputs.reason || 'No reason provided' }}
          REPO_URL: ${{ steps.eval_params.outputs.repo_url }}
          EVAL_BRANCH: ${{ steps.eval_params.outputs.eval_branch }}
          EVAL_INSTANCES: ${{ steps.eval_params.outputs.eval_instances }}
          PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
        run: |
          # Escape backslashes and double quotes so a value can be embedded in a
          # JSON string literal without breaking the surrounding document.
          json_escape() {
            local s=$1
            s=${s//\\/\\\\}
            s=${s//\"/\\\"}
            printf '%s' "$s"
          }

          # Determine PR number for the remote evaluation system
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            PR_NUMBER="$PR_NUMBER_FROM_EVENT"
          else
            # For non-PR triggers, use the master issue number as PR number
            PR_NUMBER="$MASTER_EVAL_ISSUE_NUMBER"
          fi

          dispatch_payload=$(printf \
            '{"ref": "main", "inputs": {"github-repo": "%s", "github-branch": "%s", "pr-number": "%s", "eval-instances": "%s"}}' \
            "$(json_escape "$REPO_URL")" \
            "$(json_escape "$EVAL_BRANCH")" \
            "$(json_escape "$PR_NUMBER")" \
            "$(json_escape "$EVAL_INSTANCES")")

          # --fail makes curl exit non-zero on an HTTP error response, so a
          # rejected dispatch fails this step instead of being reported as a
          # triggered evaluation by the steps that follow.
          curl --fail --silent --show-error -X POST \
            -H "Authorization: Bearer ${PAT_TOKEN}" \
            -H "Accept: application/vnd.github+json" \
            -d "$dispatch_payload" \
            https://api.github.com/repos/OpenHands/evaluation/actions/workflows/create-branch.yml/dispatches

          # Send Slack message
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            TRIGGER_URL="https://github.com/${REPOSITORY}/pull/${PR_NUMBER_FROM_EVENT}"
            slack_text="PR $TRIGGER_URL has triggered evaluation on ${EVAL_INSTANCES} instances..."
          elif [[ "$EVENT_NAME" == "release" ]]; then
            TRIGGER_URL="https://github.com/${REPOSITORY}/releases/tag/${REF_NAME}"
            slack_text="Release $TRIGGER_URL has triggered evaluation on ${EVAL_INSTANCES} instances..."
          else
            TRIGGER_URL="https://github.com/${REPOSITORY}/actions/runs/${RUN_ID}"
            slack_text="Manual trigger (${TRIGGER_REASON}) has triggered evaluation on ${EVAL_INSTANCES} instances for branch ${EVAL_BRANCH}..."
          fi

          slack_payload=$(printf '{"text":"%s"}' "$(json_escape "$slack_text")")

          curl -X POST -H 'Content-type: application/json' --data "$slack_payload" \
            "https://hooks.slack.com/services/${SLACK_TOKEN}"

      # Posts an "Evaluation Triggered" comment summarising the trigger, branch,
      # instance count and commit of this run.
      - name: Comment on issue/PR
        uses: KeisukeYamashita/create-comment@v1
        with:
          # For PR triggers, comment on the PR. For other triggers, comment on the master issue
          number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || env.MASTER_EVAL_ISSUE_NUMBER }}
          # NOTE(review): presumably makes each run add a new comment rather than
          # reuse an existing one - confirm against the action's documentation.
          unique: false
          # The Trigger line reads "Pull Request #<n>" for PRs, "Release" for
          # releases, and "Manual Trigger: <reason>" otherwise (with
          # "No reason provided" when the reason input is empty).
          comment: |
            **Evaluation Triggered**

            **Trigger:** ${{ github.event_name == 'pull_request' && format('Pull Request #{0}', github.event.pull_request.number) || (github.event_name == 'release' && 'Release') || format('Manual Trigger: {0}', github.event.inputs.reason || 'No reason provided') }}
            **Branch:** ${{ steps.eval_params.outputs.eval_branch }}
            **Instances:** ${{ steps.eval_params.outputs.eval_instances }}
            **Commit:** ${{ github.sha }}

            Running evaluation on the specified branch. Once eval is done, the results will be posted here.