Merge branch 'main' into enable-v1-for-oss

This commit is contained in:
Tim O'Farrell
2025-12-15 09:30:29 -07:00
committed by GitHub
10 changed files with 14 additions and 9 deletions

View File

@@ -8,7 +8,7 @@
<div align="center">
<a href="https://github.com/OpenHands/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/badge/LICENSE-MIT-20B2AA?style=for-the-badge" alt="MIT License"></a>
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=811504672#gid=811504672"><img src="https://img.shields.io/badge/SWEBench-72.8-00cc00?logoColor=FFE165&style=for-the-badge" alt="Benchmark Score"></a>
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=811504672#gid=811504672"><img src="https://img.shields.io/badge/SWEBench-77.6-00cc00?logoColor=FFE165&style=for-the-badge" alt="Benchmark Score"></a>
<br/>
<a href="https://docs.openhands.dev/sdk"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
<a href="https://arxiv.org/abs/2511.03690"><img src="https://img.shields.io/badge/Paper-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Tech Report"></a>

View File

@@ -1,5 +1,10 @@
# Evaluation

> [!WARNING]
> **This directory is deprecated.** Our new benchmarks are located at [OpenHands/benchmarks](https://github.com/OpenHands/benchmarks).
>
> If you have already implemented a benchmark in this directory and would like to contribute it, we are happy to have the contribution. However, if you are starting anew, please use the new location.

This folder contains code and resources to run experiments and evaluations.

## For Benchmark Users

View File

@@ -1,6 +1,6 @@
import { render, screen, fireEvent } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { MCPServerForm } from "../mcp-server-form";
import { MCPServerForm } from "#/components/features/settings/mcp-settings/mcp-server-form";
// i18n mock
vi.mock("react-i18next", () => ({

View File

@@ -1,6 +1,6 @@
import { render, screen } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import { MCPServerList } from "../mcp-server-list";
import { MCPServerList } from "#/components/features/settings/mcp-settings/mcp-server-list";
// Mock react-i18next
vi.mock("react-i18next", () => ({

View File

@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { getObservationContent } from "../get-observation-content";
import { getObservationContent } from "#/components/v1/chat/event-content-helpers/get-observation-content";
import { ObservationEvent } from "#/types/v1/core";
import { BrowserObservation } from "#/types/v1/core/base/observation";

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { handleStatusMessage } from "../actions";
import { handleStatusMessage } from "#/services/actions";
import { StatusMessage } from "#/types/message";
import { queryClient } from "#/query-client-config";
import { useStatusStore } from "#/state/status-store";

View File

@@ -3,7 +3,7 @@ import toast from "react-hot-toast";
import {
displaySuccessToast,
displayErrorToast,
} from "../custom-toast-handlers";
} from "#/utils/custom-toast-handlers";
// Mock react-hot-toast
vi.mock("react-hot-toast", () => ({

View File

@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { parseMaxBudgetPerTask, extractSettings } from "../settings-utils";
import { parseMaxBudgetPerTask, extractSettings } from "#/utils/settings-utils";
describe("parseMaxBudgetPerTask", () => {
it("should return null for empty string", () => {

View File

@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { calculateToastDuration } from "../toast-duration";
import { calculateToastDuration } from "#/utils/toast-duration";
describe("calculateToastDuration", () => {
it("should return minimum duration for short messages", () => {

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { transformVSCodeUrl } from "../vscode-url-helper";
import { transformVSCodeUrl } from "#/utils/vscode-url-helper";
describe("transformVSCodeUrl", () => {
const originalWindowLocation = window.location;