mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
APP-190 Add browser screenshot support for V1 conversations (#11919)
Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
This commit is contained in:
parent
d7b36c9579
commit
1a983d2978
@ -1,12 +1,26 @@
|
||||
import { describe, it, expect, beforeAll, afterAll, afterEach } from "vitest";
|
||||
import {
|
||||
describe,
|
||||
it,
|
||||
expect,
|
||||
beforeAll,
|
||||
beforeEach,
|
||||
afterAll,
|
||||
afterEach,
|
||||
} from "vitest";
|
||||
import { screen, waitFor, render, cleanup } from "@testing-library/react";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { http, HttpResponse } from "msw";
|
||||
import { useOptimisticUserMessageStore } from "#/stores/optimistic-user-message-store";
|
||||
import { useBrowserStore } from "#/stores/browser-store";
|
||||
import { useCommandStore } from "#/state/command-store";
|
||||
import {
|
||||
createMockMessageEvent,
|
||||
createMockUserMessageEvent,
|
||||
createMockAgentErrorEvent,
|
||||
createMockBrowserObservationEvent,
|
||||
createMockBrowserNavigateActionEvent,
|
||||
createMockExecuteBashActionEvent,
|
||||
createMockExecuteBashObservationEvent,
|
||||
} from "#/mocks/mock-ws-helpers";
|
||||
import {
|
||||
ConnectionStatusComponent,
|
||||
@ -461,7 +475,7 @@ describe("Conversation WebSocket Handler", () => {
|
||||
);
|
||||
|
||||
// Create a test component that displays loading state
|
||||
const HistoryLoadingComponent = () => {
|
||||
function HistoryLoadingComponent() {
|
||||
const context = useConversationWebSocket();
|
||||
const { events } = useEventStore();
|
||||
|
||||
@ -474,7 +488,7 @@ describe("Conversation WebSocket Handler", () => {
|
||||
<div data-testid="expected-event-count">{expectedEventCount}</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
// Render with WebSocket context
|
||||
renderWithWebSocketContext(
|
||||
@ -484,7 +498,9 @@ describe("Conversation WebSocket Handler", () => {
|
||||
);
|
||||
|
||||
// Initially should be loading history
|
||||
expect(screen.getByTestId("is-loading-history")).toHaveTextContent("true");
|
||||
expect(screen.getByTestId("is-loading-history")).toHaveTextContent(
|
||||
"true",
|
||||
);
|
||||
|
||||
// Wait for all events to be received
|
||||
await waitFor(() => {
|
||||
@ -523,7 +539,7 @@ describe("Conversation WebSocket Handler", () => {
|
||||
);
|
||||
|
||||
// Create a test component that displays loading state
|
||||
const HistoryLoadingComponent = () => {
|
||||
function HistoryLoadingComponent() {
|
||||
const context = useConversationWebSocket();
|
||||
|
||||
return (
|
||||
@ -533,7 +549,7 @@ describe("Conversation WebSocket Handler", () => {
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
// Render with WebSocket context
|
||||
renderWithWebSocketContext(
|
||||
@ -583,7 +599,7 @@ describe("Conversation WebSocket Handler", () => {
|
||||
);
|
||||
|
||||
// Create a test component that displays loading state
|
||||
const HistoryLoadingComponent = () => {
|
||||
function HistoryLoadingComponent() {
|
||||
const context = useConversationWebSocket();
|
||||
const { events } = useEventStore();
|
||||
|
||||
@ -595,7 +611,7 @@ describe("Conversation WebSocket Handler", () => {
|
||||
<div data-testid="events-received">{events.length}</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
// Render with WebSocket context
|
||||
renderWithWebSocketContext(
|
||||
@ -605,7 +621,9 @@ describe("Conversation WebSocket Handler", () => {
|
||||
);
|
||||
|
||||
// Initially should be loading history
|
||||
expect(screen.getByTestId("is-loading-history")).toHaveTextContent("true");
|
||||
expect(screen.getByTestId("is-loading-history")).toHaveTextContent(
|
||||
"true",
|
||||
);
|
||||
|
||||
// Wait for all events to be received
|
||||
await waitFor(() => {
|
||||
@ -621,17 +639,133 @@ describe("Conversation WebSocket Handler", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// 9. Terminal I/O Tests (ExecuteBashAction and ExecuteBashObservation)
|
||||
describe("Terminal I/O Integration", () => {
|
||||
it("should append command to store when ExecuteBashAction event is received", async () => {
|
||||
const { createMockExecuteBashActionEvent } = await import(
|
||||
"#/mocks/mock-ws-helpers"
|
||||
// 9. Browser State Tests (BrowserObservation)
|
||||
describe("Browser State Integration", () => {
  // Call the browser store's reset() before every test so values written by
  // one test are not observed by the next.
  beforeEach(() => {
    useBrowserStore.getState().reset();
  });

  it("should update browser store with screenshot when BrowserObservation event is received", async () => {
    // Create a mock BrowserObservation event with screenshot data
    const mockBrowserObsEvent = createMockBrowserObservationEvent(
      "base64-screenshot-data",
      "Page loaded successfully",
    );

    // Set up MSW to send the event when connection is established
    mswServer.use(
      wsLink.addEventListener("connection", ({ client, server }) => {
        server.connect();
        // Send the mock event after connection
        client.send(JSON.stringify(mockBrowserObsEvent));
      }),
    );

    // Render with WebSocket context
    renderWithWebSocketContext(<ConnectionStatusComponent />);

    // Wait for connection
    await waitFor(() => {
      expect(screen.getByTestId("connection-state")).toHaveTextContent(
        "OPEN",
      );
    });

    // Wait for the browser store to be updated with screenshot.
    // The payload does not start with "data:", so the WebSocket handler is
    // expected to store it prefixed with "data:image/png;base64,".
    await waitFor(() => {
      const { screenshotSrc } = useBrowserStore.getState();
      expect(screenshotSrc).toBe(
        "data:image/png;base64,base64-screenshot-data",
      );
    });
  });

  it("should update browser store with URL when BrowserNavigateAction followed by BrowserObservation", async () => {
    // Create mock events - action first, then observation
    const mockBrowserActionEvent = createMockBrowserNavigateActionEvent(
      "https://example.com/test-page",
    );
    const mockBrowserObsEvent = createMockBrowserObservationEvent(
      "base64-screenshot-data",
      "Page loaded successfully",
    );

    // Set up MSW to send both events when connection is established
    mswServer.use(
      wsLink.addEventListener("connection", ({ client, server }) => {
        server.connect();
        // Send action first, then observation
        client.send(JSON.stringify(mockBrowserActionEvent));
        client.send(JSON.stringify(mockBrowserObsEvent));
      }),
    );

    // Render with WebSocket context
    renderWithWebSocketContext(<ConnectionStatusComponent />);

    // Wait for connection
    await waitFor(() => {
      expect(screen.getByTestId("connection-state")).toHaveTextContent(
        "OPEN",
      );
    });

    // Wait for the browser store to be updated with both screenshot and URL.
    // The screenshot is expected in its data-URL form (see the prefix note in
    // the assertion below); the URL comes from the navigate action's `url`.
    await waitFor(() => {
      const { screenshotSrc, url } = useBrowserStore.getState();
      expect(screenshotSrc).toBe(
        "data:image/png;base64,base64-screenshot-data",
      );
      expect(url).toBe("https://example.com/test-page");
    });
  });

  it("should not update browser store when BrowserObservation has no screenshot data", async () => {
    // Capture the value after reset() so the final assertion compares against
    // whatever the store's initial screenshot value is.
    const initialScreenshot = useBrowserStore.getState().screenshotSrc;

    // Create a mock BrowserObservation event WITHOUT screenshot data
    const mockBrowserObsEvent = createMockBrowserObservationEvent(
      null, // no screenshot
      "Browser action completed",
    );

    // Set up MSW to send the event when connection is established
    mswServer.use(
      wsLink.addEventListener("connection", ({ client, server }) => {
        server.connect();
        // Send the mock event after connection
        client.send(JSON.stringify(mockBrowserObsEvent));
      }),
    );

    // Render with WebSocket context
    renderWithWebSocketContext(<ConnectionStatusComponent />);

    // Wait for connection
    await waitFor(() => {
      expect(screen.getByTestId("connection-state")).toHaveTextContent(
        "OPEN",
      );
    });

    // Give some time for any potential updates. A negative assertion cannot
    // be polled with waitFor, so the test waits a fixed 100 ms instead.
    await new Promise((resolve) => {
      setTimeout(resolve, 100);
    });

    // Screenshot should remain unchanged (empty/initial value)
    const { screenshotSrc } = useBrowserStore.getState();
    expect(screenshotSrc).toBe(initialScreenshot);
  });
});
|
||||
|
||||
// 10. Terminal I/O Tests (ExecuteBashAction and ExecuteBashObservation)
|
||||
describe("Terminal I/O Integration", () => {
|
||||
beforeEach(() => {
|
||||
useCommandStore.getState().clearTerminal();
|
||||
});
|
||||
|
||||
it("should append command to store when ExecuteBashAction event is received", async () => {
|
||||
// Create a mock ExecuteBashAction event
|
||||
const mockBashActionEvent = createMockExecuteBashActionEvent("npm test");
|
||||
|
||||
@ -667,14 +801,6 @@ describe("Conversation WebSocket Handler", () => {
|
||||
});
|
||||
|
||||
it("should append output to store when ExecuteBashObservation event is received", async () => {
|
||||
const { createMockExecuteBashObservationEvent } = await import(
|
||||
"#/mocks/mock-ws-helpers"
|
||||
);
|
||||
const { useCommandStore } = await import("#/state/command-store");
|
||||
|
||||
// Clear the command store before test
|
||||
useCommandStore.getState().clearTerminal();
|
||||
|
||||
// Create a mock ExecuteBashObservation event
|
||||
const mockBashObservationEvent = createMockExecuteBashObservationEvent(
|
||||
"PASS tests/example.test.js\n ✓ should work (2 ms)",
|
||||
|
||||
@ -0,0 +1,92 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { getObservationContent } from "../get-observation-content";
|
||||
import { ObservationEvent } from "#/types/v1/core";
|
||||
import { BrowserObservation } from "#/types/v1/core/base/observation";
|
||||
|
||||
describe("getObservationContent - BrowserObservation", () => {
  /**
   * Builds a BrowserObservation event for these tests. Only the three
   * observation fields vary between cases; every other field is a fixed
   * placeholder value.
   */
  const buildBrowserEvent = ({
    output,
    error,
    screenshotData,
  }: {
    output: string;
    error: string | null;
    screenshotData: string | null;
  }): ObservationEvent<BrowserObservation> => ({
    id: "test-id",
    timestamp: "2024-01-01T00:00:00Z",
    source: "environment",
    tool_name: "browser_navigate",
    tool_call_id: "call-id",
    action_id: "action-id",
    observation: {
      kind: "BrowserObservation",
      output,
      error,
      screenshot_data: screenshotData,
    },
  });

  it("should return output content when available", () => {
    const mockEvent = buildBrowserEvent({
      output: "Browser action completed",
      error: null,
      screenshotData: "base64data",
    });

    const result = getObservationContent(mockEvent);

    expect(result).toContain("**Output:**");
    expect(result).toContain("Browser action completed");
  });

  it("should handle error cases properly", () => {
    const mockEvent = buildBrowserEvent({
      output: "",
      error: "Browser action failed",
      screenshotData: null,
    });

    const result = getObservationContent(mockEvent);

    expect(result).toContain("**Error:**");
    expect(result).toContain("Browser action failed");
  });

  it("should provide default message when no output or error", () => {
    // Empty output and null error: a screenshot alone produces no text.
    const mockEvent = buildBrowserEvent({
      output: "",
      error: null,
      screenshotData: "base64data",
    });

    const result = getObservationContent(mockEvent);

    expect(result).toBe("Browser action completed successfully.");
  });

  it("should return output when screenshot_data is null", () => {
    const mockEvent = buildBrowserEvent({
      output: "Page loaded successfully",
      error: null,
      screenshotData: null,
    });

    const result = getObservationContent(mockEvent);

    expect(result).toBe("**Output:**\nPage loaded successfully");
  });
});
|
||||
@ -98,14 +98,16 @@ const getBrowserObservationContent = (
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => c.text)
|
||||
.join("\n")
|
||||
: "";
|
||||
: observation.output || "";
|
||||
|
||||
let contentDetails = "";
|
||||
|
||||
if ("is_error" in observation && observation.is_error) {
|
||||
contentDetails += `**Error:**\n${textContent}`;
|
||||
} else {
|
||||
if (observation.error) {
|
||||
contentDetails += `**Error:**\n${observation.error}`;
|
||||
} else if (textContent) {
|
||||
contentDetails += `**Output:**\n${textContent}`;
|
||||
} else {
|
||||
contentDetails += "Browser action completed successfully.";
|
||||
}
|
||||
|
||||
if (contentDetails.length > MAX_CONTENT_LENGTH) {
|
||||
|
||||
@ -118,7 +118,7 @@ const renderUserMessageWithSkillReady = (
|
||||
);
|
||||
} catch (error) {
|
||||
// If skill ready event creation fails, just render the user message
|
||||
console.error("Failed to create skill ready event:", error);
|
||||
// Failed to create skill ready event, fallback to user message
|
||||
return (
|
||||
<UserAssistantEventMessage
|
||||
event={messageEvent}
|
||||
|
||||
@ -14,6 +14,7 @@ import { useErrorMessageStore } from "#/stores/error-message-store";
|
||||
import { useOptimisticUserMessageStore } from "#/stores/optimistic-user-message-store";
|
||||
import { useV1ConversationStateStore } from "#/stores/v1-conversation-state-store";
|
||||
import { useCommandStore } from "#/state/command-store";
|
||||
import { useBrowserStore } from "#/stores/browser-store";
|
||||
import {
|
||||
isV1Event,
|
||||
isAgentErrorEvent,
|
||||
@ -27,6 +28,8 @@ import {
|
||||
isExecuteBashObservationEvent,
|
||||
isConversationErrorEvent,
|
||||
isPlanningFileEditorObservationEvent,
|
||||
isBrowserObservationEvent,
|
||||
isBrowserNavigateActionEvent,
|
||||
} from "#/types/v1/type-guards";
|
||||
import { ConversationStateUpdateEventStats } from "#/types/v1/core/events/conversation-state-event";
|
||||
import { handleActionEventCacheInvalidation } from "#/utils/cache-utils";
|
||||
@ -383,6 +386,22 @@ export function ConversationWebSocketProvider({
|
||||
.join("\n");
|
||||
appendOutput(textContent);
|
||||
}
|
||||
|
||||
// Handle BrowserObservation events - update browser store with screenshot
|
||||
if (isBrowserObservationEvent(event)) {
|
||||
const { screenshot_data: screenshotData } = event.observation;
|
||||
if (screenshotData) {
|
||||
const screenshotSrc = screenshotData.startsWith("data:")
|
||||
? screenshotData
|
||||
: `data:image/png;base64,${screenshotData}`;
|
||||
useBrowserStore.getState().setScreenshotSrc(screenshotSrc);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle BrowserNavigateAction events - update browser store with URL
|
||||
if (isBrowserNavigateActionEvent(event)) {
|
||||
useBrowserStore.getState().setUrl(event.action.url);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// eslint-disable-next-line no-console
|
||||
|
||||
@ -184,3 +184,55 @@ export const createMockExecuteBashObservationEvent = (
|
||||
},
|
||||
action_id: "bash-action-123",
|
||||
});
|
||||
|
||||
/**
 * Creates a mock BrowserObservation event for testing browser state handling.
 *
 * The returned object is a plain literal with fixed ids
 * ("browser-obs-123", "browser-call-456", "browser-action-123") and a
 * timestamp taken from the current time on each call.
 *
 * @param screenshotData - Value placed in `observation.screenshot_data`;
 *   pass `null` to model an observation without a screenshot.
 * @param output - Value placed in `observation.output`.
 * @param error - Value placed in `observation.error`; `null` means no error.
 * @returns The mock event object.
 */
export const createMockBrowserObservationEvent = (
  screenshotData: string | null = "base64-screenshot-data",
  output: string = "Browser action completed",
  error: string | null = null,
) => ({
  id: "browser-obs-123",
  timestamp: new Date().toISOString(),
  source: "environment",
  tool_name: "browser_navigate",
  tool_call_id: "browser-call-456",
  observation: {
    kind: "BrowserObservation",
    output,
    error,
    screenshot_data: screenshotData,
  },
  action_id: "browser-action-123",
});
|
||||
|
||||
/**
 * Creates a mock BrowserNavigateAction event for testing browser URL extraction.
 *
 * The same `url` is written to `action.url` and into the JSON-encoded
 * `tool_call.function.arguments`, so both views of the call agree.
 * `new_tab` is always `false`, and the timestamp is taken from the current
 * time on each call.
 *
 * @param url - Navigation target stored on the action.
 * @returns The mock event object.
 */
export const createMockBrowserNavigateActionEvent = (
  url: string = "https://example.com",
) => ({
  id: "browser-action-123",
  timestamp: new Date().toISOString(),
  source: "agent",
  thought: [{ type: "text", text: "Navigating to URL" }],
  thinking_blocks: [],
  action: {
    kind: "BrowserNavigateAction",
    url,
    new_tab: false,
  },
  tool_name: "browser_navigate",
  tool_call_id: "browser-call-456",
  tool_call: {
    id: "browser-call-456",
    type: "function",
    function: {
      name: "browser_navigate",
      arguments: JSON.stringify({ url, new_tab: false }),
    },
  },
  llm_response_id: "llm-response-789",
  security_risk: { level: "low" },
});
|
||||
|
||||
@ -7,6 +7,8 @@ import {
|
||||
ExecuteBashObservation,
|
||||
PlanningFileEditorObservation,
|
||||
TerminalObservation,
|
||||
BrowserObservation,
|
||||
BrowserNavigateAction,
|
||||
} from "./core";
|
||||
import { AgentErrorEvent } from "./core/events/observation-event";
|
||||
import { MessageEvent } from "./core/events/message-event";
|
||||
@ -126,6 +128,22 @@ export const isPlanningFileEditorObservationEvent = (
|
||||
isObservationEvent(event) &&
|
||||
event.observation.kind === "PlanningFileEditorObservation";
|
||||
|
||||
/**
 * Type guard function to check if an observation event is a BrowserObservation.
 *
 * Returns true only when `isObservationEvent(event)` holds and the
 * observation's `kind` is exactly "BrowserObservation".
 */
export const isBrowserObservationEvent = (
  event: OpenHandsEvent,
): event is ObservationEvent<BrowserObservation> =>
  isObservationEvent(event) && event.observation.kind === "BrowserObservation";
|
||||
|
||||
/**
 * Type guard function to check if an action event is a BrowserNavigateAction.
 *
 * Returns true only when `isActionEvent(event)` holds and the action's
 * `kind` is exactly "BrowserNavigateAction".
 */
export const isBrowserNavigateActionEvent = (
  event: OpenHandsEvent,
): event is ActionEvent<BrowserNavigateAction> =>
  isActionEvent(event) && event.action.kind === "BrowserNavigateAction";
|
||||
|
||||
/**
|
||||
* Type guard function to check if an event is a system prompt event
|
||||
*/
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user