mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <engel.nyst@gmail.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
558 lines
20 KiB
Python
558 lines
20 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import TYPE_CHECKING
|
|
|
|
from jinja2 import Environment, FileSystemLoader
|
|
from server.constants import WEB_HOST
|
|
from storage.repository_store import RepositoryStore
|
|
from storage.stored_repository import StoredRepository
|
|
from storage.user_repo_map import UserRepositoryMap
|
|
from storage.user_repo_map_store import UserRepositoryMapStore
|
|
|
|
from openhands.core.config.openhands_config import OpenHandsConfig
|
|
from openhands.core.logger import openhands_logger as logger
|
|
from openhands.core.schema.agent import AgentState
|
|
from openhands.events import Event, EventSource
|
|
from openhands.events.action import (
|
|
AgentFinishAction,
|
|
MessageAction,
|
|
)
|
|
from openhands.events.event_store_abc import EventStoreABC
|
|
from openhands.events.observation.agent import AgentStateChangedObservation
|
|
from openhands.integrations.service_types import Repository
|
|
from openhands.storage.data_models.conversation_status import ConversationStatus
|
|
|
|
if TYPE_CHECKING:
|
|
from openhands.server.conversation_manager.conversation_manager import (
|
|
ConversationManager,
|
|
)
|
|
|
|
# ---- DO NOT REMOVE ----
# WARNING: Langfuse depends on the WEB_HOST environment variable being set to track events.
HOST = WEB_HOST
# ---- DO NOT REMOVE ----

# Base URLs derived from the configured web host.
HOST_URL = f'https://{HOST}'
GITHUB_WEBHOOK_URL = f'{HOST_URL}/integration/github/events'
GITLAB_WEBHOOK_URL = f'{HOST_URL}/integration/gitlab/events'

# `CONVERSATION_URL` keeps a literal `{}` slot that callers fill in with
# `str.format(conversation_id)`.
conversation_prefix = 'conversations/{}'
CONVERSATION_URL = f'{HOST_URL}/{conversation_prefix}'


def _env_flag(name: str) -> bool:
    """Return True only when env var *name* equals 'true' (case-insensitive)."""
    return os.getenv(name, 'false').lower() == 'true'


# Toggle for auto-response feature that proactively starts conversations with
# users when workflow tests fail
ENABLE_PROACTIVE_CONVERSATION_STARTERS = _env_flag(
    'ENABLE_PROACTIVE_CONVERSATION_STARTERS'
)

# Toggle for solvability report feature
ENABLE_SOLVABILITY_ANALYSIS = _env_flag('ENABLE_SOLVABILITY_ANALYSIS')

# Jinja environment that loads the resolver prompt templates from disk.
OPENHANDS_RESOLVER_TEMPLATES_DIR = 'openhands/integrations/templates/resolver/'
jinja_env = Environment(loader=FileSystemLoader(OPENHANDS_RESOLVER_TEMPLATES_DIR))
|
|
|
|
|
|
def get_oh_labels(web_host: str) -> tuple[str, str]:
    """Pick the OpenHands label names that apply to a given web host.

    Args:
        web_host: The web host string to inspect (surrounding whitespace is
            ignored).

    Returns:
        A tuple ``(oh_label, inline_oh_label)``:
        - ``('openhands-exp', '@openhands-exp')`` when the host contains
          ``'staging'`` or ``'local'``
        - ``('openhands', '@openhands')`` otherwise
    """
    host = web_host.strip()
    # Experimental labels are used on any staging/local deployment.
    if any(marker in host for marker in ('staging', 'local')):
        return 'openhands-exp', '@openhands-exp'
    return 'openhands', '@openhands'
|
|
|
|
|
|
def get_summary_instruction():
    """Render the ``summary_prompt.j2`` template and return the resulting text.

    The template is loaded through the module-level ``jinja_env`` and rendered
    without any variables.
    """
    return jinja_env.get_template('summary_prompt.j2').render()
|
|
|
|
|
|
def has_exact_mention(text: str, mention: str) -> bool:
    """Tell whether ``text`` contains ``mention`` as a standalone token.

    Matching is case-insensitive. The mention must not be glued to a larger
    word: it has to sit at the start of the text or after a character that is
    neither a word character nor ``@``, and it must not be followed by a word
    character or ``-``.

    Args:
        text: The text to search
        mention: The mention to look for (e.g. "@openhands")

    Returns:
        bool: True if the exact mention is found, False otherwise

    Example:
        >>> has_exact_mention("Hello @openhands!", "@openhands")  # True
        >>> has_exact_mention("Hello @openhands-agent!", "@openhands")  # False
        >>> has_exact_mention("(@openhands)", "@openhands")  # True
        >>> has_exact_mention("user@openhands.com", "@openhands")  # False
        >>> has_exact_mention("Hello @OpenHands!", "@openhands")  # True (case-insensitive)
    """
    # Lower-case both sides so the comparison ignores case.
    needle = re.escape(mention.lower())
    haystack = text.lower()

    # Boundary guards keep e-mail addresses and longer handles from matching.
    standalone = re.compile(rf'(?:^|[^\w@]){needle}(?![\w-])')
    return standalone.search(haystack) is not None
|
|
|
|
|
|
def confirm_event_type(event: Event):
    """Return True for agent-state-change observations in a non-ignored state.

    An event qualifies when it is an ``AgentStateChangedObservation`` whose
    ``agent_state`` is none of REJECTED, USER_CONFIRMED, USER_REJECTED,
    LOADING or RUNNING. Any other kind of event yields False.
    """
    if not isinstance(event, AgentStateChangedObservation):
        return False

    ignored_states = (
        AgentState.REJECTED,
        AgentState.USER_CONFIRMED,
        AgentState.USER_REJECTED,
        AgentState.LOADING,
        AgentState.RUNNING,
    )
    return event.agent_state not in ignored_states
|
|
|
|
|
|
def get_readable_error_reason(reason: str):
    """Translate a known ``STATUS$ERROR_LLM_*`` code into a readable sentence.

    Args:
        reason: The raw reason string reported with the error.

    Returns:
        The human-readable explanation for recognised codes; any other value
        is returned unchanged.
    """
    readable_by_code = {
        'STATUS$ERROR_LLM_AUTHENTICATION': 'Authentication with the LLM provider failed. Please check your API key or credentials',
        'STATUS$ERROR_LLM_SERVICE_UNAVAILABLE': 'The LLM service is temporarily unavailable. Please try again later',
        'STATUS$ERROR_LLM_INTERNAL_SERVER_ERROR': 'The LLM provider encountered an internal error. Please try again soon',
        'STATUS$ERROR_LLM_OUT_OF_CREDITS': "You've run out of credits. Please top up to continue",
        'STATUS$ERROR_LLM_CONTENT_POLICY_VIOLATION': 'Content policy violation. The output was blocked by content filtering policy',
    }
    return readable_by_code.get(reason, reason)
|
|
|
|
|
|
def get_summary_for_agent_state(
    observations: list[AgentStateChangedObservation], conversation_link: str
) -> str:
    """Build a user-facing status message from the first agent state observation.

    Args:
        observations: Agent state observations; only ``observations[0]`` is
            inspected.
        conversation_link: URL of the conversation, embedded in the message
            and attached to every log record.

    Returns:
        A message for RATE_LIMITED, ERROR or AWAITING_USER_INPUT; a generic
        "unknown error" message when the list is empty or the state is
        anything else.
    """
    fallback_msg = f'OpenHands encountered an unknown error. [See the conversation]({conversation_link}) for more information, or try again'

    if len(observations) == 0:
        logger.error(
            'Unknown error: No agent state observations found',
            extra={'conversation_link': conversation_link},
        )
        return fallback_msg

    latest: AgentStateChangedObservation = observations[0]
    state = latest.agent_state

    def _log_context(state_repr, observed_reason) -> dict:
        # Fields shared by every log record emitted below.
        return {
            'agent_state': state_repr,
            'conversation_link': conversation_link,
            'observation_reason': observed_reason,
        }

    if state == AgentState.RATE_LIMITED:
        logger.warning(
            'Agent was rate limited',
            extra=_log_context(state.value, getattr(latest, 'reason', None)),
        )
        return 'OpenHands was rate limited by the LLM provider. Please try again later.'

    if state == AgentState.ERROR:
        readable = get_readable_error_reason(latest.reason)
        # Log both the raw reason and its human-readable translation.
        logger.error(
            'Agent encountered an error',
            extra={
                **_log_context(state.value, latest.reason),
                'readable_reason': readable,
            },
        )
        return f'OpenHands encountered an error: **{readable}**.\n\n[See the conversation]({conversation_link}) for more information.'

    if state == AgentState.AWAITING_USER_INPUT:
        logger.info(
            'Agent is awaiting user input',
            extra=_log_context(state.value, getattr(latest, 'reason', None)),
        )
        return f'OpenHands is waiting for your input. [Continue the conversation]({conversation_link}) to provide additional instructions.'

    # Any other state is unexpected here: log it as an error and fall back.
    state_repr = state.value if hasattr(state, 'value') else str(state)
    logger.error(
        'Unknown error: Unhandled agent state',
        extra=_log_context(state_repr, getattr(latest, 'reason', None)),
    )
    return fallback_msg
|
|
|
|
|
|
def get_final_agent_observation(
    event_store: EventStoreABC,
) -> list[AgentStateChangedObservation]:
    """Query the store for the latest environment-sourced agent state change.

    The search runs in reverse order with ``limit=1``; whatever
    ``get_matching_events`` returns is passed through unchanged.
    """
    newest_state_changes = event_store.get_matching_events(
        event_types=(AgentStateChangedObservation,),
        source=EventSource.ENVIRONMENT,
        reverse=True,
        limit=1,
    )
    return newest_state_changes
|
|
|
|
|
|
def get_last_user_msg(event_store: EventStoreABC) -> list[MessageAction]:
    """Query the store for the most recent user ``MessageAction``.

    The search runs in reverse order with ``limit=1``; whatever
    ``get_matching_events`` returns is passed through unchanged.

    Args:
        event_store: The event store to search.

    Returns:
        The result of the ``get_matching_events`` query.
    """
    return event_store.get_matching_events(
        source=EventSource.USER,
        event_types=(MessageAction,),
        limit=1,
        # Must be the boolean True, as in the other queries in this module.
        # The previous string 'true' only worked because any non-empty
        # string is truthy.
        reverse=True,
    )
|
|
|
|
|
|
def extract_summary_from_event_store(
    event_store: EventStoreABC, conversation_id: str
) -> str:
    """
    Get agent summary or alternative message depending on current AgentState

    Looks up the user message carrying the summary instruction, then the
    latest agent message/finish action from that event onwards. When either
    lookup comes back empty, a message derived from the final agent state is
    returned instead.
    """
    conversation_link = CONVERSATION_URL.format(conversation_id)
    summary_prompt = get_summary_instruction()

    # Latest user message whose content matches the summary instruction.
    instruction_hits: list[MessageAction] = event_store.get_matching_events(
        query=json.dumps(summary_prompt),
        source=EventSource.USER,
        event_types=(MessageAction,),
        limit=1,
        reverse=True,
    )

    final_state = get_final_agent_observation(event_store)

    if len(instruction_hits) == 0:
        # Agent did not receive summary instruction
        logger.warning(
            'no_instruction_event_found', extra={'conversation_id': conversation_id}
        )
        return get_summary_for_agent_state(final_state, conversation_link)

    instruction_id: int = instruction_hits[0].id

    # Latest agent reply searched from the instruction event onwards.
    agent_replies: list[MessageAction | AgentFinishAction] = (
        event_store.get_matching_events(
            start_id=instruction_id,
            source=EventSource.AGENT,
            event_types=(MessageAction, AgentFinishAction),
            reverse=True,
            limit=1,
        )
    )

    if len(agent_replies) == 0:
        # Agent failed to generate summary
        logger.warning(
            'no_agent_messages_found', extra={'conversation_id': conversation_id}
        )
        return get_summary_for_agent_state(final_state, conversation_link)

    reply: MessageAction | AgentFinishAction = agent_replies[0]
    # A plain message carries the summary in `content`; a finish action
    # carries it in `final_thought`.
    return reply.content if isinstance(reply, MessageAction) else reply.final_thought
|
|
|
|
|
|
async def get_event_store_from_conversation_manager(
    conversation_manager: ConversationManager, conversation_id: str
) -> EventStoreABC:
    """Fetch the event store of a running conversation.

    Args:
        conversation_manager: Manager queried for agent loop info.
        conversation_id: ID of the conversation to look up.

    Returns:
        The ``event_store`` of the first agent loop info returned.

    Raises:
        RuntimeError: If no agent loop info is returned, the first one is not
            in RUNNING status, or its event store is falsy.
    """
    loop_infos = await conversation_manager.get_agent_loop_info(
        filter_to_sids={conversation_id}
    )

    is_running = bool(loop_infos) and (
        loop_infos[0].status == ConversationStatus.RUNNING
    )
    if not is_running:
        raise RuntimeError(f'conversation_not_running:{conversation_id}')

    store = loop_infos[0].event_store
    if store:
        return store
    raise RuntimeError(f'event_store_missing:{conversation_id}')
|
|
|
|
|
|
async def get_last_user_msg_from_conversation_manager(
    conversation_manager: ConversationManager, conversation_id: str
):
    """Return the last user message of a running conversation.

    Resolves the conversation's event store and delegates to
    ``get_last_user_msg``. Errors raised while resolving the store propagate
    to the caller.
    """
    store = await get_event_store_from_conversation_manager(
        conversation_manager, conversation_id
    )
    last_user_messages = get_last_user_msg(store)
    return last_user_messages
|
|
|
|
|
|
async def extract_summary_from_conversation_manager(
    conversation_manager: ConversationManager, conversation_id: str
) -> str:
    """
    Get agent summary or alternative message depending on current AgentState

    The summary is extracted from the conversation's event store and returned
    with the conversation footer appended.
    """
    store = await get_event_store_from_conversation_manager(
        conversation_manager, conversation_id
    )
    return append_conversation_footer(
        extract_summary_from_event_store(store, conversation_id),
        conversation_id,
    )
|
|
|
|
|
|
def append_conversation_footer(message: str, conversation_id: str) -> str:
    """
    Return ``message`` followed by a small "View full conversation" footer.

    Args:
        message: The original message content
        conversation_id: The conversation ID used to build the link

    Returns:
        The message with the conversation footer appended
    """
    # The link target comes from the module-level CONVERSATION_URL template.
    conversation_link = CONVERSATION_URL.format(conversation_id)
    return message + f'\n\n<sub>[View full conversation]({conversation_link})</sub>'
|
|
|
|
|
|
async def store_repositories_in_db(repos: list[Repository], user_id: str) -> None:
    """
    Store repositories in DB and create user-repository mappings

    Failures while storing are logged as a warning and not re-raised.

    Args:
        repos: List of Repository objects to store
        user_id: User ID associated with these repositories
    """
    # Build, per repository, the stored-repo row and the user mapping row.
    # Both share the same '<provider>##<id>' key.
    row_pairs = []
    for repo in repos:
        repo_id = f'{repo.git_provider.value}##{str(repo.id)}'
        repo_row = StoredRepository(
            repo_name=repo.full_name,
            repo_id=repo_id,
            is_public=repo.is_public,
            # Optional fields set to None by default
            has_microagent=None,
            has_setup_script=None,
        )
        mapping_row = UserRepositoryMap(user_id=user_id, repo_id=repo_id, admin=None)
        row_pairs.append((repo_row, mapping_row))

    stored_repos = [repo_row for repo_row, _ in row_pairs]
    user_repos = [mapping_row for _, mapping_row in row_pairs]

    # Get config instance
    config = OpenHandsConfig()

    try:
        # Repositories go to the repos table first, then the user mappings.
        RepositoryStore.get_instance(config).store_projects(stored_repos)
        UserRepositoryMapStore.get_instance(config).store_user_repo_mappings(
            user_repos
        )
        logger.info(f'Saved repos for user {user_id}')
    except Exception:
        # Best effort: a storage failure must not break the caller.
        logger.warning('Failed to save repos', exc_info=True)
|
|
|
|
|
|
def infer_repo_from_message(user_msg: str) -> list[str]:
    """
    Extract repository names in the format 'owner/repo' from a message.

    Two sources are recognised, in priority order:
    1. Git URLs on github.com, gitlab.com and bitbucket.org
    2. Direct ``owner/repo`` mentions in the text

    Sentence-ending periods are not part of the repository name
    ("see owner/repo." yields "owner/repo"), and every name is reported only
    once, in order of first appearance.

    Args:
        user_msg: Input message that may contain repository references

    Returns:
        List of repository names in 'owner/repo' format, empty list if none found
    """
    # Normalize the message by removing extra whitespace and newlines
    normalized_msg = re.sub(r'\s+', ' ', user_msg.strip())

    # Pattern to match Git URLs from GitHub, GitLab, and BitBucket
    # Captures: owner, repo (with optional .git extension)
    git_url_pattern = r'https?://(?:github\.com|gitlab\.com|bitbucket\.org)/([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+?)(?:\.git)?(?:[/?#].*?)?(?=\s|$|[^\w.-])'

    # Pattern to match direct owner/repo mentions (e.g., "OpenHands/OpenHands")
    # Must be delimited by whitespace or specific characters to avoid false positives
    direct_pattern = (
        r'(?:^|\s|[\[\(\'"])([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)(?=\s|$|[\]\)\'",.])'
    )

    # Mentions whose second half ends like this are treated as file paths.
    file_suffixes = ('.txt', '.md', '.py', '.js')

    matches: list[str] = []

    # First, find all Git URLs (highest priority)
    for owner, repo in re.findall(git_url_pattern, normalized_msg):
        # '.' is a legal name character, so the pattern also captures a
        # sentence-ending period; drop it, then any .git extension.
        repo = re.sub(r'\.git$', '', repo.rstrip('.'))
        full_match = f'{owner}/{repo}'
        if repo and full_match not in matches:
            matches.append(full_match)

    # Second, find all direct owner/repo mentions
    for owner, repo in re.findall(direct_pattern, normalized_msg):
        # Same trailing-period cleanup as for URLs.
        repo = repo.rstrip('.')
        if not repo:
            continue
        full_match = f'{owner}/{repo}'

        # Skip if it looks like a version number, date, or file path
        looks_like_non_repo = (
            re.match(r'^\d+\.\d+/\d+\.\d+$', full_match)  # version numbers
            or re.match(r'^\d{1,2}/\d{1,2}$', full_match)  # dates
            or re.match(r'^[A-Z]/[A-Z]$', full_match)  # single letters
            or repo.endswith(file_suffixes)  # file extensions
            or repo.count('.') >= 2  # complex file paths
        )
        if looks_like_non_repo:
            continue

        # Avoid duplicates from Git URLs or earlier mentions
        if full_match not in matches:
            matches.append(full_match)

    return matches
|
|
|
|
|
|
def filter_potential_repos_by_user_msg(
    user_msg: str, user_repos: list[Repository]
) -> tuple[bool, list[Repository]]:
    """Narrow ``user_repos`` down to those referenced in ``user_msg``.

    A repository is kept when any name inferred from the message is a
    case-insensitive substring of its ``full_name``.

    Returns:
        ``(True, [repo])`` when exactly one repository is kept;
        ``(False, kept[0:99])`` when several are kept;
        ``(False, user_repos[0:99])`` when nothing was inferred or kept.
    """
    wanted = [name.lower() for name in infer_repo_from_message(user_msg)]
    if not wanted:
        return False, user_repos[0:99]

    candidates = []
    for repo in user_repos:
        full_name = repo.full_name.lower()
        # `any` stops at the first hit, so each repo is added at most once.
        if any(name in full_name for name in wanted):
            candidates.append(repo)

    if len(candidates) == 1:
        # Exactly one candidate: treat it as the match.
        return True, candidates

    if not candidates:
        # No repos matched, return original list
        return False, user_repos[0:99]

    # Several candidates: let the caller choose among them.
    return False, candidates[0:99]
|
|
|
|
|
|
def markdown_to_jira_markup(markdown_text: str) -> str:
    """
    Convert markdown text to Jira Wiki Markup format.

    Handles headers, fenced and inline code, bold, italic, links, unordered
    and ordered lists, strikethrough, horizontal rules, blockquotes and basic
    tables. Code is converted first and set aside, so none of the other rules
    can rewrite its contents. The function is designed to be exception-safe:
    on any conversion error the original text is returned.

    Args:
        markdown_text: The markdown text to convert

    Returns:
        str: The converted Jira Wiki Markup text; '' for empty or non-string
        input
    """
    if not markdown_text or not isinstance(markdown_text, str):
        return ''

    try:
        # Converted code snippets are parked here and swapped for NUL-delimited
        # placeholders, so later rules (headers, emphasis, lists, tables, ...)
        # cannot touch code contents such as `# comment` or `a*b*c`.
        stash: list[str] = []

        def _stash(snippet: str) -> str:
            stash.append(snippet)
            return f'\x00{len(stash) - 1}\x00'

        def _fenced_code(match) -> str:
            language, body = match.group(1), match.group(2)
            opener = f'{{code:{language}}}' if language else '{code}'
            return _stash(f'{opener}\n{body}\n{{code}}')

        # Fenced code blocks first (with or without a language tag), then
        # inline code (`code`).
        text = re.sub(
            r'```(\w*)\n(.*?)\n```', _fenced_code, markdown_text, flags=re.DOTALL
        )
        text = re.sub(r'`([^`]+)`', lambda m: _stash('{{' + m.group(1) + '}}'), text)

        # Headers (# .. ######). `[ \t]+` keeps the match on a single line.
        text = re.sub(
            r'^(#{1,6})[ \t]+(.*?)$',
            lambda m: f'h{len(m.group(1))}. {m.group(2)}',
            text,
            flags=re.MULTILINE,
        )

        # Bold (double markers) goes through temporary control-character
        # markers so the single-asterisk italic rule cannot re-match it.
        bold_open, bold_close = '\x01', '\x02'

        def _bold(match) -> str:
            return f'{bold_open}{match.group(1)}{bold_close}'

        text = re.sub(r'\*\*(.*?)\*\*', _bold, text)
        text = re.sub(r'__(.*?)__', _bold, text)

        # Single-asterisk italics. The opening marker must hug the text and
        # the span must stay on one line; otherwise the bullets of
        # "* a\n* b" would be paired up as emphasis delimiters.
        text = re.sub(r'\*(?!\s)([^*\n]+?)(?<!\s)\*', r'_\1_', text)

        # Underscore italics (_text_) are already valid Jira markup, so they
        # need no rewriting.

        # Finally, restore bold markers
        text = text.replace(bold_open, '*').replace(bold_close, '*')

        # Convert links [text](url)
        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'[\1|\2]', text)

        # Unordered lists (- or * or +). `[ \t]` instead of `\s` so the
        # pattern cannot swallow preceding blank lines.
        text = re.sub(
            r'^[ \t]*[-*+][ \t]+(.*?)$', r'* \1', text, flags=re.MULTILINE
        )

        # Convert ordered lists (1. 2. etc.)
        text = re.sub(
            r'^[ \t]*\d+\.[ \t]+(.*?)$', r'# \1', text, flags=re.MULTILINE
        )

        # Convert strikethrough (~~text~~)
        text = re.sub(r'~~(.*?)~~', r'-\1-', text)

        # Convert horizontal rules (---, ***, ___)
        text = re.sub(
            r'^[ \t]*[-*_]{3,}[ \t]*$', r'----', text, flags=re.MULTILINE
        )

        # Convert blockquotes (> text)
        text = re.sub(r'^>[ \t]+(.*?)$', r'bq. \1', text, flags=re.MULTILINE)

        # Tables (basic support): normalize cell padding and drop the
        # markdown separator row. A row counts as a separator only when every
        # cell looks like ---, :--, --: or :-:, so data rows that merely
        # contain '---' are kept.
        separator_cell = re.compile(r':?-+:?')
        converted_lines = []
        for line in text.split('\n'):
            stripped = line.strip()
            if stripped.startswith('|') and stripped.endswith('|'):
                cells = [cell.strip() for cell in line.split('|')[1:-1]]
                if cells and all(separator_cell.fullmatch(cell) for cell in cells):
                    continue
                converted_lines.append('|' + '|'.join(cells) + '|')
            else:
                converted_lines.append(line)
        text = '\n'.join(converted_lines)

        # Put the converted code snippets back in place of their placeholders.
        return re.sub(r'\x00(\d+)\x00', lambda m: stash[int(m.group(1))], text)

    except Exception as e:
        # Log the error but don't raise it - return original text as fallback
        logger.warning('Error converting markdown to Jira markup: %s', e)
        return markdown_text
|