Robert Brennan b5e00f577c
Replace All-Hands-AI references with OpenHands (#11287)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-10-26 01:52:45 +02:00

558 lines
20 KiB
Python

from __future__ import annotations
import json
import os
import re
from typing import TYPE_CHECKING
from jinja2 import Environment, FileSystemLoader
from server.constants import WEB_HOST
from storage.repository_store import RepositoryStore
from storage.stored_repository import StoredRepository
from storage.user_repo_map import UserRepositoryMap
from storage.user_repo_map_store import UserRepositoryMapStore
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema.agent import AgentState
from openhands.events import Event, EventSource
from openhands.events.action import (
AgentFinishAction,
MessageAction,
)
from openhands.events.event_store_abc import EventStoreABC
from openhands.events.observation.agent import AgentStateChangedObservation
from openhands.integrations.service_types import Repository
from openhands.storage.data_models.conversation_status import ConversationStatus
if TYPE_CHECKING:
from openhands.server.conversation_manager.conversation_manager import (
ConversationManager,
)
# ---- DO NOT REMOVE ----
# WARNING: Langfuse depends on the WEB_HOST environment variable being set to track events.
HOST = WEB_HOST
# ---- DO NOT REMOVE ----
HOST_URL = f'https://{HOST}'
GITHUB_WEBHOOK_URL = f'{HOST_URL}/integration/github/events'
GITLAB_WEBHOOK_URL = f'{HOST_URL}/integration/gitlab/events'
conversation_prefix = 'conversations/{}'
CONVERSATION_URL = f'{HOST_URL}/{conversation_prefix}'
# Toggle for auto-response feature that proactively starts conversations with users when workflow tests fail
ENABLE_PROACTIVE_CONVERSATION_STARTERS = (
os.getenv('ENABLE_PROACTIVE_CONVERSATION_STARTERS', 'false').lower() == 'true'
)
# Toggle for solvability report feature
ENABLE_SOLVABILITY_ANALYSIS = (
os.getenv('ENABLE_SOLVABILITY_ANALYSIS', 'false').lower() == 'true'
)
OPENHANDS_RESOLVER_TEMPLATES_DIR = 'openhands/integrations/templates/resolver/'
jinja_env = Environment(loader=FileSystemLoader(OPENHANDS_RESOLVER_TEMPLATES_DIR))
def get_oh_labels(web_host: str) -> tuple[str, str]:
"""Get the OpenHands labels based on the web host.
Args:
web_host: The web host string to check
Returns:
A tuple of (oh_label, inline_oh_label) where:
- oh_label is 'openhands-exp' for staging/local hosts, 'openhands' otherwise
- inline_oh_label is '@openhands-exp' for staging/local hosts, '@openhands' otherwise
"""
web_host = web_host.strip()
is_staging_or_local = 'staging' in web_host or 'local' in web_host
oh_label = 'openhands-exp' if is_staging_or_local else 'openhands'
inline_oh_label = '@openhands-exp' if is_staging_or_local else '@openhands'
return oh_label, inline_oh_label
def get_summary_instruction():
summary_instruction_template = jinja_env.get_template('summary_prompt.j2')
summary_instruction = summary_instruction_template.render()
return summary_instruction
def has_exact_mention(text: str, mention: str) -> bool:
"""Check if the text contains an exact mention (not part of a larger word).
Args:
text: The text to check for mentions
mention: The mention to look for (e.g. "@openhands")
Returns:
bool: True if the exact mention is found, False otherwise
Example:
>>> has_exact_mention("Hello @openhands!", "@openhands") # True
>>> has_exact_mention("Hello @openhands-agent!", "@openhands") # False
>>> has_exact_mention("(@openhands)", "@openhands") # True
>>> has_exact_mention("user@openhands.com", "@openhands") # False
>>> has_exact_mention("Hello @OpenHands!", "@openhands") # True (case-insensitive)
"""
# Convert both text and mention to lowercase for case-insensitive matching
text_lower = text.lower()
mention_lower = mention.lower()
pattern = re.escape(mention_lower)
# Match mention that is not part of a larger word
return bool(re.search(rf'(?:^|[^\w@]){pattern}(?![\w-])', text_lower))
def confirm_event_type(event: Event):
return isinstance(event, AgentStateChangedObservation) and not (
event.agent_state == AgentState.REJECTED
or event.agent_state == AgentState.USER_CONFIRMED
or event.agent_state == AgentState.USER_REJECTED
or event.agent_state == AgentState.LOADING
or event.agent_state == AgentState.RUNNING
)
def get_readable_error_reason(reason: str):
if reason == 'STATUS$ERROR_LLM_AUTHENTICATION':
reason = 'Authentication with the LLM provider failed. Please check your API key or credentials'
elif reason == 'STATUS$ERROR_LLM_SERVICE_UNAVAILABLE':
reason = 'The LLM service is temporarily unavailable. Please try again later'
elif reason == 'STATUS$ERROR_LLM_INTERNAL_SERVER_ERROR':
reason = 'The LLM provider encountered an internal error. Please try again soon'
elif reason == 'STATUS$ERROR_LLM_OUT_OF_CREDITS':
reason = "You've run out of credits. Please top up to continue"
elif reason == 'STATUS$ERROR_LLM_CONTENT_POLICY_VIOLATION':
reason = 'Content policy violation. The output was blocked by content filtering policy'
return reason
def get_summary_for_agent_state(
observations: list[AgentStateChangedObservation], conversation_link: str
) -> str:
unknown_error_msg = f'OpenHands encountered an unknown error. [See the conversation]({conversation_link}) for more information, or try again'
if len(observations) == 0:
logger.error(
'Unknown error: No agent state observations found',
extra={'conversation_link': conversation_link},
)
return unknown_error_msg
observation: AgentStateChangedObservation = observations[0]
state = observation.agent_state
if state == AgentState.RATE_LIMITED:
logger.warning(
'Agent was rate limited',
extra={
'agent_state': state.value,
'conversation_link': conversation_link,
'observation_reason': getattr(observation, 'reason', None),
},
)
return 'OpenHands was rate limited by the LLM provider. Please try again later.'
if state == AgentState.ERROR:
reason = observation.reason
reason = get_readable_error_reason(reason)
logger.error(
'Agent encountered an error',
extra={
'agent_state': state.value,
'conversation_link': conversation_link,
'observation_reason': observation.reason,
'readable_reason': reason,
},
)
return f'OpenHands encountered an error: **{reason}**.\n\n[See the conversation]({conversation_link}) for more information.'
if state == AgentState.AWAITING_USER_INPUT:
logger.info(
'Agent is awaiting user input',
extra={
'agent_state': state.value,
'conversation_link': conversation_link,
'observation_reason': getattr(observation, 'reason', None),
},
)
return f'OpenHands is waiting for your input. [Continue the conversation]({conversation_link}) to provide additional instructions.'
# Log unknown agent state as error
logger.error(
'Unknown error: Unhandled agent state',
extra={
'agent_state': state.value if hasattr(state, 'value') else str(state),
'conversation_link': conversation_link,
'observation_reason': getattr(observation, 'reason', None),
},
)
return unknown_error_msg
def get_final_agent_observation(
event_store: EventStoreABC,
) -> list[AgentStateChangedObservation]:
return event_store.get_matching_events(
source=EventSource.ENVIRONMENT,
event_types=(AgentStateChangedObservation,),
limit=1,
reverse=True,
)
def get_last_user_msg(event_store: EventStoreABC) -> list[MessageAction]:
return event_store.get_matching_events(
source=EventSource.USER, event_types=(MessageAction,), limit=1, reverse='true'
)
def extract_summary_from_event_store(
event_store: EventStoreABC, conversation_id: str
) -> str:
"""
Get agent summary or alternative message depending on current AgentState
"""
conversation_link = CONVERSATION_URL.format(conversation_id)
summary_instruction = get_summary_instruction()
instruction_event: list[MessageAction] = event_store.get_matching_events(
query=json.dumps(summary_instruction),
source=EventSource.USER,
event_types=(MessageAction,),
limit=1,
reverse=True,
)
final_agent_observation = get_final_agent_observation(event_store)
# Find summary instruction event ID
if len(instruction_event) == 0:
logger.warning(
'no_instruction_event_found', extra={'conversation_id': conversation_id}
)
return get_summary_for_agent_state(
final_agent_observation, conversation_link
) # Agent did not receive summary instruction
event_id: int = instruction_event[0].id
agent_messages: list[MessageAction | AgentFinishAction] = (
event_store.get_matching_events(
start_id=event_id,
source=EventSource.AGENT,
event_types=(MessageAction, AgentFinishAction),
reverse=True,
limit=1,
)
)
if len(agent_messages) == 0:
logger.warning(
'no_agent_messages_found', extra={'conversation_id': conversation_id}
)
return get_summary_for_agent_state(
final_agent_observation, conversation_link
) # Agent failed to generate summary
summary_event: MessageAction | AgentFinishAction = agent_messages[0]
if isinstance(summary_event, MessageAction):
return summary_event.content
return summary_event.final_thought
async def get_event_store_from_conversation_manager(
conversation_manager: ConversationManager, conversation_id: str
) -> EventStoreABC:
agent_loop_infos = await conversation_manager.get_agent_loop_info(
filter_to_sids={conversation_id}
)
if not agent_loop_infos or agent_loop_infos[0].status != ConversationStatus.RUNNING:
raise RuntimeError(f'conversation_not_running:{conversation_id}')
event_store = agent_loop_infos[0].event_store
if not event_store:
raise RuntimeError(f'event_store_missing:{conversation_id}')
return event_store
async def get_last_user_msg_from_conversation_manager(
conversation_manager: ConversationManager, conversation_id: str
):
event_store = await get_event_store_from_conversation_manager(
conversation_manager, conversation_id
)
return get_last_user_msg(event_store)
async def extract_summary_from_conversation_manager(
conversation_manager: ConversationManager, conversation_id: str
) -> str:
"""
Get agent summary or alternative message depending on current AgentState
"""
event_store = await get_event_store_from_conversation_manager(
conversation_manager, conversation_id
)
summary = extract_summary_from_event_store(event_store, conversation_id)
return append_conversation_footer(summary, conversation_id)
def append_conversation_footer(message: str, conversation_id: str) -> str:
"""
Append a small footer with the conversation URL to a message.
Args:
message: The original message content
conversation_id: The conversation ID to link to
Returns:
The message with the conversation footer appended
"""
conversation_link = CONVERSATION_URL.format(conversation_id)
footer = f'\n\n<sub>[View full conversation]({conversation_link})</sub>'
return message + footer
async def store_repositories_in_db(repos: list[Repository], user_id: str) -> None:
"""
Store repositories in DB and create user-repository mappings
Args:
repos: List of Repository objects to store
user_id: User ID associated with these repositories
"""
# Convert Repository objects to StoredRepository objects
# Convert Repository objects to UserRepositoryMap objects
stored_repos = []
user_repos = []
for repo in repos:
repo_id = f'{repo.git_provider.value}##{str(repo.id)}'
stored_repo = StoredRepository(
repo_name=repo.full_name,
repo_id=repo_id,
is_public=repo.is_public,
# Optional fields set to None by default
has_microagent=None,
has_setup_script=None,
)
stored_repos.append(stored_repo)
user_repo_map = UserRepositoryMap(user_id=user_id, repo_id=repo_id, admin=None)
user_repos.append(user_repo_map)
# Get config instance
config = OpenHandsConfig()
try:
# Store repositories in the repos table
repo_store = RepositoryStore.get_instance(config)
repo_store.store_projects(stored_repos)
# Store user-repository mappings in the user-repos table
user_repo_store = UserRepositoryMapStore.get_instance(config)
user_repo_store.store_user_repo_mappings(user_repos)
logger.info(f'Saved repos for user {user_id}')
except Exception:
logger.warning('Failed to save repos', exc_info=True)
def infer_repo_from_message(user_msg: str) -> list[str]:
"""
Extract all repository names in the format 'owner/repo' from various Git provider URLs
and direct mentions in text. Supports GitHub, GitLab, and BitBucket.
Args:
user_msg: Input message that may contain repository references
Returns:
List of repository names in 'owner/repo' format, empty list if none found
"""
# Normalize the message by removing extra whitespace and newlines
normalized_msg = re.sub(r'\s+', ' ', user_msg.strip())
# Pattern to match Git URLs from GitHub, GitLab, and BitBucket
# Captures: protocol, domain, owner, repo (with optional .git extension)
git_url_pattern = r'https?://(?:github\.com|gitlab\.com|bitbucket\.org)/([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+?)(?:\.git)?(?:[/?#].*?)?(?=\s|$|[^\w.-])'
# Pattern to match direct owner/repo mentions (e.g., "OpenHands/OpenHands")
# Must be surrounded by word boundaries or specific characters to avoid false positives
direct_pattern = (
r'(?:^|\s|[\[\(\'"])([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)(?=\s|$|[\]\)\'",.])'
)
matches = []
# First, find all Git URLs (highest priority)
git_matches = re.findall(git_url_pattern, normalized_msg)
for owner, repo in git_matches:
# Remove .git extension if present
repo = re.sub(r'\.git$', '', repo)
matches.append(f'{owner}/{repo}')
# Second, find all direct owner/repo mentions
direct_matches = re.findall(direct_pattern, normalized_msg)
for owner, repo in direct_matches:
full_match = f'{owner}/{repo}'
# Skip if it looks like a version number, date, or file path
if (
re.match(r'^\d+\.\d+/\d+\.\d+$', full_match) # version numbers
or re.match(r'^\d{1,2}/\d{1,2}$', full_match) # dates
or re.match(r'^[A-Z]/[A-Z]$', full_match) # single letters
or repo.endswith('.txt')
or repo.endswith('.md') # file extensions
or repo.endswith('.py')
or repo.endswith('.js')
or '.' in repo
and len(repo.split('.')) > 2
): # complex file paths
continue
# Avoid duplicates from Git URLs already found
if full_match not in matches:
matches.append(full_match)
return matches
def filter_potential_repos_by_user_msg(
user_msg: str, user_repos: list[Repository]
) -> tuple[bool, list[Repository]]:
"""Filter repositories based on user message inference."""
inferred_repos = infer_repo_from_message(user_msg)
if not inferred_repos:
return False, user_repos[0:99]
final_repos = []
for repo in user_repos:
# Check if the repo matches any of the inferred repositories
for inferred_repo in inferred_repos:
if inferred_repo.lower() in repo.full_name.lower():
final_repos.append(repo)
break # Avoid adding the same repo multiple times
# no repos matched, return original list
if len(final_repos) == 0:
return False, user_repos[0:99]
# Found exact match
elif len(final_repos) == 1:
return True, final_repos
# Found partial matches
return False, final_repos[0:99]
def markdown_to_jira_markup(markdown_text: str) -> str:
"""
Convert markdown text to Jira Wiki Markup format.
This function handles common markdown elements and converts them to their
Jira Wiki Markup equivalents. It's designed to be exception-safe.
Args:
markdown_text: The markdown text to convert
Returns:
str: The converted Jira Wiki Markup text
"""
if not markdown_text or not isinstance(markdown_text, str):
return ''
try:
# Work with a copy to avoid modifying the original
text = markdown_text
# Convert headers (# ## ### #### ##### ######)
text = re.sub(r'^#{6}\s+(.*?)$', r'h6. \1', text, flags=re.MULTILINE)
text = re.sub(r'^#{5}\s+(.*?)$', r'h5. \1', text, flags=re.MULTILINE)
text = re.sub(r'^#{4}\s+(.*?)$', r'h4. \1', text, flags=re.MULTILINE)
text = re.sub(r'^#{3}\s+(.*?)$', r'h3. \1', text, flags=re.MULTILINE)
text = re.sub(r'^#{2}\s+(.*?)$', r'h2. \1', text, flags=re.MULTILINE)
text = re.sub(r'^#{1}\s+(.*?)$', r'h1. \1', text, flags=re.MULTILINE)
# Convert code blocks first (before other formatting)
text = re.sub(
r'```(\w+)\n(.*?)\n```', r'{code:\1}\n\2\n{code}', text, flags=re.DOTALL
)
text = re.sub(r'```\n(.*?)\n```', r'{code}\n\1\n{code}', text, flags=re.DOTALL)
# Convert inline code (`code`)
text = re.sub(r'`([^`]+)`', r'{{\1}}', text)
# Convert markdown formatting to Jira formatting
# Use temporary placeholders to avoid conflicts between bold and italic conversion
# First convert bold (double markers) to temporary placeholders
text = re.sub(r'\*\*(.*?)\*\*', r'JIRA_BOLD_START\1JIRA_BOLD_END', text)
text = re.sub(r'__(.*?)__', r'JIRA_BOLD_START\1JIRA_BOLD_END', text)
# Now convert single asterisk italics
text = re.sub(r'\*([^*]+?)\*', r'_\1_', text)
# Convert underscore italics
text = re.sub(r'(?<!_)_([^_]+?)_(?!_)', r'_\1_', text)
# Finally, restore bold markers
text = text.replace('JIRA_BOLD_START', '*')
text = text.replace('JIRA_BOLD_END', '*')
# Convert links [text](url)
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'[\1|\2]', text)
# Convert unordered lists (- or * or +)
text = re.sub(r'^[\s]*[-*+]\s+(.*?)$', r'* \1', text, flags=re.MULTILINE)
# Convert ordered lists (1. 2. etc.)
text = re.sub(r'^[\s]*\d+\.\s+(.*?)$', r'# \1', text, flags=re.MULTILINE)
# Convert strikethrough (~~text~~)
text = re.sub(r'~~(.*?)~~', r'-\1-', text)
# Convert horizontal rules (---, ***, ___)
text = re.sub(r'^[\s]*[-*_]{3,}[\s]*$', r'----', text, flags=re.MULTILINE)
# Convert blockquotes (> text)
text = re.sub(r'^>\s+(.*?)$', r'bq. \1', text, flags=re.MULTILINE)
# Convert tables (basic support)
# This is a simplified table conversion - Jira tables are quite different
lines = text.split('\n')
in_table = False
converted_lines = []
for line in lines:
if (
'|' in line
and line.strip().startswith('|')
and line.strip().endswith('|')
):
# Skip markdown table separator lines (contain ---)
if '---' in line:
continue
if not in_table:
in_table = True
# Convert markdown table row to Jira table row
cells = [cell.strip() for cell in line.split('|')[1:-1]]
converted_line = '|' + '|'.join(cells) + '|'
converted_lines.append(converted_line)
elif in_table and line.strip() and '|' not in line:
in_table = False
converted_lines.append(line)
else:
in_table = False
converted_lines.append(line)
text = '\n'.join(converted_lines)
return text
except Exception as e:
# Log the error but don't raise it - return original text as fallback
print(f'Error converting markdown to Jira markup: {str(e)}')
return markdown_text or ''