Files
OpenHands/.github/scripts/find_prs_between_commits.py

331 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Find all PRs that went in between two commits in the OpenHands/OpenHands repository.
Handles cherry-picks and different merge strategies.
This script is designed to run from within the OpenHands repository under .github/scripts:
.github/scripts/find_prs_between_commits.py
Usage: find_prs_between_commits <older_commit> <newer_commit> [--repo <path>]
"""
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Optional
def find_openhands_repo() -> Optional[Path]:
"""
Find the OpenHands repository.
Since this script is designed to live in .github/scripts/, it assumes
the repository root is two levels up from the script location.
Tries:
1. Repository root (../../ from script location)
2. Current directory
3. Environment variable OPENHANDS_REPO
"""
# Check repository root (assuming script is in .github/scripts/)
script_dir = Path(__file__).parent.absolute()
repo_root = (
script_dir.parent.parent
) # Go up two levels: scripts -> .github -> repo root
if (repo_root / '.git').exists():
return repo_root
# Check current directory
if (Path.cwd() / '.git').exists():
return Path.cwd()
# Check environment variable
if 'OPENHANDS_REPO' in os.environ:
repo_path = Path(os.environ['OPENHANDS_REPO'])
if (repo_path / '.git').exists():
return repo_path
return None
def run_git_command(cmd: list[str], repo_path: Path) -> str:
"""Run a git command in the repository directory and return its output."""
try:
result = subprocess.run(
cmd, capture_output=True, text=True, check=True, cwd=str(repo_path)
)
return result.stdout.strip()
except subprocess.CalledProcessError as e:
print(f'Error running git command: {" ".join(cmd)}', file=sys.stderr)
print(f'Error: {e.stderr}', file=sys.stderr)
sys.exit(1)
def extract_pr_numbers_from_message(message: str) -> set[int]:
"""Extract PR numbers from commit message in any common format."""
# Match #12345 anywhere, including in patterns like (#12345) or "Merge pull request #12345"
matches = re.findall(r'#(\d+)', message)
return set(int(m) for m in matches)
def get_commit_info(commit_hash: str, repo_path: Path) -> tuple[str, str, str]:
"""Get commit subject, body, and author from a commit hash."""
subject = run_git_command(
['git', 'log', '-1', '--format=%s', commit_hash], repo_path
)
body = run_git_command(['git', 'log', '-1', '--format=%b', commit_hash], repo_path)
author = run_git_command(
['git', 'log', '-1', '--format=%an <%ae>', commit_hash], repo_path
)
return subject, body, author
def get_commits_between(
older_commit: str, newer_commit: str, repo_path: Path
) -> list[str]:
"""Get all commit hashes between two commits."""
commits_output = run_git_command(
['git', 'rev-list', f'{older_commit}..{newer_commit}'], repo_path
)
if not commits_output:
return []
return commits_output.split('\n')
def get_pr_info_from_github(pr_number: int, repo_path: Path) -> Optional[dict]:
"""Get PR information from GitHub API if GITHUB_TOKEN is available."""
try:
# Set up environment with GitHub token
env = os.environ.copy()
if 'GITHUB_TOKEN' in env:
env['GH_TOKEN'] = env['GITHUB_TOKEN']
result = subprocess.run(
[
'gh',
'pr',
'view',
str(pr_number),
'--json',
'number,title,author,mergedAt,baseRefName,headRefName,url',
],
capture_output=True,
text=True,
check=True,
env=env,
cwd=str(repo_path),
)
return json.loads(result.stdout)
except (subprocess.CalledProcessError, FileNotFoundError, json.JSONDecodeError):
return None
def find_prs_between_commits(
older_commit: str, newer_commit: str, repo_path: Path
) -> dict[int, dict]:
"""
Find all PRs that went in between two commits.
Returns a dictionary mapping PR numbers to their information.
"""
print(f'Repository: {repo_path}', file=sys.stderr)
print('Finding PRs between commits:', file=sys.stderr)
print(f' Older: {older_commit}', file=sys.stderr)
print(f' Newer: {newer_commit}', file=sys.stderr)
print(file=sys.stderr)
# Verify commits exist
try:
run_git_command(['git', 'rev-parse', '--verify', older_commit], repo_path)
run_git_command(['git', 'rev-parse', '--verify', newer_commit], repo_path)
except SystemExit:
print('Error: One or both commits not found in repository', file=sys.stderr)
sys.exit(1)
# Extract PRs from the older commit itself (to exclude from results)
# These PRs are already included at or before the older commit
older_subject, older_body, _ = get_commit_info(older_commit, repo_path)
older_message = f'{older_subject}\n{older_body}'
excluded_prs = extract_pr_numbers_from_message(older_message)
if excluded_prs:
print(
f'Excluding PRs already in older commit: {", ".join(f"#{pr}" for pr in sorted(excluded_prs))}',
file=sys.stderr,
)
print(file=sys.stderr)
# Get all commits between the two
commits = get_commits_between(older_commit, newer_commit, repo_path)
print(f'Found {len(commits)} commits to analyze', file=sys.stderr)
print(file=sys.stderr)
# Extract PR numbers from all commits
pr_info: dict[int, dict] = {}
commits_by_pr: dict[int, list[str]] = defaultdict(list)
for commit_hash in commits:
subject, body, author = get_commit_info(commit_hash, repo_path)
full_message = f'{subject}\n{body}'
pr_numbers = extract_pr_numbers_from_message(full_message)
for pr_num in pr_numbers:
# Skip PRs that are already in the older commit
if pr_num in excluded_prs:
continue
commits_by_pr[pr_num].append(commit_hash)
if pr_num not in pr_info:
pr_info[pr_num] = {
'number': pr_num,
'first_commit': commit_hash[:8],
'first_commit_subject': subject,
'commits': [],
'github_info': None,
}
pr_info[pr_num]['commits'].append(
{'hash': commit_hash[:8], 'subject': subject, 'author': author}
)
# Try to get additional info from GitHub API
print('Fetching additional info from GitHub API...', file=sys.stderr)
for pr_num in pr_info.keys():
github_info = get_pr_info_from_github(pr_num, repo_path)
if github_info:
pr_info[pr_num]['github_info'] = github_info
print(file=sys.stderr)
return pr_info
def print_results(pr_info: dict[int, dict]):
"""Print the results in a readable format."""
sorted_prs = sorted(pr_info.items(), key=lambda x: x[0])
print(f'{"=" * 80}')
print(f'Found {len(sorted_prs)} PRs')
print(f'{"=" * 80}')
print()
for pr_num, info in sorted_prs:
print(f'PR #{pr_num}')
if info['github_info']:
gh = info['github_info']
print(f' Title: {gh["title"]}')
print(f' Author: {gh["author"]["login"]}')
print(f' URL: {gh["url"]}')
if gh.get('mergedAt'):
print(f' Merged: {gh["mergedAt"]}')
if gh.get('baseRefName'):
print(f' Base: {gh["baseRefName"]}{gh["headRefName"]}')
else:
print(f' Subject: {info["first_commit_subject"]}')
# Show if this PR has multiple commits (cherry-picked or multiple commits)
commit_count = len(info['commits'])
if commit_count > 1:
print(
f' ⚠️ Found {commit_count} commits (possible cherry-pick or multi-commit PR):'
)
for commit in info['commits'][:3]: # Show first 3
print(f' {commit["hash"]}: {commit["subject"][:60]}')
if commit_count > 3:
print(f' ... and {commit_count - 3} more')
else:
print(f' Commit: {info["first_commit"]}')
print()
def main():
if len(sys.argv) < 3:
print('Usage: find_prs_between_commits <older_commit> <newer_commit> [options]')
print()
print('Arguments:')
print(' <older_commit> The older commit hash (or ref)')
print(' <newer_commit> The newer commit hash (or ref)')
print()
print('Options:')
print(' --json Output results in JSON format')
print(' --repo <path> Path to OpenHands repository (default: auto-detect)')
print()
print('Example:')
print(
' find_prs_between_commits c79e0cd3c7a2501a719c9296828d7a31e4030585 35bddb14f15124a3dc448a74651a6592911d99e9'
)
print()
print('Repository Detection:')
print(' The script will try to find the OpenHands repository in this order:')
print(' 1. --repo argument')
print(' 2. Repository root (../../ from script location)')
print(' 3. Current directory')
print(' 4. OPENHANDS_REPO environment variable')
print()
print('Environment variables:')
print(
' GITHUB_TOKEN Optional. If set, will fetch additional PR info from GitHub API'
)
print(' OPENHANDS_REPO Optional. Path to OpenHands repository')
sys.exit(1)
older_commit = sys.argv[1]
newer_commit = sys.argv[2]
json_output = '--json' in sys.argv
# Check for --repo argument
repo_path = None
if '--repo' in sys.argv:
repo_idx = sys.argv.index('--repo')
if repo_idx + 1 < len(sys.argv):
repo_path = Path(sys.argv[repo_idx + 1])
if not (repo_path / '.git').exists():
print(f'Error: {repo_path} is not a git repository', file=sys.stderr)
sys.exit(1)
# Auto-detect repository if not specified
if repo_path is None:
repo_path = find_openhands_repo()
if repo_path is None:
print('Error: Could not find OpenHands repository', file=sys.stderr)
print('Please either:', file=sys.stderr)
print(
' 1. Place this script in .github/scripts/ within the OpenHands repository',
file=sys.stderr,
)
print(' 2. Run from the OpenHands repository directory', file=sys.stderr)
print(
' 3. Use --repo <path> to specify the repository location',
file=sys.stderr,
)
print(' 4. Set OPENHANDS_REPO environment variable', file=sys.stderr)
sys.exit(1)
# Find PRs
pr_info = find_prs_between_commits(older_commit, newer_commit, repo_path)
if json_output:
# Output as JSON
print(json.dumps(pr_info, indent=2))
else:
# Print results in human-readable format
print_results(pr_info)
# Also print a simple list for easy copying
print(f'{"=" * 80}')
print('PR Numbers (for easy copying):')
print(f'{"=" * 80}')
sorted_pr_nums = sorted(pr_info.keys())
print(', '.join(f'#{pr}' for pr in sorted_pr_nums))
if __name__ == '__main__':
main()