OpenHands/docs/translation_updater.py
Xingyao Wang 31ad7fc175
chore: add claude 4 to verified mode & global replace 3.7 to claude 4 (#8665)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-05-23 17:35:30 +00:00

145 lines
4.3 KiB
Python

import hashlib
import json
import os
import sys
import anthropic
import frontmatter
import yaml
# Location of the JSON cache that stores one content hash per source document.
DOCS_DIR = 'docs/'
CACHE_FILE = os.path.join(DOCS_DIR, 'translation_cache.json')

# Locale code -> language name. The code is used in the output path, the name
# is what gets inserted into the translation prompt.
LANGUAGES = {
    'fr': 'French',
    'zh-Hans': 'Simplified Chinese',
    'pt-BR': 'Brazilian Portuguese',
    'ja': 'Japanese',
}

# Nothing below can work without credentials, so stop right away if the key
# is missing or empty.
ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')
if not ANTHROPIC_API_KEY:
    print('Error: ANTHROPIC_API_KEY environment variable not set')
    sys.exit(1)

# Shared API client used by translate_content().
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
def get_file_hash(filepath):
    """Return the hexadecimal MD5 digest of the raw bytes stored at *filepath*.

    The file is opened in binary mode, so the digest reflects the exact
    on-disk content with no newline or encoding translation.
    """
    digest = hashlib.md5()
    with open(filepath, 'rb') as source:
        digest.update(source.read())
    return digest.hexdigest()
def load_file_hashes():
    """Return the hash mapping stored in CACHE_FILE.

    An empty dict is returned when the cache file does not exist; otherwise
    the file's JSON content is parsed and returned as-is.
    """
    if not os.path.exists(CACHE_FILE):
        return {}
    with open(CACHE_FILE, 'r') as cache:
        return json.load(cache)
def save_file_hashes(hashes):
    """Write *hashes* to CACHE_FILE as JSON indented by four spaces.

    Any existing cache file is overwritten.
    """
    with open(CACHE_FILE, 'w') as cache:
        json.dump(hashes, cache, indent=4)
def get_translation_path(source_path, lang):
    """Map a source document to the path of its translation for *lang*.

    The part of *source_path* relative to ``docs/modules`` is re-rooted under
    ``docs/i18n/<lang>/docusaurus-plugin-content-docs/current``.
    """
    locale_root = f'docs/i18n/{lang}/docusaurus-plugin-content-docs/current'
    relative_path = os.path.relpath(source_path, start='docs/modules')
    return f'{locale_root}/{relative_path}'
def translate_content(content, target_lang, max_tokens=4096):
    """Translate content using Anthropic's Claude.

    Args:
        content: Text to translate (Markdown is expected by the prompt).
        target_lang: Name of the target language; it is inserted verbatim
            into the system prompt (e.g. ``'French'``).
        max_tokens: Upper bound on the number of tokens the model may
            generate. Defaults to 4096, the value this function has always
            used; pass a larger value for long documents.

    Returns:
        The text of the first content block of the model's reply.

    Note:
        If the reply stops because it reached ``max_tokens`` the translation
        is incomplete. The partial text is still returned, but a warning is
        printed to stderr so the truncation does not go unnoticed.
    """
    system_prompt = (
        f'You are a professional translator. Translate the following content into {target_lang}. '
        'Preserve all Markdown formatting, code blocks, and front matter. '
        'Keep any {% jsx %} tags and similar intact. '
        'Do not translate code examples, URLs, or technical terms.'
    )
    message = client.messages.create(
        model='claude-sonnet-4-20250514',
        max_tokens=max_tokens,
        # temperature 0 keeps repeated runs as stable as the API allows.
        temperature=0,
        system=system_prompt,
        messages=[
            {'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
        ],
    )
    # The API reports why generation ended; 'max_tokens' means the output was
    # cut off mid-document rather than finished naturally.
    if message.stop_reason == 'max_tokens':
        print(
            f'Warning: translation into {target_lang} was cut off at '
            f'{max_tokens} tokens; the output is incomplete',
            file=sys.stderr,
        )
    return message.content[0].text
def process_file(source_path, lang):
    """Translate one source document into *lang* and write the result.

    Files whose name does not end in ``.md`` or ``.mdx`` are ignored. For the
    rest, the body is sent to translate_content() and written to the path
    given by get_translation_path(). When the source starts with ``---`` its
    front matter is parsed, kept out of the translation request, and
    re-emitted as YAML above the translated body.
    """
    if not source_path.endswith(('.md', '.mdx')):
        return

    # Make sure the destination directory exists before doing any work.
    translation_path = get_translation_path(source_path, lang)
    os.makedirs(os.path.dirname(translation_path), exist_ok=True)

    with open(source_path, 'r', encoding='utf-8') as source_file:
        content = source_file.read()

    # A leading '---' is treated as the marker for a front-matter block.
    has_frontmatter = content.startswith('---')
    if has_frontmatter:
        post = frontmatter.loads(content)
        metadata, body = post.metadata, post.content
    else:
        metadata, body = {}, content

    print('translating...', source_path, lang)
    translated_content = translate_content(body, LANGUAGES[lang])
    print('translation done')

    # Put the (untranslated) front matter back on top when there was one.
    if has_frontmatter:
        header = yaml.dump(metadata, allow_unicode=True)
        final_content = ''.join(['---\n', header, '---\n\n', translated_content])
    else:
        final_content = translated_content

    with open(translation_path, 'w', encoding='utf-8') as target_file:
        target_file.write(final_content)
    print(f'Updated translation for {source_path} in {lang}')
def main():
    """Re-translate every new or modified document under ``docs/modules``.

    Each ``.md``/``.mdx`` file is hashed and compared with the hash recorded
    by the previous run. A file whose hash is missing or different is
    translated into every language in LANGUAGES. When the walk completes, the
    hashes of the files seen in this run replace the cache.

    If anything fails part-way through, the hashes of the files that were
    already handled are merged into the previous cache and saved before the
    exception propagates, so the next run does not translate them again.
    """
    previous_hashes = load_file_hashes()
    current_hashes = {}

    try:
        # Walk through all files in docs/modules
        for root, _, files in os.walk('docs/modules'):
            for file in files:
                if not file.endswith(('.md', '.mdx')):
                    continue
                filepath = os.path.join(root, file)
                current_hash = get_file_hash(filepath)

                # A missing entry yields None, which never equals a digest,
                # so new files are treated the same as modified ones.
                if previous_hashes.get(filepath) != current_hash:
                    print(f'Change detected in {filepath}')
                    for lang in LANGUAGES:
                        process_file(filepath, lang)

                # Record the hash only once every translation succeeded, so
                # a file that failed is picked up again on the next run.
                current_hashes[filepath] = current_hash
    except BaseException:
        # Keep what was accomplished (also on Ctrl-C): entries for files not
        # reached yet retain their previous hash.
        save_file_hashes({**previous_hashes, **current_hashes})
        raise

    print('all files up to date, saving hashes')
    save_file_hashes(current_hashes)
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
    main()