# Mirror of https://github.com/OpenHands/OpenHands.git
# Synced 2025-12-26 05:48:36 +08:00
# 140 lines, 4.2 KiB, Python
import hashlib
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
import anthropic
|
|
import frontmatter
|
|
import yaml
|
|
|
|
# The API key is read from the environment once, at import time. Without it
# no translation request can be made, so the script stops immediately.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
if not ANTHROPIC_API_KEY:
    # Passing a string to sys.exit() writes it to stderr and exits with
    # status 1, keeping the diagnostic out of regular stdout output.
    sys.exit('Error: ANTHROPIC_API_KEY environment variable not set')

# Single shared client used by translate_content() for every request.
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
|
|
|
|
# Documentation root; the hash cache file is stored directly inside it.
DOCS_DIR = 'docs/'
CACHE_FILE = os.path.join(DOCS_DIR, 'translation_cache.json')

# Supported target languages, keyed by locale code. The key is used in the
# output path and the value is the language name given to the translator.
LANGUAGES = {
    'fr': 'French',
    'zh-Hans': 'Simplified Chinese',
}
|
|
|
|
|
|
def get_file_hash(filepath):
    """Return the hexadecimal MD5 digest of a file's raw bytes.

    The file is opened in binary mode, so the digest reflects the exact
    bytes on disk.
    """
    digest = hashlib.md5()
    with open(filepath, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
|
|
|
|
|
|
def load_file_hashes(cache_file=None):
    """Load the file hashes saved by a previous run.

    Args:
        cache_file: Path of the JSON cache to read. When omitted, the
            module-level ``CACHE_FILE`` is used.

    Returns:
        The dict decoded from the cache file, or an empty dict when the
        file does not exist, cannot be decoded, or does not hold a JSON
        object. An empty result makes every source file look new.
    """
    path = CACHE_FILE if cache_file is None else cache_file

    # Open directly instead of checking for existence first: there is no
    # window in which the file can vanish between the check and the open.
    try:
        with open(path, 'r', encoding='utf-8') as f:
            hashes = json.load(f)
    except FileNotFoundError:
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        # A truncated or corrupt cache should not abort the whole run.
        print(f'Warning: ignoring unreadable cache {path}: {err}', file=sys.stderr)
        return {}

    if not isinstance(hashes, dict):
        print(f'Warning: ignoring cache {path}: not a JSON object', file=sys.stderr)
        return {}
    return hashes
|
|
|
|
|
|
def save_file_hashes(hashes, cache_file=None):
    """Save the current file hashes as JSON.

    The data is written to a sibling temporary file and then moved over the
    destination, so an interrupted run never leaves a half-written cache.

    Args:
        hashes: JSON-serializable mapping to store.
        cache_file: Destination path. When omitted, the module-level
            ``CACHE_FILE`` is used.
    """
    path = CACHE_FILE if cache_file is None else cache_file
    tmp_path = f'{path}.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(hashes, f)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up when writing or serializing failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
|
|
|
|
def get_translation_path(source_path, lang):
    """Map a source document path to its translated counterpart for ``lang``.

    The path of ``source_path`` relative to ``docs/modules`` is appended to
    the ``current`` docs directory under ``docs/i18n/<lang>``.
    """
    locale_root = f'docs/i18n/{lang}/docusaurus-plugin-content-docs/current'
    suffix = os.path.relpath(source_path, start='docs/modules')
    return f'{locale_root}/{suffix}'
|
|
|
|
|
|
def translate_content(content, target_lang):
    """Translate content using Anthropic's Claude.

    Args:
        content: Text to translate; it is embedded in the user message.
        target_lang: Name of the target language, inserted into the system
            prompt.

    Returns:
        The text of the first content block of the model's reply. When the
        reply was cut off by the output-token limit, a warning is printed
        to stderr and the (incomplete) text is still returned.
    """
    max_tokens = 4096
    system_prompt = (
        'You are a professional translator. '
        f'Translate the following content into {target_lang}. '
        'Preserve all Markdown formatting, code blocks, and front matter. '
        'Keep any {% jsx %} tags and similar intact. '
        'Do not translate code examples, URLs, or technical terms.'
    )

    message = client.messages.create(
        model='claude-3-opus-20240229',
        max_tokens=max_tokens,
        temperature=0,
        system=system_prompt,
        messages=[
            {'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
        ],
    )

    # A reply that stopped because it hit max_tokens is incomplete; without
    # this check a truncated translation would pass through unnoticed.
    if message.stop_reason == 'max_tokens':
        print(
            f'Warning: {target_lang} translation was truncated at '
            f'{max_tokens} output tokens; the result is incomplete',
            file=sys.stderr,
        )

    return message.content[0].text
|
|
|
|
|
|
def process_file(source_path, lang):
    """Translate one Markdown source file into ``lang`` and write the result.

    Files whose names do not end in ``.md`` or ``.mdx`` are ignored. When
    the source starts with ``---`` its front matter is parsed, left
    untranslated, and re-emitted as YAML ahead of the translated body.
    """
    # Only Markdown documents are translated.
    if not source_path.endswith(('.md', '.mdx')):
        return

    translation_path = get_translation_path(source_path, lang)
    os.makedirs(os.path.dirname(translation_path), exist_ok=True)

    with open(source_path, 'r', encoding='utf-8') as src:
        content = src.read()

    # Separate the front matter (if any) from the body to be translated.
    has_frontmatter = content.startswith('---')
    if has_frontmatter:
        post = frontmatter.loads(content)
        metadata, body = post.metadata, post.content
    else:
        metadata, body = {}, content

    print('translating...', source_path, lang)
    translated_content = translate_content(body, LANGUAGES[lang])
    print('translation done')

    # Put the original metadata back in front of the translated body.
    if has_frontmatter:
        header = yaml.dump(metadata, allow_unicode=True)
        final_content = ''.join(('---\n', header, '---\n\n', translated_content))
    else:
        final_content = translated_content

    with open(translation_path, 'w', encoding='utf-8') as dest:
        dest.write(final_content)

    print(f'Updated translation for {source_path} in {lang}')
|
|
|
|
|
|
def main():
    """Translate every new or modified Markdown file under ``docs/modules``.

    Each ``.md``/``.mdx`` file is hashed and compared with the hash saved by
    the previous run. Files that are new or changed are translated into
    every language in ``LANGUAGES``. The hashes of the files seen in this
    run are saved at the end.
    """
    previous_hashes = load_file_hashes()
    current_hashes = {}

    # Walk through all files in docs/modules
    for root, _, files in os.walk('docs/modules'):
        for file in files:
            if not file.endswith(('.md', '.mdx')):
                continue

            filepath = os.path.join(root, file)
            current_hash = get_file_hash(filepath)
            current_hashes[filepath] = current_hash

            # Unchanged since the previous run: nothing to translate.
            if previous_hashes.get(filepath) == current_hash:
                continue

            print(f'Change detected in {filepath}')
            for lang in LANGUAGES:
                process_file(filepath, lang)

            # Checkpoint once all languages are done for this file, so a
            # failure later in the run does not force it to be translated
            # again. Entries for files not yet visited keep their previous
            # hashes and are therefore still detected on the next run.
            save_file_hashes({**previous_hashes, **current_hashes})

    # The final save holds only files that exist now, which drops entries
    # for sources that have been removed.
    print('all files up to date, saving hashes')
    save_file_hashes(current_hashes)


if __name__ == '__main__':
    main()
|