forked from All-Hands-AI/OpenHands
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranslation_updater.py
139 lines (105 loc) Β· 4.22 KB
/
translation_updater.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import hashlib
import json
import os
import sys
import anthropic
import frontmatter
import yaml
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
if not ANTHROPIC_API_KEY:
print('Error: ANTHROPIC_API_KEY environment variable not set')
sys.exit(1)
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
DOCS_DIR = 'docs/'
CACHE_FILE = os.path.join(DOCS_DIR, 'translation_cache.json')
# Supported languages and their codes
LANGUAGES = {'fr': 'French', 'zh-Hans': 'Simplified Chinese'}
def get_file_hash(filepath):
"""Calculate MD5 hash of a file."""
with open(filepath, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
def load_file_hashes():
"""Load previously saved file hashes."""
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE, 'r') as f:
return json.load(f)
return {}
def save_file_hashes(hashes):
"""Save current file hashes."""
with open(CACHE_FILE, 'w') as f:
json.dump(hashes, f)
def get_translation_path(source_path, lang):
"""Get the corresponding translation file path for a source file."""
relative_path = os.path.relpath(source_path, 'docs/modules')
return f'docs/i18n/{lang}/docusaurus-plugin-content-docs/current/{relative_path}'
def translate_content(content, target_lang):
"""Translate content using Anthropic's Claude."""
system_prompt = f'You are a professional translator. Translate the following content into {target_lang}. Preserve all Markdown formatting, code blocks, and front matter. Keep any {{% jsx %}} tags and similar intact. Do not translate code examples, URLs, or technical terms.'
message = client.messages.create(
model='claude-3-opus-20240229',
max_tokens=4096,
temperature=0,
system=system_prompt,
messages=[
{'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
],
)
return message.content[0].text
def process_file(source_path, lang):
"""Process a single file for translation."""
# Skip non-markdown files
if not source_path.endswith(('.md', '.mdx')):
return
translation_path = get_translation_path(source_path, lang)
os.makedirs(os.path.dirname(translation_path), exist_ok=True)
# Read source content
with open(source_path, 'r', encoding='utf-8') as f:
content = f.read()
# Parse frontmatter if exists
has_frontmatter = content.startswith('---')
if has_frontmatter:
post = frontmatter.loads(content)
metadata = post.metadata
content_without_frontmatter = post.content
else:
metadata = {}
content_without_frontmatter = content
# Translate the content
print('translating...', source_path, lang)
translated_content = translate_content(content_without_frontmatter, LANGUAGES[lang])
print('translation done')
# Reconstruct the file with frontmatter if it existed
if has_frontmatter:
final_content = '---\n'
final_content += yaml.dump(metadata, allow_unicode=True)
final_content += '---\n\n'
final_content += translated_content
else:
final_content = translated_content
# Write the translated content
with open(translation_path, 'w', encoding='utf-8') as f:
f.write(final_content)
print(f'Updated translation for {source_path} in {lang}')
def main():
previous_hashes = load_file_hashes()
current_hashes = {}
# Walk through all files in docs/modules
for root, _, files in os.walk('docs/modules'):
for file in files:
if file.endswith(('.md', '.mdx')):
filepath = os.path.join(root, file)
current_hash = get_file_hash(filepath)
current_hashes[filepath] = current_hash
# Check if file is new or modified
if (
filepath not in previous_hashes
or previous_hashes[filepath] != current_hash
):
print(f'Change detected in {filepath}')
for lang in LANGUAGES:
process_file(filepath, lang)
print('all files up to date, saving hashes')
save_file_hashes(current_hashes)
previous_hashes = current_hashes
if __name__ == '__main__':
main()