-
Notifications
You must be signed in to change notification settings - Fork 27
/
translate_docs.py
132 lines (105 loc) · 4.13 KB
/
translate_docs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import re
import json
import pathlib
import importlib
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.cloud import translate_v2 as translate
from git import Repo
from github import Github, GithubException
def get_languages():
    """Return the documentation target languages declared in the app's hooks.

    The app name is taken from the name of the directory that contains this
    file, and the module ``<app_name>.hooks`` is imported to read its
    ``docs_languages`` attribute.

    Returns:
        The value of ``hooks.docs_languages``, or an empty list when the
        hooks module does not define that attribute.

    Raises:
        ImportError: If ``<app_name>.hooks`` cannot be imported.
    """
    app_name = pathlib.Path(__file__).resolve().parent.name
    hooks = importlib.import_module(f"{app_name}.hooks")
    # A missing attribute is the only expected "no languages configured"
    # signal, so default on that alone instead of swallowing every exception.
    return getattr(hooks, "docs_languages", [])
def set_google_credentials():
    """Write the Google credentials secret to ``credentials.json``.

    ``GOOGLE_APPLICATION_CREDENTIALS`` is read as a JSON document, dumped to
    ``credentials.json`` in the current working directory, and the
    environment variable is then repointed at that file.

    If the variable does not hold JSON but already names an existing file
    (for example because this function ran earlier in the same process), it
    is left untouched.

    Raises:
        ValueError: If the variable is unset or empty, or (as
            ``json.JSONDecodeError``) if it is neither valid JSON nor the
            path of an existing file.
    """
    secret_value = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if not secret_value:
        raise ValueError(
            "GOOGLE_APPLICATION_CREDENTIALS is not set; expected the credentials JSON"
        )

    # Parse before opening the output file so that a bad value can never
    # truncate an existing credentials.json.
    try:
        credentials = json.loads(secret_value)
    except json.JSONDecodeError:
        if os.path.isfile(secret_value):
            # Already a path to a credentials file; nothing to do.
            return
        raise

    with open("credentials.json", "w", encoding="utf-8") as f:
        json.dump(credentials, f)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
def get_pull_request_number():
    """Extract the pull request number from the ``GITHUB_REF`` variable.

    Returns:
        The number captured from a ref that starts with
        ``refs/pull/<number>/merge``, or ``None`` when ``GITHUB_REF`` is
        unset, empty, or does not have that form.
    """
    # Fall back to an empty string so an unset variable yields None instead
    # of a TypeError from re.match.
    github_ref = os.getenv("GITHUB_REF") or ""
    match = re.match(r"refs/pull/(\d+)/merge", github_ref)
    return int(match.group(1)) if match else None
def extract_code_blocks(text):
    """Replace each triple-backtick fenced span in *text* with a placeholder.

    Returns:
        A ``(text_without_code, code_blocks)`` tuple: the text with every
        fenced span swapped for ``CODE_BLOCK_PLACEHOLDER``, and the removed
        spans (fences included) in order of appearance.
    """
    fence_pattern = re.compile(r"```.*?```", re.DOTALL)
    removed = []

    def _stash(found):
        # Remember the fenced span and leave the marker in its place.
        removed.append(found.group(0))
        return "CODE_BLOCK_PLACEHOLDER"

    stripped = fence_pattern.sub(_stash, text)
    return stripped, removed
def reintegrate_code_blocks(translated_text, code_blocks):
    """Put code blocks back in place of ``CODE_BLOCK_PLACEHOLDER`` markers.

    The n-th marker in *translated_text* is replaced by the n-th entry of
    *code_blocks*. Markers without a matching code block are left as they
    are, and surplus code blocks are ignored.

    Args:
        translated_text: Text containing zero or more placeholder markers.
        code_blocks: Code blocks in the order their markers appear.

    Returns:
        The text with the code blocks restored.
    """
    placeholder = "CODE_BLOCK_PLACEHOLDER"
    blocks = list(code_blocks)
    # Split once on the markers that are in the text up front. Repeated
    # str.replace rescans from the start, so a code block that itself
    # contains the marker text would be overwritten by the next block.
    segments = translated_text.split(placeholder)
    rebuilt = [segments[0]]
    for index, segment in enumerate(segments[1:]):
        rebuilt.append(blocks[index] if index < len(blocks) else placeholder)
        rebuilt.append(segment)
    return "".join(rebuilt)
def translate_file(source_content, target_file, target_language, translate_client):
    """Translate *source_content* and write the result to *target_file*.

    Fenced code blocks are pulled out with ``extract_code_blocks`` before the
    text is passed to ``translate_client.translate`` (with ``format_="text"``)
    and are put back with ``reintegrate_code_blocks`` afterwards. The output
    file is written as UTF-8.
    """
    prose, fenced_blocks = extract_code_blocks(source_content)

    response = translate_client.translate(
        prose,
        target_language=target_language,
        format_="text",
    )
    translated_prose = response["translatedText"]

    restored = reintegrate_code_blocks(translated_prose, fenced_blocks)
    with open(target_file, "w", encoding="utf-8") as out:
        out.write(restored)
def translate_md_files():
    """Translate the English docs changed by the current pull request.

    For every target language returned by ``get_languages()``, the Markdown
    files under ``docs/<version>/en/`` that the pull request touches are
    translated into ``docs/<version>/<language>/``. One branch named
    ``translate-<language>-<pr number>`` is then created from the base
    branch per language, the translated folders are committed and pushed,
    and a pull request against the base branch is opened.

    Returns early, doing nothing, when no target languages are configured or
    when no pull request number can be determined.
    """
    target_languages = get_languages()
    if not target_languages:
        return

    pull_request_number = get_pull_request_number()
    if pull_request_number is None:
        # Without a PR number there is no set of changed files to translate.
        print("No pull request number found in GITHUB_REF; nothing to translate")
        return

    set_google_credentials()
    translate_client = translate.Client()

    # ``or`` also covers a variable that is set but empty.
    base_branch = os.getenv("GITHUB_BASE_REF") or "main"
    repo_name = os.getenv("GITHUB_REPOSITORY")
    g = Github(os.getenv("GITHUB_TOKEN"))
    repo = Repo(search_parent_directories=True)
    origin = repo.remote(name="origin")
    origin.fetch()

    version_folders = [f for f in os.listdir("docs") if os.path.isdir(os.path.join("docs", f))]
    repository = g.get_repo(repo_name, lazy=False)
    pull_request = repository.get_pull(pull_request_number)
    modified_files = _collect_changed_docs(repository, pull_request)

    translated_folders = _translate_documents(
        modified_files, version_folders, target_languages, translate_client
    )

    for target_language in target_languages:
        folders = sorted(translated_folders.get(target_language, ()))
        if not folders:
            # Nothing was written for this language, so there is nothing to
            # commit and no pull request to open.
            continue
        _open_translation_pull_request(
            repo, origin, repository, base_branch, target_language, folders, pull_request_number
        )


def _collect_changed_docs(repository, pull_request):
    """Return ``{filename: text}`` for Markdown files under ``docs/`` changed by the PR."""
    head_branch = pull_request.head.ref
    contents = {}
    for changed in pull_request.get_files():
        filename = changed.filename
        # Only Markdown docs are ever translated; skip the API call for the rest.
        if not (filename.startswith("docs/") and filename.endswith(".md")):
            continue
        try:
            contents[filename] = repository.get_contents(
                filename, ref=head_branch
            ).decoded_content.decode("utf-8")
        except Exception as e:
            # Best effort: a file that cannot be read is skipped, but the
            # skip is reported rather than silent.
            print(f"Skipping {filename}: {e}")
    return contents


def _translate_documents(modified_files, version_folders, target_languages, translate_client):
    """Translate each English doc into each language; return ``{language: {folders written}}``."""
    translated_folders = {}
    with ThreadPoolExecutor() as executor:
        futures = {}
        for version in version_folders:
            # The trailing slash keeps sibling folders such as "en-GB" from
            # being treated as the English source folder.
            source_prefix = f"docs/{version}/en/"
            for filename, content in modified_files.items():
                if not (filename.startswith(source_prefix) and filename.endswith(".md")):
                    continue
                for target_language in target_languages:
                    target_folder = f"docs/{version}/{target_language}"
                    # NOTE(review): only the base name is kept, so docs in
                    # sub-folders of en/ end up flat in the target folder.
                    target_file = os.path.join(target_folder, os.path.basename(filename))
                    os.makedirs(target_folder, exist_ok=True)
                    future = executor.submit(
                        translate_file, content, target_file, target_language, translate_client
                    )
                    futures[future] = (target_language, target_folder)

        for future in as_completed(futures):
            target_language, target_folder = futures[future]
            try:
                future.result()
            except Exception as e:
                print(f"Error during translation: {e}")
            else:
                translated_folders.setdefault(target_language, set()).add(target_folder)
    return translated_folders


def _open_translation_pull_request(
    repo, origin, repository, base_branch, target_language, folders, pull_request_number
):
    """Commit *folders* on a new per-language branch, push it and open a pull request."""
    branch_name = f"translate-{target_language}-{pull_request_number}"
    repo.git.checkout(base_branch)
    # The branch is created exactly once per language and carries the
    # translated folders of every version.
    repo.git.checkout("-b", branch_name)
    repo.index.add(folders)
    commit_message = f"Translate {target_language}"
    repo.index.commit(commit_message)
    origin.push(branch_name)

    title = f"Translate to {target_language}"
    body = f"This pull request translates to {target_language}"
    try:
        repository.create_pull(title=title, body=body, head=branch_name, base=base_branch)
    except GithubException as e:
        print(f"Failed to create pull request for {target_language}: {e}")
# Run the translation workflow only when this file is executed as a script.
if __name__ == "__main__":
    translate_md_files()