Skip to content

Commit

Permalink
Adding the translator tool
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelbontyes committed Aug 30, 2024
1 parent 45697db commit 5783b5d
Show file tree
Hide file tree
Showing 2 changed files with 453 additions and 0 deletions.
115 changes: 115 additions & 0 deletions translate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
A script to add translations names to concepts in OCL based on a metadata file in Excel.
"""
import json
import os
import re
import time
import uuid
import warnings

import openpyxl
import pandas as pd
import requests

# Ignore warnings raised from openpyxl modules while opening Excel files.
# The filter is scoped to openpyxl so that warnings coming from any other
# library remain visible instead of being silenced for the whole process.
warnings.filterwarnings('ignore', module='openpyxl')

# Read config.json once; the settings below are taken from it
with open('config.json', encoding='utf-8', mode='r') as config_file:
    config = json.loads(config_file.read())

# Mandatory settings: a missing key raises KeyError
OCL_API_URL, SOURCE_ID, COLLECTION_ID, OCL_TOKEN, ORG_ID = (
    config[key]
    for key in ('OCL_API_URL', 'SOURCE_ID', 'COLLECTION_ID', 'OCL_TOKEN', 'ORG_ID')
)

# Hard-coded overrides: these replace the values just read from the config
ORG_ID = SOURCE_ID = "MSF"
COLLECTION_ID = "MentalHealth"

# HTTP headers sent with the OCL API requests (token auth, JSON payloads)
HEADERS = {
    "Authorization": "Token {}".format(OCL_TOKEN),
    "Content-Type": "application/json",
}

# Path of the Excel metadata workbook, taken from the configuration
METADATA_FILE = config.get('METADATA_FILEPATH')
# Adjust header to start from row 2
# NOTE(review): option_sets is not referenced again in the visible script;
# kept so the module's names and the early read of the workbook are unchanged.
option_sets = pd.read_excel(METADATA_FILE, sheet_name='OptionSets', header=1)
# List of sheets to process. It is copied so that the list stored inside
# config is not modified, and a missing or null setting yields an empty list.
SHEETS = list(config.get('SHEETS_TO_PREVIEW') or [])
# Always process the "OptionSets" sheet, but only once: appending it
# unconditionally would process it twice when the config already lists it.
if 'OptionSets' not in SHEETS:
    SHEETS.append('OptionSets')

print(SHEETS)

# Read the exported OCL source snapshot (JSON) into memory
OCL_CONCEPTS_FILE = 'ocl_source_snapshots/MSF_Source_20240830_163943.json'
with open(OCL_CONCEPTS_FILE, encoding='utf-8', mode='r') as snapshot:
    ocl_concepts = json.loads(snapshot.read())

# Map every external ID found in the OCL snapshot to its concept ID once, so
# each metadata row is resolved with a dictionary lookup instead of a scan
# over all concepts. setdefault keeps the first concept seen for an external
# ID, and .get tolerates concepts that have no 'external_id' or 'id' key.
concept_ids_by_external_id = {}
for concept in ocl_concepts:
    concept_ids_by_external_id.setdefault(concept.get('external_id'), concept.get('id'))

# For every sheet, read the "External ID" and "Translation" columns and keep
# the rows that have both values and whose external ID exists in the snapshot.
# Entries are keyed by external ID, so a later row with the same external ID
# replaces an earlier one.
translation_dict = {}
for sheet_name in SHEETS:
    data = pd.read_excel(METADATA_FILE, sheet_name=sheet_name, header=1)
    for _, row in data.iterrows():
        external_id = row['External ID']
        raw_translation = row['Translation']
        # Missing cells must be detected before converting to str: str(NaN)
        # is the non-empty text 'nan', and a NaN external ID is truthy.
        if pd.isna(external_id) or not external_id:
            continue
        if pd.isna(raw_translation):
            continue
        translation = str(raw_translation)  # Convert translation to string
        # Skip blank text and the literal 'nan' left over from missing values
        if not translation.strip() or translation.lower() == 'nan':
            continue
        # Get the concept ID using the snapshot index
        concept_id = concept_ids_by_external_id.get(external_id)
        if concept_id:
            print(f'Updating {external_id} with translation {translation} -> {concept_id}')
            # Add the translation, concept ID and external ID to the translation_dict
            translation_dict[external_id] = {
                'translation': translation,
                'concept_id': concept_id,
                'external_id': external_id,
            }
    print(f'Translation dictionary for sheet {sheet_name} created.')
# Count the number of translations added to the translation_dict
print(f'Total translations added: {len(translation_dict)}')

# Debugging aid: uncomment the two lines below to keep only the entry with the given external ID
# translation_dict = {k: v for k, v in translation_dict.items() if v['external_id'] == '1a8bf24f-4f36-4971-aad9-ae77f3525738'}
# print(f'Translation dictionary after filtering: {len(translation_dict)}')

# Write the collected entries to a CSV file, one row per entry, no index column
translation_dict_file = 'translation_dict.csv'
translation_rows = list(translation_dict.values())
translation_frame = pd.DataFrame(translation_rows)
translation_frame.to_csv(translation_dict_file, index=False)
print(f'Translation dictionary saved to {translation_dict_file}')

# For each concept in translation_dict, fetch its existing names from
# https://api.openconceptlab.org/orgs/MSF/sources/MSF/concepts/{concept_id}/names/
# and, when none of them has the locale 'ar', POST the translation to the same
# URL as a new fully-specified Arabic name.
# A failure on one concept is reported and the loop moves on to the next one,
# so a single network error does not abort the remaining uploads.
# NOTE(review): the endpoint is hard-coded and does not use OCL_API_URL,
# ORG_ID or SOURCE_ID - confirm this is intended.
with requests.Session() as session:
    # One session reuses the HTTP connection and carries the auth headers
    session.headers.update(HEADERS)
    for external_id, translation_info in translation_dict.items():
        concept_id = translation_info['concept_id']
        translation = translation_info['translation']
        print(f'Checking AR translation {translation} for {concept_id}...')
        url = f'https://api.openconceptlab.org/orgs/MSF/sources/MSF/concepts/{concept_id}/names/'
        try:
            response = session.get(url, timeout=30)
        except requests.RequestException as error:
            print(f'Failed to check AR translation for {concept_id}: {error}')
            continue
        if response.status_code != 200:
            # Previously skipped silently; report it so missed concepts are visible
            print(f'Failed to check AR translation for {concept_id}: Status code {response.status_code}')
            continue
        try:
            # Get the names of the concept
            names = response.json()
        except ValueError as error:
            print(f'Failed to check AR translation for {concept_id}: invalid JSON ({error})')
            continue
        # Check if any of the names has a locale of 'ar'
        ar_translation_present = any(name.get('locale') == 'ar' for name in names)
        print(f'AR translation for {external_id} is already present: {ar_translation_present}')
        if ar_translation_present:
            continue
        # AR translation is not present: add it to the OCL concept
        data = {
            "name": translation,
            "locale": "ar",
            "name_type": "Fully-Specified",
        }
        print(f'Adding AR translation {translation} for {concept_id}...')
        try:
            response = session.post(url, json=data, timeout=30)
        except requests.RequestException as error:
            print(f'Failed to add AR translation {translation} for {concept_id}: {error}')
            continue
        if response.status_code == 201:
            print(f'Added AR translation {translation} for {concept_id}')
        else:
            print(f'Failed to add AR translation {translation} for {concept_id}: Status code {response.status_code}')

Loading

0 comments on commit 5783b5d

Please sign in to comment.