From 16e965bbbdc8ebe9f145436fe26b2dfb6db3d35d Mon Sep 17 00:00:00 2001 From: erikh360 Date: Thu, 16 Jan 2025 07:47:15 +0200 Subject: [PATCH 1/2] Change wa_id column to urn --- scripts/migrate_to_turn/fetch_rapidpro_contacts.py | 2 +- scripts/migrate_to_turn/update_turn_contacts.py | 2 +- scripts/migrate_to_turn/update_turn_contacts_queue.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/migrate_to_turn/fetch_rapidpro_contacts.py b/scripts/migrate_to_turn/fetch_rapidpro_contacts.py index 41830f9e..c0783a5d 100644 --- a/scripts/migrate_to_turn/fetch_rapidpro_contacts.py +++ b/scripts/migrate_to_turn/fetch_rapidpro_contacts.py @@ -63,7 +63,7 @@ def get_rapidpro_contacts(start_date=None, end_date=None): if wa_id: data = get_field_data(contact) - data["wa_id"] = wa_id + data["urn"] = wa_id contacts.append(data) modified_on = contact.modified_on.astimezone(pytz.utc) diff --git a/scripts/migrate_to_turn/update_turn_contacts.py b/scripts/migrate_to_turn/update_turn_contacts.py index 3c110fa0..c4c79aa9 100644 --- a/scripts/migrate_to_turn/update_turn_contacts.py +++ b/scripts/migrate_to_turn/update_turn_contacts.py @@ -14,7 +14,7 @@ async def update_turn_contact_details(session, row, target): - wa_id = row.pop("wa_id") + wa_id = row.pop("urn") url = urljoin(TURN_URL, f"/v1/contacts/{wa_id}/profile") headers = { diff --git a/scripts/migrate_to_turn/update_turn_contacts_queue.py b/scripts/migrate_to_turn/update_turn_contacts_queue.py index d29d1e44..2456ea33 100644 --- a/scripts/migrate_to_turn/update_turn_contacts_queue.py +++ b/scripts/migrate_to_turn/update_turn_contacts_queue.py @@ -82,7 +82,7 @@ async def main(filename, target): tasks.append(task) for row in reader: - wa_id = row.pop("wa_id") + wa_id = row.pop("urn") update = (session, wa_id, row, target) await queue.put(update) From f65bd47e9093a544bc90160738a95fff391d1e66 Mon Sep 17 00:00:00 2001 From: erikh360 Date: Thu, 16 Jan 2025 07:54:58 +0200 Subject: [PATCH 2/2] Update readme wit 
queue update script --- scripts/migrate_to_turn/README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/migrate_to_turn/README.md b/scripts/migrate_to_turn/README.md index 44273f28..41262172 100644 --- a/scripts/migrate_to_turn/README.md +++ b/scripts/migrate_to_turn/README.md @@ -7,6 +7,8 @@ We can do this in batches and repeat until we do the actual switch over from Rap ## SCRIPTS +There is one fetch script and two update script options. We can test out the different update options with larger batches and see which one works the best. + ### fetch_rapidpro_contacts.py This fetches all the contacts from Rapidpro based on the start and end date provided, you can also configure a limit. @@ -19,6 +21,8 @@ It will also output the latest modified on date in the batch, this can then be u ### update_turn_contacts.py +Update using the Turn contacts API asynchronously. + This script takes a filename of a file generated by the `fetch_rapidpro_contacts.py` script as a parameter and updates all the contact in the file on Turn. It is an async script and `CONCURRENCY` can be updated to control the speed, to avoid hitting the Turn API rate limits. @@ -28,6 +32,19 @@ Command to run: The output is sent to a json file, which can be used to retry failed requests. +### update_turn_contacts_queue.py + +Update using the Turn contacts API asynchronously, but using a queue and workers. It will sleep if it gets rate limited by Turn. + +This script takes a filename of a file generated by the `fetch_rapidpro_contacts.py` script as a parameter and updates all the contacts in the file on Turn. + +It is an async script and `WORKER_COUNT` can be configured to change the number of contacts being processed at a time. + +Command to run: +`python scripts/migrate_to_turn/update_turn_contacts_queue.py contacts-2025-01-01-2025-01-07.csv > update_turn_contacts.json` + +The output is sent to a JSON file, which can be used to retry failed requests.
+ ## FIELD_MAPPING This is a dictionary the script uses to figure out where to get the data, how to process it and where it should go.