From c995e280af345f042dd881078d4ab70a4b75396a Mon Sep 17 00:00:00 2001 From: Nathan Powell Date: Tue, 12 Nov 2024 05:49:36 -0800 Subject: [PATCH 1/6] push all communicationDate strings to dd/mm/yyyy format --- ...112000000-format-comlog-comdate-history.js | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 src/migrations/20241112000000-format-comlog-comdate-history.js diff --git a/src/migrations/20241112000000-format-comlog-comdate-history.js b/src/migrations/20241112000000-format-comlog-comdate-history.js new file mode 100644 index 0000000000..f4cbd2cc5e --- /dev/null +++ b/src/migrations/20241112000000-format-comlog-comdate-history.js @@ -0,0 +1,79 @@ +const { prepMigration } = require('../lib/migration'); + +/** @type {import('sequelize-cli').Migration} */ +module.exports = { + async up(queryInterface) { + await queryInterface.sequelize.transaction(async (transaction) => { + const sessionSig = __filename; + await prepMigration(queryInterface, transaction, sessionSig); + return queryInterface.sequelize.query(` + -- This reformats all historical communicationDate values to + -- mm/dd/yyyy + -- Assumptions of preexisting data: + -- -always day-month-year + -- -always separated by a slash, period, or space [/. ] + -- -if there is an extra separator it impacts the year + -- -if the third position is at least two characters, it's the year, else it's the fourth position + -- -the first four characters of a long year string holds the year + -- + -- These assumptions are based on the data we have, so aren't guaranteed to be correct if this is rerun + -- However the logic is slightly overengineered for extra robustness if new errors show up + + DROP TABLE IF EXISTS comdate_corrections; + CREATE TEMP TABLE comdate_corrections + AS + WITH reseparated AS ( + SELECT + id clid, + data->>'communicationDate' orig, + regexp_replace(data->>'communicationDate','[-. 
]','/','g') reseparated + FROM "CommunicationLogs" + WHERE data->>'communicationDate' !~ '^\d\d/\d\d/\d\d\d\d$' + AND COALESCE(data->>'communicationDate','') != '' + ), + date_particles AS ( + SELECT + clid, + orig, + SPLIT_PART(reseparated,'/',1) day_part, + SPLIT_PART(reseparated,'/',2) month_part, + CASE + WHEN LENGTH(SPLIT_PART(reseparated,'/',3)) > 1 THEN SPLIT_PART(reseparated,'/',3) + ELSE SPLIT_PART(reseparated,'/',4) + END AS year_part + FROM reseparated + ), + padded_particles AS ( + SELECT + clid, + orig, + LPAD(day_part,2,'0') padded_day, + LPAD(month_part,2,'0') padded_month, + LPAD( + LEFT(year_part,4), + 4, + '20' + ) padded_year + FROM date_particles + ) + SELECT + clid, + orig, + padded_day || '/' || padded_month || '/' || padded_year reformat + FROM padded_particles + ; + + UPDATE "CommunicationLogs" + SET data = jsonb_set(data, '{communicationDate}', to_jsonb(reformat)) + FROM comdate_corrections + WHERE id = clid + ; + + `); + }); + }, + + async down() { + // no rollbacks + }, +}; From c2ceedec17a5da19b24d9fc40a17c7654b21de58 Mon Sep 17 00:00:00 2001 From: Nathan Powell Date: Tue, 12 Nov 2024 05:52:23 -0800 Subject: [PATCH 2/6] tweak comments --- .../20241112000000-format-comlog-comdate-history.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/migrations/20241112000000-format-comlog-comdate-history.js b/src/migrations/20241112000000-format-comlog-comdate-history.js index f4cbd2cc5e..a05a5fc485 100644 --- a/src/migrations/20241112000000-format-comlog-comdate-history.js +++ b/src/migrations/20241112000000-format-comlog-comdate-history.js @@ -7,8 +7,8 @@ module.exports = { const sessionSig = __filename; await prepMigration(queryInterface, transaction, sessionSig); return queryInterface.sequelize.query(` - -- This reformats all historical communicationDate values to - -- mm/dd/yyyy + -- This reformats all historical communicationDate values to mm/dd/yyyy + -- -- Assumptions of preexisting data: -- -always 
day-month-year -- -always separated by a slash, period, or space [/. ] @@ -16,8 +16,9 @@ module.exports = { -- -if the third position is at least two characters, it's the year, else it's the fourth position -- -the first four characters of a long year string holds the year -- - -- These assumptions are based on the data we have, so aren't guaranteed to be correct if this is rerun - -- However the logic is slightly overengineered for extra robustness if new errors show up + -- These assumptions are based on the data we have, so aren't guaranteed to be correct if this is rerun. + -- However the logic is slightly overengineered for extra robustness if new errors show up. At the time of + -- writing, this produces all valid strings where to_date(data->>'communicationDate','mm/dd/yyyy') succeeds DROP TABLE IF EXISTS comdate_corrections; CREATE TEMP TABLE comdate_corrections From 0238705e74d544141e2c4982650fdebfd85fbcdb Mon Sep 17 00:00:00 2001 From: Nathan Powell Date: Tue, 12 Nov 2024 06:14:52 -0800 Subject: [PATCH 3/6] tweak escaping --- .../20241112000000-format-comlog-comdate-history.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/migrations/20241112000000-format-comlog-comdate-history.js b/src/migrations/20241112000000-format-comlog-comdate-history.js index a05a5fc485..172ff9163f 100644 --- a/src/migrations/20241112000000-format-comlog-comdate-history.js +++ b/src/migrations/20241112000000-format-comlog-comdate-history.js @@ -10,7 +10,7 @@ module.exports = { -- This reformats all historical communicationDate values to mm/dd/yyyy -- -- Assumptions of preexisting data: - -- -always day-month-year + -- -always month-day-year -- -always separated by a slash, period, or space [/. 
] -- -if there is an extra separator it impacts the year -- -if the third position is at least two characters, it's the year, else it's the fourth position @@ -29,15 +29,15 @@ module.exports = { data->>'communicationDate' orig, regexp_replace(data->>'communicationDate','[-. ]','/','g') reseparated FROM "CommunicationLogs" - WHERE data->>'communicationDate' !~ '^\d\d/\d\d/\d\d\d\d$' + WHERE data->>'communicationDate' !~ '^\\d{2}/\\d{2}/\\d{4}$' AND COALESCE(data->>'communicationDate','') != '' ), date_particles AS ( SELECT clid, orig, - SPLIT_PART(reseparated,'/',1) day_part, - SPLIT_PART(reseparated,'/',2) month_part, + SPLIT_PART(reseparated,'/',1) month_part, + SPLIT_PART(reseparated,'/',2) day_part, CASE WHEN LENGTH(SPLIT_PART(reseparated,'/',3)) > 1 THEN SPLIT_PART(reseparated,'/',3) ELSE SPLIT_PART(reseparated,'/',4) @@ -48,8 +48,8 @@ module.exports = { SELECT clid, orig, - LPAD(day_part,2,'0') padded_day, LPAD(month_part,2,'0') padded_month, + LPAD(day_part,2,'0') padded_day, LPAD( LEFT(year_part,4), 4, @@ -60,7 +60,7 @@ module.exports = { SELECT clid, orig, - padded_day || '/' || padded_month || '/' || padded_year reformat + padded_month || '/' || padded_day || '/' || padded_year reformat FROM padded_particles ; From dd587fc9bf7268c0dbbc061d63b5eb78993ff60a Mon Sep 17 00:00:00 2001 From: Nathan Powell Date: Thu, 14 Nov 2024 07:26:47 -0800 Subject: [PATCH 4/6] remove unneeded date correction logic --- src/queries/dataRequests/user/communication-logs.sql | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/queries/dataRequests/user/communication-logs.sql b/src/queries/dataRequests/user/communication-logs.sql index 7c7a81c1b3..5e1d198e66 100644 --- a/src/queries/dataRequests/user/communication-logs.sql +++ b/src/queries/dataRequests/user/communication-logs.sql @@ -169,14 +169,7 @@ SELECT COALESCE(cl.data ->> 'purpose', '') AS "purpose", COALESCE(cl.data ->> 'duration', '') AS "duration", COALESCE(cl.data ->> 'regionId', '') AS "region", 
- CASE - WHEN data ->> 'communicationDate' ~ '^[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}$' THEN TO_DATE(data ->> 'communicationDate', 'MM/DD/YYYY') - WHEN data ->> 'communicationDate' ~ '^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2}$' THEN TO_DATE(data ->> 'communicationDate', 'MM/DD/YY') - WHEN data ->> 'communicationDate' ~ '^[0-9]{1,2}-[0-9]{1,2}-[0-9]{2}$' THEN TO_DATE(data ->> 'communicationDate', 'MM-DD-YY') - WHEN data ->> 'communicationDate' ~ '^[0-9]{1,2}/[0-9]{1,2}//[0-9]{2}$' THEN TO_DATE(regexp_replace(data ->> 'communicationDate', '//', '/'), 'MM/DD/YY') - WHEN data ->> 'communicationDate' ~ '^[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}?[0-9]{1,2}.[0-9]{1,2}$' THEN TO_DATE(LEFT(data ->> 'communicationDate', 10), 'MM/DD/YYYY') - ELSE NULL - END AS "communicationDate", + TO_DATE(data ->> 'communicationDate', 'MM/DD/YYYY') "communicationDate", COALESCE(cl.data ->> 'pocComplete', '') AS "pocComplete", COALESCE(cl.data ->> 'notes', '') AS "notes", COALESCE(( From 75c459b56a87fe9a16a8a789485efb288f2b83b4 Mon Sep 17 00:00:00 2001 From: Nathan Powell Date: Thu, 14 Nov 2024 07:35:52 -0800 Subject: [PATCH 5/6] add more comments per review and use more legible regex --- src/migrations/20241112000000-format-comlog-comdate-history.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/migrations/20241112000000-format-comlog-comdate-history.js b/src/migrations/20241112000000-format-comlog-comdate-history.js index 172ff9163f..230eabbe5c 100644 --- a/src/migrations/20241112000000-format-comlog-comdate-history.js +++ b/src/migrations/20241112000000-format-comlog-comdate-history.js @@ -27,6 +27,7 @@ module.exports = { SELECT id clid, data->>'communicationDate' orig, + -- replace [-. ] seperators (only - has been seen) with / so the subsequent logic always works regexp_replace(data->>'communicationDate','[-. 
]','/','g') reseparated FROM "CommunicationLogs" WHERE data->>'communicationDate' !~ '^\\d{2}/\\d{2}/\\d{4}$' @@ -38,6 +39,7 @@ module.exports = { orig, SPLIT_PART(reseparated,'/',1) month_part, SPLIT_PART(reseparated,'/',2) day_part, + -- check where the year part is because sometimes separators between day and year are doubled CASE WHEN LENGTH(SPLIT_PART(reseparated,'/',3)) > 1 THEN SPLIT_PART(reseparated,'/',3) ELSE SPLIT_PART(reseparated,'/',4) @@ -50,6 +52,7 @@ module.exports = { orig, LPAD(month_part,2,'0') padded_month, LPAD(day_part,2,'0') padded_day, + -- pull out only the leftmost 4 characters, but pad them with the century if we only have two chars LPAD( LEFT(year_part,4), 4, From 67069b18267a3aaff78690f857f816618e4c9780 Mon Sep 17 00:00:00 2001 From: Nathan Powell Date: Thu, 14 Nov 2024 08:10:07 -0800 Subject: [PATCH 6/6] freshen migration prefix --- ...history.js => 20241114000000-format-comlog-comdate-history.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/migrations/{20241112000000-format-comlog-comdate-history.js => 20241114000000-format-comlog-comdate-history.js} (100%) diff --git a/src/migrations/20241112000000-format-comlog-comdate-history.js b/src/migrations/20241114000000-format-comlog-comdate-history.js similarity index 100% rename from src/migrations/20241112000000-format-comlog-comdate-history.js rename to src/migrations/20241114000000-format-comlog-comdate-history.js