DIAC-232 python scripts (#1954)

* DIAC-226 set up the majority of scripts to work. Searches for exported CSVs, then redacts, transforms and outputs them ready for importing * DIAC-226 updated python scripts * Finished scripts to properly output all wanted files with correct permissions * Added comments to explain how to use script * Adding proper CSV row searcher and try clauses for errors. Also added additional fields to be redacted * QOL changes * Various refactoring. Created filepath_settings file to manage filepaths and place redacted fields dict, refactored scripts to now use input and output directories * Added dummy files within directories * Added extra fields, wrote first unit test for txt converter and renamed file
hmcts · Feb 12, 2024 · 2b0da41 · 2b0da41
1 parent bbc28ba
commit 2b0da41
Show file tree

Hide file tree

Showing 11 changed files with 291 additions and 110 deletions.
diff --git a/bin/utils/python_scripts/convert-json-to-txt.py b/bin/utils/python_scripts/convert-json-to-txt.py
diff --git a/bin/utils/python_scripts/convert_json_to_txt.py b/bin/utils/python_scripts/convert_json_to_txt.py
@@ -0,0 +1,20 @@
+import json
+
+
+# Your JSON data (example) 1677498210980054-data-annotated.json
+def convert_json_to_txt(file_path):
+    """Rewrite a JSON file as compact, single-line JSON text next to the source.
+
+    The output keeps the input's directory and base name and replaces only the
+    final extension with ``.txt`` (``exports/v1.2/event_1.json`` becomes
+    ``exports/v1.2/event_1.txt``).
+
+    Args:
+        file_path: Path of the JSON file to read.
+
+    Raises:
+        OSError: If the input cannot be read or the output cannot be written.
+        json.JSONDecodeError: If the input is not valid JSON.
+    """
+    # Imported locally because this module only imports json at the top.
+    import os
+
+    with open(file_path, "r") as json_file:
+        json_data = json.load(json_file)
+
+    # No whitespace after ',' or ':' gives the most compact representation.
+    text_data = json.dumps(json_data, separators=(',', ':'))
+
+    # splitext strips only the last extension; splitting on the first "."
+    # broke paths such as "./event_1.json" or "exports/v1.2/event_1.json".
+    output_file = os.path.splitext(file_path)[0] + ".txt"
+
+    with open(output_file, "w") as text_file:
+        text_file.write(text_data)
+
+
+# convert_json_to_txt("event_1.json")
diff --git a/bin/utils/python_scripts/create_jsons_from_event_csv.py b/bin/utils/python_scripts/create_jsons_from_event_csv.py
@@ -0,0 +1,32 @@
+import csv
+import json
+import os
+
+from filepath_settings import settings
+
+
+def create_jsons_from_csv(csv_file, events: list[int] = None, output_dir_name_suffix: str = 'latest'):
+    """Write the ``data`` column of selected CSV rows to individual JSON files.
+
+    Rows are numbered from 1 in file order (the header is not counted). Each
+    selected row that has a ``data`` column is parsed as JSON and written,
+    indented, to ``event_<row number>.json`` inside the directory returned by
+    ``make_output_dir(output_dir_name_suffix)``. Every written file is made
+    world-accessible (mode 0o777).
+
+    Args:
+        csv_file: Path of the CSV file to read.
+        events: Row numbers to export. ``None`` (the default) exports every
+            row; previously the default raised ``TypeError`` on the first row.
+        output_dir_name_suffix: Passed to ``make_output_dir`` to name the
+            output directory.
+
+    Raises:
+        json.JSONDecodeError: If a selected row's ``data`` value is not valid JSON.
+    """
+    # A set gives constant-time membership tests and also accepts ranges.
+    wanted = None if events is None else set(events)
+    # newline='' is what the csv module expects for files passed to a reader.
+    with open(csv_file, 'r', newline='') as file:
+        csv_reader = csv.DictReader(file)
+        dir_name = make_output_dir(output_dir_name_suffix)
+        for events_counter, row in enumerate(csv_reader, start=1):
+            if 'data' not in row or (wanted is not None and events_counter not in wanted):
+                continue
+            data = json.loads(row['data'])
+            full_filepath = os.path.join(dir_name, f"event_{events_counter}.json")
+            with open(full_filepath, 'w') as json_file:
+                json.dump(data, json_file, indent=2)
+            os.chmod(full_filepath, 0o777)
+
+
+def make_output_dir(case_name: str) -> str:
+    """Ensure the JSON output directory for ``case_name`` exists and return its path.
+
+    The directory is ``output_jsons_<case_name>`` under
+    ``settings.output_json_directory``. Its mode is set to 0o777 on every call,
+    whether or not the directory was just created.
+    """
+    target_dir = os.path.join(settings.output_json_directory, f'output_jsons_{case_name}')
+    already_present = os.path.exists(target_dir)
+    if not already_present:
+        os.makedirs(target_dir)
+    os.chmod(target_dir, 0o777)
+    return target_dir
+
+# create_jsons_from_csv('case_event_202402071630.csv', events=list(range(1, 7)), output_dir_name_suffix='5405')
diff --git a/bin/utils/python_scripts/filepath_settings.py b/bin/utils/python_scripts/filepath_settings.py
@@ -0,0 +1,105 @@
+import os
+
+# Directory that contains this settings module; every other path hangs off it.
+PROJECT_DIR = os.path.dirname(__file__)
+# The scripts live directly in that directory (a one-argument join adds nothing).
+PYTHON_SCRIPTS_DIR = PROJECT_DIR
+# Where exported CSV files are picked up from.
+EXPORTED_CSV_INPUT_DIR = os.path.join(PYTHON_SCRIPTS_DIR, 'input_csv_files')
+# Where processed CSV and per-event JSON output is written.
+OUTPUT_JSON_DIRECTORY = os.path.join(PYTHON_SCRIPTS_DIR, "output_jsons")
+OUTPUT_CSV_DIR = os.path.join(PYTHON_SCRIPTS_DIR, "output_csv_files")
+
+
+class Settings:
+    # Central configuration for the scripts in this directory: the working
+    # directory paths plus the placeholder values used for redaction.
+
+    # Directory paths, copied from the module-level constants.
+    project_dir = PROJECT_DIR
+    scripts_dir = PYTHON_SCRIPTS_DIR
+    exported_csv_dir = EXPORTED_CSV_INPUT_DIR
+    output_csv_dir = OUTPUT_CSV_DIR
+    output_json_directory = OUTPUT_JSON_DIRECTORY
+
+    # Field name -> placeholder value.
+    # NOTE(review): the code that consumes this mapping is not in this module;
+    # presumably keys are compared with JSON field names -- confirm how
+    # (exact vs. case-insensitive) in redact_info_from_json.
+    # NOTE(review): most keys are lowercase, but several (for example
+    # multimediaTribunalResponse, dateToAvoidReason, Name, newMatters) are
+    # mixed case; if the consumer lowercases names before lookup those
+    # entries can never match -- confirm.
+    # NOTE(review): the '[email protected]' values look like addresses that
+    # were obfuscated when this text was captured -- confirm the intended
+    # placeholder email.
+    replace_mapping_dict = {
+        'email': '[email protected]',
+        'postcode': 'N11 1yz',
+        'dateofbirth': '1980-01-01',
+        'document_filename': 'redacted.pdf',
+        'document_url': 'http://dm-store-aat.service.core-compute-aat.internal/documents/9ce3f9d5-31ef-4021-9aa5-4d017c404cfe',
+        'document_binary_url': 'http://dm-store-aat.service.core-compute-aat.internal/documents/9ce3f9d5-31ef-4021-9aa5-4d017c404cfe/binary',
+        'name': 'redacted.pdf',
+        'filename': 'redacted.pdf',
+        'addressline1': '10 street',
+        'addressline2': 'town',
+        'addressline3': 'city',
+        'attendingjudge': 'redacted',
+        'feedescription': 'redacted',
+        'searchpostcode': 'n11 1yz',
+        'posttown': 'town',
+        'details': 'redacted',
+        'decisionreason': 'redacted',
+        'appellantfamilyname': 'redacted',
+        'appellantgivennames': 'redacted',
+        'appellantdateofbirth': '1980-01-01',
+        'appellantemailaddress': '[email protected]',
+        'casenotedescription': 'redacted',
+        'casenotesubject': 'redacted',
+        'user': 'redacted',
+        'question': 'redacted',
+        'explanation': 'redacted',
+        'address': 'redacted',
+        'bundlefilenameprefix': 'EA 50111 2023',
+        'casenamehmctsinternal': 'redacted',
+        'hmctscasenameinternal': 'redacted',
+        'appellantnamefordisplay': 'redacted',
+        'reasonsforappealdecision': 'redacted',
+        'description': 'redacted',
+        'answer': 'redacted',
+        "legalrepname": "redacted",
+        "mobilenumber": "07451111111",
+        "legalrepcompany": "redacted",
+        "partyname": "redacted",
+        "legalrepcompanyname": "redacted",
+        "county": "redacted",
+        "legalrepresentativename": "redacted",
+        "legalrepreferencenumber": "AA/1234",
+        "directioneditexplanation": 'redacted',
+        "fullname": "redacted",
+        "dayofbirth": 1,
+        "familyname": "redacted",
+        "yearofbirth": 1980,
+        "monthofbirth": 1,
+        "displaydateofbirth": "10 Jan 1980",
+        "documentreference": "012345678",
+        "displayappellantdetailstitle": "redacted",
+        "displayapplicationdetailstitle": "redacted",
+        "homeofficesearchresponse": 'redacted',
+        "remotevideocalldescription": 'redacted',
+        "hearingdaterangedescription": 'redacted',
+        "interpreterlanguagereadonly": "Language\t\tEnglish\nDialect\t\t\tENG",
+        "legalrepresentativeemailaddress": "[email protected]",
+        "remotevideocalltribunalresponse": "redacted",
+        "appellantnationalitiesdescription": "France",
+        "language": "English",
+        "languagedialect": "redacted",
+        "legalaidaccountnumber": "OG123V1",
+        "appellantphonenumber": "07451111111",
+        "givenname": "redacted",
+        "data": "redacted",
+        "witnessname": "redacted",
+        "witnessdetailsreadonly": "redacted",
+        "multimediaTribunalResponse": "redacted",
+        "appellantfullname": "redacted",
+        "endappealapprovername": "redacted",
+        "dateToAvoidReason": "redacted",
+        "sponsorNameForDisplay": "redacted",
+        "sponsorAddressForDisplay": "10 street",
+        "sponsorMobileNumber": "07451111111",
+        "Name": "redacted",
+        "attendingAppellant": "redacted",
+        "attendingHomeOfficeLegalRepresentative": "redacted",
+        "attendingAppellantsLegalRepresentative": "redacted",
+        "newMatters": "redacted"
+    }
+
+    # Name -> placeholder value for whole CSV columns.
+    # NOTE(review): presumably these are column names of the exported CSVs;
+    # the consumer is not visible here -- confirm.
+    csv_rows_to_redact = {
+        "user_first_name": "redacted",
+        "user_last_name": "redacted"
+    }
+
+
+# Shared instance; the other scripts do `from filepath_settings import settings`.
+settings = Settings()
diff --git a/bin/utils/python_scripts/init.py b/bin/utils/python_scripts/init.py
diff --git a/bin/utils/python_scripts/input_csv_files/dummy.txt b/bin/utils/python_scripts/input_csv_files/dummy.txt
diff --git a/bin/utils/python_scripts/output_csv_files/dummy.txt b/bin/utils/python_scripts/output_csv_files/dummy.txt
diff --git a/bin/utils/python_scripts/output_jsons/dummy.txt b/bin/utils/python_scripts/output_jsons/dummy.txt
diff --git a/bin/utils/python_scripts/prep_import_data.py b/bin/utils/python_scripts/prep_import_data.py
@@ -0,0 +1,68 @@
+import os
+import csv
+
+from filepath_settings import settings
+from create_jsons_from_event_csv import create_jsons_from_csv
+from redact_info_from_json import redact_values_from_csv
+
+
+def prep_import_data(directory: str = settings.exported_csv_dir, events_to_get_individual_json: list[int] = None):
+    """
+    Prepare the most recent exported case CSVs in ``directory`` for importing.
+
+    Exported files should be in format: case_event_202402080516.csv and case_data_202402080518.csv
+    (default export pattern)
+
+    Steps, in order:
+      1. Locate the newest ``case_event`` and ``case_data`` exports.
+      2. If ``events_to_get_individual_json`` is non-empty, dump those event
+         rows to individual JSON files.
+      3. Redact the case data CSV and make the result world-accessible.
+      4. Prompt for the id assigned to the imported case data.
+      5. Redact the case event CSV, overwrite its ``case_data_id`` column with
+         the entered id and make the result world-accessible.
+
+    Args:
+        directory: Directory searched for exported CSV files.
+        events_to_get_individual_json: 1-based event row numbers to export as
+            JSON; ``None`` or empty skips that step.
+
+    Raises:
+        ValueError: If the entered case data id is blank.
+    """
+    latest_case_event_data = get_latest_file(directory, 'case_event')
+    latest_case_data = get_latest_file(directory, 'case_data')
+    if events_to_get_individual_json:
+        create_jsons_from_csv(latest_case_event_data, events=events_to_get_individual_json)
+    redacted_case_data = redact_values_from_csv(latest_case_data)
+    os.chmod(redacted_case_data, 0o777)
+    # Strip stray whitespace from the typed value; the old utf-8 encode/decode
+    # round trip never changed the string and has been dropped.
+    case_data_id = input(
+        'Import the redacted case data CSV and retrieve the correct case id.\nEnter the new case data id:').strip()
+    if not case_data_id:
+        # A blank id would otherwise be written into every event row.
+        raise ValueError('No case data id was entered; the case event data was not processed.')
+    redacted_case_event_data = redact_values_from_csv(latest_case_event_data)
+    redacted_replaced_event_data = replace_case_data_id(case_data_id, redacted_case_event_data)
+    os.chmod(redacted_replaced_event_data, 0o777)
+
+
+def get_latest_file(dir_path: str, file_prefix: str) -> str:
+    """Return the path of the newest ``<file_prefix>_<timestamp>.csv`` in a directory.
+
+    Only files named exactly ``<file_prefix>_`` followed by a 12-digit
+    timestamp and ``.csv`` are considered, so the returned path always refers
+    to a file that was listed. Previously any name merely containing the
+    prefix (for example ``redacted_case_event_202403010000.csv``) could
+    contribute its timestamp and yield a path that does not exist.
+
+    Args:
+        dir_path: Directory to search (not recursive).
+        file_prefix: File name prefix, e.g. ``'case_event'``.
+
+    Returns:
+        ``dir_path`` joined with the matching file name that has the largest
+        timestamp.
+
+    Raises:
+        ValueError: If no file in ``dir_path`` matches the expected pattern.
+    """
+    # Imported locally because the module's import block does not include re.
+    import re
+
+    pattern = re.compile(re.escape(file_prefix) + r'_([0-9]{12})\.csv')
+    stamps = [
+        match.group(1)
+        for match in map(pattern.fullmatch, os.listdir(dir_path))
+        if match
+    ]
+    if not stamps:
+        raise ValueError(
+            f"No '{file_prefix}_<12-digit timestamp>.csv' file found in {dir_path}")
+    # Fixed-width digit strings order chronologically, so no int conversion
+    # is needed (and leading zeros are preserved in the file name).
+    latest = max(stamps)
+    return os.path.join(dir_path, f"{file_prefix}_{latest}.csv")
+
+
+def replace_case_data_id(new_id: str, file_path: str):
+    """Overwrite every ``case_data_id`` value in a CSV file, in place.
+
+    The whole file is read into memory, the ``case_data_id`` column (when
+    present) is set to ``new_id`` on every row, and the file is rewritten with
+    the same header.
+
+    Args:
+        new_id: Value written to the ``case_data_id`` column of every row.
+        file_path: Path of the CSV file to rewrite.
+
+    Returns:
+        ``file_path``, so the call can be chained.
+    """
+    # newline='' on both read and write, as the csv module requires, so line
+    # breaks inside quoted fields survive the round trip unchanged.
+    with open(file_path, 'r', newline='') as file:
+        reader = csv.DictReader(file)
+        # Capture the header while the file is still open.
+        fieldnames = reader.fieldnames
+        data_list = list(reader)
+
+    for row in data_list:
+        if 'case_data_id' in row:
+            row['case_data_id'] = new_id
+
+    with open(file_path, 'w', newline='') as file:
+        writer = csv.DictWriter(file, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(data_list)
+    return file_path
+
+
+# Run the interactive preparation only when executed as a script, so that
+# importing this module (for example from a unit test) does not prompt for input.
+if __name__ == "__main__":
+    # Export the first twelve event rows as individual JSON files.
+    prep_import_data(events_to_get_individual_json=range(1, 13))