Skip to content

Commit

Permalink
DIAC-232 python scripts (#1954)
Browse files Browse the repository at this point in the history
* DIAC-226 set up the majority of the scripts to work. Searches for exported CSVs, then redacts, transforms and outputs them ready for importing

* DIAC-226 updated python scripts

* Finished scripts to properly output all wanted files with correct permissions

* Added comments to explain how to use script

* Added a proper CSV row searcher and try clauses for errors. Also added additional fields to be redacted

* QOL changes

* Various refactoring. Created filepath_settings file to manage filepaths and place redacted fields dict, refactored scripts to now use input and output directories

* Added dummy files within directories

* Added extra fields, wrote first unit test for txt converter and renamed file
  • Loading branch information
JakeCohenSol authored Feb 12, 2024
1 parent bbc28ba commit 2b0da41
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 110 deletions.
18 changes: 0 additions & 18 deletions bin/utils/python_scripts/convert-json-to-txt.py

This file was deleted.

20 changes: 20 additions & 0 deletions bin/utils/python_scripts/convert_json_to_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import json


# Example input file name: 1677498210980054-data-annotated.json
def convert_json_to_txt(file_path):
    """Write a compact, single-line copy of a JSON file next to it as ``.txt``.

    The JSON document at ``file_path`` is parsed and re-serialised with no
    whitespace between tokens, then written to a file whose path is
    ``file_path`` with its final extension replaced by ``.txt``.

    Args:
        file_path: Path of the JSON file to convert.

    Returns:
        The path of the text file that was written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input file does not contain valid JSON.
    """
    # Imported locally because this module only imports ``json`` at the top.
    import os

    # Read JSON data from the input file
    with open(file_path, "r") as json_file:
        json_data = json.load(json_file)

    # Convert JSON data to a compact text representation
    text_data = json.dumps(json_data, separators=(',', ':'))

    # Strip only the final extension. Splitting on the first "." would
    # truncate paths such as "./event_1.json" or "v1.2/event_1.json".
    output_file = os.path.splitext(file_path)[0] + ".txt"

    # Write the compact text data to a text file
    with open(output_file, "w") as text_file:
        text_file.write(text_data)

    return output_file


# convert_json_to_txt("event_1.json")
32 changes: 32 additions & 0 deletions bin/utils/python_scripts/create_jsons_from_event_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import csv
import json
import os

from filepath_settings import settings


def create_jsons_from_csv(csv_file, events: list[int] = None, output_dir_name_suffix: str = 'latest'):
    """Write the ``data`` column of selected CSV rows to individual JSON files.

    Rows are numbered from 1 in the order they appear in the CSV. For each
    selected row that has a ``data`` column, the column is parsed as JSON and
    written, indented by two spaces, to ``event_<row number>.json`` inside the
    directory returned by ``make_output_dir(output_dir_name_suffix)``. Each
    written file is given mode 0o777.

    Args:
        csv_file: Path of the CSV file to read.
        events: 1-based row numbers to export. ``None`` (the default) exports
            every row; previously the default raised ``TypeError``.
        output_dir_name_suffix: Suffix passed to ``make_output_dir`` to name
            the output directory.

    Raises:
        json.JSONDecodeError: If a selected row's ``data`` is not valid JSON.
    """
    # A set gives constant-time membership tests; None means "no filtering".
    wanted_events = None if events is None else set(events)

    # newline='' is what the csv module expects for files it parses itself.
    with open(csv_file, 'r', newline='') as file:
        csv_reader = csv.DictReader(file)
        dir_name = make_output_dir(output_dir_name_suffix)
        # The counter advances for every row, whether or not it is exported,
        # so the number in the file name is always the row's position.
        for events_counter, row in enumerate(csv_reader, start=1):
            if 'data' not in row:
                continue
            if wanted_events is not None and events_counter not in wanted_events:
                continue
            data = json.loads(row['data'])
            filename = f"event_{events_counter}.json"
            full_filepath = os.path.join(dir_name, filename)
            with open(full_filepath, 'w') as json_file:
                json.dump(data, json_file, indent=2)
            os.chmod(full_filepath, 0o777)


def make_output_dir(case_name: str) -> str:
    """Return the JSON output directory for ``case_name``, creating it if absent.

    The directory is ``output_jsons_<case_name>`` underneath
    ``settings.output_json_directory``. A directory created here is given
    mode 0o777; one that already exists is returned untouched.

    Args:
        case_name: Suffix used to build the directory name.

    Returns:
        The full path of the output directory.
    """
    target_dir = os.path.join(settings.output_json_directory, f'output_jsons_{case_name}')
    if os.path.exists(target_dir):
        return target_dir

    os.makedirs(target_dir)
    os.chmod(target_dir, 0o777)
    return target_dir

# create_jsons_from_csv('case_event_202402071630.csv', events=list(range(1, 7)), output_dir_name_suffix='5405')
105 changes: 105 additions & 0 deletions bin/utils/python_scripts/filepath_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import os

# Directory that contains this settings module.
PROJECT_DIR = os.path.dirname(__file__)
# The scripts directory is the same location as the project directory.
PYTHON_SCRIPTS_DIR = PROJECT_DIR

# Sub-directories of the scripts directory used for input and output files.
EXPORTED_CSV_INPUT_DIR = os.path.join(PYTHON_SCRIPTS_DIR, "input_csv_files")
OUTPUT_CSV_DIR = os.path.join(PYTHON_SCRIPTS_DIR, "output_csv_files")
OUTPUT_JSON_DIRECTORY = os.path.join(PYTHON_SCRIPTS_DIR, "output_jsons")


class Settings:
    """Shared configuration for the scripts in this directory.

    Exposes the module-level directory paths as class attributes and holds
    two dictionaries of replacement values: ``replace_mapping_dict`` and
    ``csv_rows_to_redact``.
    """

    # Directory paths, copied from the module-level constants.
    project_dir = PROJECT_DIR
    scripts_dir = PYTHON_SCRIPTS_DIR
    exported_csv_dir = EXPORTED_CSV_INPUT_DIR
    output_csv_dir = OUTPUT_CSV_DIR
    output_json_directory = OUTPUT_JSON_DIRECTORY

    # Maps a field name to the placeholder value that should replace it.
    # NOTE(review): most keys are lowercase but several are camelCase or
    # capitalised (e.g. "multimediaTribunalResponse", "Name") - confirm how the
    # consuming redaction code compares keys, as that code is not visible here.
    # NOTE(review): the '[email protected]' values look like an obfuscated
    # placeholder rather than an address - confirm the intended value.
    replace_mapping_dict = {
        'email': '[email protected]',
        'postcode': 'N11 1yz',
        'dateofbirth': '1980-01-01',
        'document_filename': 'redacted.pdf',
        'document_url': 'http://dm-store-aat.service.core-compute-aat.internal/documents/9ce3f9d5-31ef-4021-9aa5-4d017c404cfe',
        'document_binary_url': 'http://dm-store-aat.service.core-compute-aat.internal/documents/9ce3f9d5-31ef-4021-9aa5-4d017c404cfe/binary',
        'name': 'redacted.pdf',
        'filename': 'redacted.pdf',
        'addressline1': '10 street',
        'addressline2': 'town',
        'addressline3': 'city',
        'attendingjudge': 'redacted',
        'feedescription': 'redacted',
        'searchpostcode': 'n11 1yz',
        'posttown': 'town',
        'details': 'redacted',
        'decisionreason': 'redacted',
        'appellantfamilyname': 'redacted',
        'appellantgivennames': 'redacted',
        'appellantdateofbirth': '1980-01-01',
        'appellantemailaddress': '[email protected]',
        'casenotedescription': 'redacted',
        'casenotesubject': 'redacted',
        'user': 'redacted',
        'question': 'redacted',
        'explanation': 'redacted',
        'address': 'redacted',
        'bundlefilenameprefix': 'EA 50111 2023',
        'casenamehmctsinternal': 'redacted',
        'hmctscasenameinternal': 'redacted',
        'appellantnamefordisplay': 'redacted',
        'reasonsforappealdecision': 'redacted',
        'description': 'redacted',
        'answer': 'redacted',
        "legalrepname": "redacted",
        "mobilenumber": "07451111111",
        "legalrepcompany": "redacted",
        "partyname": "redacted",
        "legalrepcompanyname": "redacted",
        "county": "redacted",
        "legalrepresentativename": "redacted",
        "legalrepreferencenumber": "AA/1234",
        "directioneditexplanation": 'redacted',
        "fullname": "redacted",
        "dayofbirth": 1,
        "familyname": "redacted",
        "yearofbirth": 1980,
        "monthofbirth": 1,
        "displaydateofbirth": "10 Jan 1980",
        "documentreference": "012345678",
        "displayappellantdetailstitle": "redacted",
        "displayapplicationdetailstitle": "redacted",
        "homeofficesearchresponse": 'redacted',
        "remotevideocalldescription": 'redacted',
        "hearingdaterangedescription": 'redacted',
        "interpreterlanguagereadonly": "Language\t\tEnglish\nDialect\t\t\tENG",
        "legalrepresentativeemailaddress": "[email protected]",
        "remotevideocalltribunalresponse": "redacted",
        "appellantnationalitiesdescription": "France",
        "language": "English",
        "languagedialect": "redacted",
        "legalaidaccountnumber": "OG123V1",
        "appellantphonenumber": "07451111111",
        "givenname": "redacted",
        "data": "redacted",
        "witnessname": "redacted",
        "witnessdetailsreadonly": "redacted",
        "multimediaTribunalResponse": "redacted",
        "appellantfullname": "redacted",
        "endappealapprovername": "redacted",
        "dateToAvoidReason": "redacted",
        "sponsorNameForDisplay": "redacted",
        "sponsorAddressForDisplay": "10 street",
        "sponsorMobileNumber": "07451111111",
        "Name": "redacted",
        "attendingAppellant": "redacted",
        "attendingHomeOfficeLegalRepresentative": "redacted",
        "attendingAppellantsLegalRepresentative": "redacted",
        "newMatters": "redacted"
    }

    # Replacement values keyed by name; presumably these are CSV column names
    # whose values are redacted directly - verify against the redaction code.
    csv_rows_to_redact = {
        "user_first_name": "redacted",
        "user_last_name": "redacted"
    }


# Shared instance imported by the other scripts (`from filepath_settings import settings`).
settings = Settings()
Empty file.
Empty file.
Empty file.
Empty file.
68 changes: 68 additions & 0 deletions bin/utils/python_scripts/prep_import_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os
import csv

from filepath_settings import settings
from create_jsons_from_event_csv import create_jsons_from_csv
from redact_info_from_json import redact_values_from_csv


def prep_import_data(directory: str = settings.exported_csv_dir, events_to_get_individual_json: list[int] = None):
    """Redact and transform the most recent exported CSVs ready for importing.

    The newest ``case_event`` and ``case_data`` exports in ``directory`` are
    located and redacted. After the case data is redacted the user is asked
    for the id of the newly imported case; that id is then written into the
    ``case_data_id`` column of the redacted case event CSV. Both output files
    are given mode 0o777.

    Exported files should be in format: case_event_202402080516.csv and
    case_data_202402080518.csv (default export pattern).

    Args:
        directory: Directory holding the exported CSV files. Defaults to
            ``settings.exported_csv_dir``.
        events_to_get_individual_json: Optional 1-based event row numbers to
            also write out as individual JSON files. Nothing is written when
            this is empty or ``None``.

    Raises:
        ValueError: If no case data id is entered at the prompt.
    """
    latest_case_event_data = get_latest_file(directory, 'case_event')
    latest_case_data = get_latest_file(directory, 'case_data')
    if events_to_get_individual_json:
        create_jsons_from_csv(latest_case_event_data, events=events_to_get_individual_json)

    redacted_case_data = redact_values_from_csv(latest_case_data)
    os.chmod(redacted_case_data, 0o777)

    # Strip surrounding whitespace so a stray space is not copied into every
    # row of the case event CSV.
    case_data_id = input(
        'Import the redacted case data CSV and retrieve the correct case id.\nEnter the new case data id:').strip()
    if not case_data_id:
        # An empty id would produce event rows that cannot be linked to a case.
        raise ValueError('No case data id was entered; the case event CSV was not prepared.')

    redacted_case_event_data = redact_values_from_csv(latest_case_event_data)
    redacted_replaced_event_data = replace_case_data_id(case_data_id, redacted_case_event_data)
    os.chmod(redacted_replaced_event_data, 0o777)


def get_latest_file(dir_path: str, file_prefix: str) -> str:
    """Return the path of the newest ``<file_prefix>_<timestamp>.csv`` file.

    Only files whose name is exactly the prefix, an underscore, a run of
    digits and the ``.csv`` extension are considered. This keeps files that
    merely contain the prefix (for example ``redacted_case_event_...csv``)
    from influencing the result. The file with the numerically largest
    timestamp wins.

    Args:
        dir_path: Directory to search (not recursive).
        file_prefix: File name prefix, e.g. ``case_event`` or ``case_data``.

    Returns:
        The full path of the matching file with the largest timestamp.

    Raises:
        FileNotFoundError: If no file in ``dir_path`` matches the pattern.
    """
    name_start = f"{file_prefix}_"
    candidates = []
    for name in os.listdir(dir_path):
        stem, extension = os.path.splitext(name)
        if extension != '.csv' or not stem.startswith(name_start):
            continue
        stamp = stem[len(name_start):]
        # isascii() rules out non-ASCII digit characters that int() rejects.
        if stamp and stamp.isascii() and stamp.isdigit():
            candidates.append((int(stamp), name))

    if not candidates:
        raise FileNotFoundError(
            f"No '{file_prefix}_<timestamp>.csv' file found in {dir_path}")

    # Return the name actually found on disk rather than rebuilding it.
    _, latest_name = max(candidates)
    return os.path.join(dir_path, latest_name)


def replace_case_data_id(new_id: str, file_path: str):
    """Overwrite the ``case_data_id`` column of a CSV file with ``new_id``.

    The file is read fully into memory, every row's ``case_data_id`` value is
    replaced, and the file is rewritten in place with the same columns. If
    the file has no ``case_data_id`` column it is rewritten unchanged. An
    empty file (no header row) is left untouched.

    Args:
        new_id: Value to store in ``case_data_id`` for every row.
        file_path: Path of the CSV file to modify in place.

    Returns:
        ``file_path``, for convenient chaining.
    """
    # newline='' lets the csv module handle line endings itself, so "\r\n"
    # inside quoted fields is preserved rather than rewritten on read.
    with open(file_path, 'r', newline='') as file:
        reader = csv.DictReader(file)
        data_list = list(reader)
        # Capture the header while the file is still open; on an empty file
        # the reader would otherwise try to read from a closed handle.
        fieldnames = reader.fieldnames

    if fieldnames is None:
        # No header row at all: nothing to replace and nothing to write.
        return file_path

    # Replace values in the 'case_data_id' column with the new_id
    for row in data_list:
        if 'case_data_id' in row:
            row['case_data_id'] = new_id

    # Write the modified data back to the CSV file
    with open(file_path, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data_list)
    return file_path


# Run the preparation immediately, also exporting event rows 1-12 as JSON files.
# NOTE(review): this executes on import as well as when run as a script;
# consider an `if __name__ == "__main__":` guard if the module is ever imported.
prep_import_data(events_to_get_individual_json=range(1, 13))
Loading

0 comments on commit 2b0da41

Please sign in to comment.