-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* DIAC-226 setup majority of scripts to work. Searches for exported csvs and redacts, transforms and outputs them ready for importing * DIAC-226 updated python scripts * Finished scripts to properly output all wanted files with correct permissions * Added comments to explain how to use script * Adding proper CSV row searcher and try clauses for errors. Also added additional fields to be redacted * QOL changes * Various refactoring. Created filepath_settings file to manage filepaths and place redacted fields dict, refactored scripts to now use input and output directories * Added dummy files within directories * Added extra fields, wrote first unit test for txt converter and renamed file
- Loading branch information
1 parent
bbc28ba
commit 2b0da41
Showing
11 changed files
with
291 additions
and
110 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import json | ||
|
||
|
||
# Your JSON data (example) 1677498210980054-data-annotated.json | ||
def convert_json_to_txt(file_path):
    """Write a compact, single-line copy of a JSON file next to the original.

    The JSON document at ``file_path`` is parsed and re-serialised without any
    insignificant whitespace, then written to a sibling file with the same
    name but with its final extension replaced by ``.txt``.

    Args:
        file_path: Path of the JSON file to convert.

    Returns:
        The path of the ``.txt`` file that was written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    import os  # local import: this module only imports json at file level

    with open(file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    # No spaces after separators keeps the text as small as possible.
    text_data = json.dumps(json_data, separators=(",", ":"))

    # Swap only the final extension. Splitting on the first "." would
    # truncate paths such as "./event_1.json" or "export.v2/event_1.json".
    output_file = os.path.splitext(file_path)[0] + ".txt"

    with open(output_file, "w", encoding="utf-8") as text_file:
        text_file.write(text_data)

    return output_file
|
||
|
||
# convert_json_to_txt("event_1.json") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import csv | ||
import json | ||
import os | ||
|
||
from filepath_settings import settings | ||
|
||
|
||
def create_jsons_from_csv(csv_file, events: list[int] = None, output_dir_name_suffix: str = 'latest'):
    """Export the ``data`` column of selected CSV rows as individual JSON files.

    Rows are numbered from 1 in file order. For every selected row that has a
    ``data`` column, the cell is parsed as JSON and written, indented by two
    spaces, to ``event_<row number>.json`` inside the directory returned by
    ``make_output_dir(output_dir_name_suffix)``. Each written file is made
    readable, writable and executable by everyone (mode ``0o777``).

    Args:
        csv_file: Path of the CSV file to read.
        events: 1-based row numbers to export. ``None`` (the default) exports
            every row.
        output_dir_name_suffix: Suffix handed to ``make_output_dir`` to name
            the output directory.

    Raises:
        OSError: If the CSV cannot be read or a JSON file cannot be written.
        json.JSONDecodeError: If a selected ``data`` cell is not valid JSON.
    """
    # None means "no filter"; a set gives constant-time membership tests.
    wanted = None if events is None else set(events)

    # newline='' lets the csv module handle line breaks inside quoted cells.
    with open(csv_file, 'r', newline='') as file:
        dir_name = make_output_dir(output_dir_name_suffix)
        for events_counter, row in enumerate(csv.DictReader(file), start=1):
            if 'data' not in row:
                continue
            if wanted is not None and events_counter not in wanted:
                continue
            data = json.loads(row['data'])
            full_filepath = os.path.join(dir_name, f"event_{events_counter}.json")
            with open(full_filepath, 'w') as json_file:
                json.dump(data, json_file, indent=2)
            os.chmod(full_filepath, 0o777)
|
||
|
||
def make_output_dir(case_name: str) -> str:
    """Ensure the JSON output directory for ``case_name`` exists and return it.

    The directory is ``output_jsons_<case_name>`` under
    ``settings.output_json_directory``. Missing parent directories are created
    as well.

    Args:
        case_name: Suffix used to name the directory.

    Returns:
        The full path of the output directory.

    Raises:
        OSError: If the directory cannot be created or its mode changed.
    """
    full_filepath = os.path.join(settings.output_json_directory, f'output_jsons_{case_name}')
    try:
        # Attempt the creation directly instead of checking first, so there is
        # no window between the check and the creation.
        os.makedirs(full_filepath)
    except FileExistsError:
        # Already present: leave it, and its permissions, untouched.
        pass
    else:
        # Permissions are only widened for a directory this call created.
        os.chmod(full_filepath, 0o777)
    return full_filepath
|
||
# create_jsons_from_csv('case_event_202402071630.csv', events=list(range(1, 7)), output_dir_name_suffix='5405') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import os | ||
|
||
# All working directories are resolved relative to the folder holding this module.
PROJECT_DIR = os.path.split(__file__)[0]
# The scripts currently live directly in the project folder.
PYTHON_SCRIPTS_DIR = PROJECT_DIR
EXPORTED_CSV_INPUT_DIR, OUTPUT_CSV_DIR, OUTPUT_JSON_DIRECTORY = (
    os.path.join(PYTHON_SCRIPTS_DIR, leaf)
    for leaf in ('input_csv_files', 'output_csv_files', 'output_jsons')
)
|
||
|
||
class Settings:
    """Shared configuration: working directories and redaction lookup tables.

    All values are class attributes, so they can be read from the class or
    from the module-level ``settings`` instance.
    """

    # Directory layout, taken from the module-level path constants.
    project_dir = PROJECT_DIR
    scripts_dir = PYTHON_SCRIPTS_DIR
    exported_csv_dir = EXPORTED_CSV_INPUT_DIR
    output_csv_dir = OUTPUT_CSV_DIR
    output_json_directory = OUTPUT_JSON_DIRECTORY

    # Maps a JSON field name to the dummy value that replaces the real one.
    # Presumably consumed by the redaction script; verify there how keys are
    # matched.
    # NOTE(review): most keys are all-lowercase but several are camelCase or
    # capitalised (e.g. 'dateToAvoidReason', 'Name'). Confirm the lookup is
    # case-insensitive, otherwise one of the two groups can never match.
    # Placeholder e-mail addresses use the reserved example.com domain so they
    # are syntactically valid and cannot reach a real mailbox.
    replace_mapping_dict = {
        'email': 'redacted@example.com',
        'postcode': 'N11 1yz',
        'dateofbirth': '1980-01-01',
        'document_filename': 'redacted.pdf',
        'document_url': 'http://dm-store-aat.service.core-compute-aat.internal/documents/9ce3f9d5-31ef-4021-9aa5-4d017c404cfe',
        'document_binary_url': 'http://dm-store-aat.service.core-compute-aat.internal/documents/9ce3f9d5-31ef-4021-9aa5-4d017c404cfe/binary',
        'name': 'redacted.pdf',
        'filename': 'redacted.pdf',
        'addressline1': '10 street',
        'addressline2': 'town',
        'addressline3': 'city',
        'attendingjudge': 'redacted',
        'feedescription': 'redacted',
        'searchpostcode': 'n11 1yz',
        'posttown': 'town',
        'details': 'redacted',
        'decisionreason': 'redacted',
        'appellantfamilyname': 'redacted',
        'appellantgivennames': 'redacted',
        'appellantdateofbirth': '1980-01-01',
        'appellantemailaddress': 'redacted@example.com',
        'casenotedescription': 'redacted',
        'casenotesubject': 'redacted',
        'user': 'redacted',
        'question': 'redacted',
        'explanation': 'redacted',
        'address': 'redacted',
        'bundlefilenameprefix': 'EA 50111 2023',
        'casenamehmctsinternal': 'redacted',
        'hmctscasenameinternal': 'redacted',
        'appellantnamefordisplay': 'redacted',
        'reasonsforappealdecision': 'redacted',
        'description': 'redacted',
        'answer': 'redacted',
        'legalrepname': 'redacted',
        'mobilenumber': '07451111111',
        'legalrepcompany': 'redacted',
        'partyname': 'redacted',
        'legalrepcompanyname': 'redacted',
        'county': 'redacted',
        'legalrepresentativename': 'redacted',
        'legalrepreferencenumber': 'AA/1234',
        'directioneditexplanation': 'redacted',
        'fullname': 'redacted',
        'dayofbirth': 1,
        'familyname': 'redacted',
        'yearofbirth': 1980,
        'monthofbirth': 1,
        'displaydateofbirth': '10 Jan 1980',
        'documentreference': '012345678',
        'displayappellantdetailstitle': 'redacted',
        'displayapplicationdetailstitle': 'redacted',
        'homeofficesearchresponse': 'redacted',
        'remotevideocalldescription': 'redacted',
        'hearingdaterangedescription': 'redacted',
        'interpreterlanguagereadonly': 'Language\t\tEnglish\nDialect\t\t\tENG',
        'legalrepresentativeemailaddress': 'redacted@example.com',
        'remotevideocalltribunalresponse': 'redacted',
        'appellantnationalitiesdescription': 'France',
        'language': 'English',
        'languagedialect': 'redacted',
        'legalaidaccountnumber': 'OG123V1',
        'appellantphonenumber': '07451111111',
        'givenname': 'redacted',
        'data': 'redacted',
        'witnessname': 'redacted',
        'witnessdetailsreadonly': 'redacted',
        'multimediaTribunalResponse': 'redacted',
        'appellantfullname': 'redacted',
        'endappealapprovername': 'redacted',
        'dateToAvoidReason': 'redacted',
        'sponsorNameForDisplay': 'redacted',
        'sponsorAddressForDisplay': '10 street',
        'sponsorMobileNumber': '07451111111',
        'Name': 'redacted',
        'attendingAppellant': 'redacted',
        'attendingHomeOfficeLegalRepresentative': 'redacted',
        'attendingAppellantsLegalRepresentative': 'redacted',
        'newMatters': 'redacted',
    }

    # Maps a CSV column name to the dummy value written in its place.
    csv_rows_to_redact = {
        'user_first_name': 'redacted',
        'user_last_name': 'redacted',
    }


# Shared instance imported by the other scripts.
settings = Settings()
Empty file.
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import os | ||
import csv | ||
|
||
from filepath_settings import settings | ||
from create_jsons_from_event_csv import create_jsons_from_csv | ||
from redact_info_from_json import redact_values_from_csv | ||
|
||
|
||
def prep_import_data(directory: str = settings.exported_csv_dir, events_to_get_individual_json: list[int] = None):
    """Redact the newest exported case CSVs and prepare them for import.

    The newest ``case_event_<timestamp>.csv`` and ``case_data_<timestamp>.csv``
    in ``directory`` are located (default export pattern, e.g.
    ``case_event_202402080516.csv`` and ``case_data_202402080518.csv``).
    The case data file is redacted first. The user is then prompted for the id
    the case received on import, and that id is written into the
    ``case_data_id`` column of the redacted case event file. Both redacted
    files are made readable, writable and executable by everyone (``0o777``).

    Args:
        directory: Folder holding the exported CSV files. Defaults to
            ``settings.exported_csv_dir``.
        events_to_get_individual_json: Optional 1-based event row numbers to
            additionally dump as individual JSON files. Nothing is dumped when
            this is ``None`` or empty.

    Raises:
        ValueError: If no id is entered at the prompt.
    """
    latest_case_event_data = get_latest_file(directory, 'case_event')
    latest_case_data = get_latest_file(directory, 'case_data')

    if events_to_get_individual_json:
        create_jsons_from_csv(latest_case_event_data, events=events_to_get_individual_json)

    redacted_case_data = redact_values_from_csv(latest_case_data)
    os.chmod(redacted_case_data, 0o777)

    # Stray whitespace from the prompt would otherwise be copied into every
    # row of the event file.
    case_data_id = input(
        'Import the redacted case data CSV and retrieve the correct case id.\nEnter the new case data id:'
    ).strip()
    if not case_data_id:
        # Fail before the event file is touched rather than blank out every id.
        raise ValueError('No case data id was entered.')

    redacted_case_event_data = redact_values_from_csv(latest_case_event_data)
    redacted_replaced_event_data = replace_case_data_id(case_data_id, redacted_case_event_data)
    os.chmod(redacted_replaced_event_data, 0o777)
|
||
|
||
def get_latest_file(dir_path: str, file_prefix: str) -> str:
    """Return the path of the newest ``<file_prefix>_<timestamp>.csv`` in a folder.

    Only files named exactly ``<file_prefix>_`` followed by a 12-digit
    timestamp and ``.csv`` are considered. The file with the largest
    timestamp wins.

    Args:
        dir_path: Folder to search (not recursive).
        file_prefix: Leading part of the file name, e.g. ``case_event``.

    Returns:
        ``dir_path`` joined with the name of the newest matching file.

    Raises:
        ValueError: If no file in ``dir_path`` matches the pattern.
        OSError: If ``dir_path`` cannot be listed.
    """
    head = f"{file_prefix}_"
    tail = ".csv"
    stamp_length = 12

    # Map each timestamp to the real file name so the result always names a
    # file that exists, not one rebuilt from the prefix and timestamp.
    by_stamp = {}
    for name in os.listdir(dir_path):
        if not (name.startswith(head) and name.endswith(tail)):
            continue
        stamp = name[len(head):-len(tail)]
        if len(stamp) == stamp_length and stamp.isascii() and stamp.isdigit():
            by_stamp[int(stamp)] = name

    if not by_stamp:
        raise ValueError(
            f"No file named '{head}<{stamp_length}-digit timestamp>{tail}' found in {dir_path!r}"
        )

    return os.path.join(dir_path, by_stamp[max(by_stamp)])
|
||
|
||
def replace_case_data_id(new_id: str, file_path: str):
    """Overwrite the ``case_data_id`` column of a CSV file, in place.

    Every row's ``case_data_id`` value is set to ``new_id`` and the file is
    rewritten with the same columns in the same order. A file without that
    column is rewritten unchanged; a file with no header row is left untouched.

    Args:
        new_id: Value to store in the ``case_data_id`` column of every row.
        file_path: Path of the CSV file to modify.

    Returns:
        ``file_path``, unchanged, for convenient chaining.

    Raises:
        OSError: If the file cannot be read or written.
    """
    # newline='' lets the csv module see line breaks inside quoted cells
    # exactly as stored, so they survive the round trip.
    with open(file_path, 'r', newline='') as file:
        reader = csv.DictReader(file)
        data_list = list(reader)
        # Captured while the file is still open; the header cannot be read
        # once it is closed.
        fieldnames = reader.fieldnames

    if not fieldnames:
        # Empty file: there is nothing to replace or rewrite.
        return file_path

    if 'case_data_id' in fieldnames:
        for row in data_list:
            row['case_data_id'] = new_id

    with open(file_path, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data_list)
    return file_path
|
||
|
||
if __name__ == "__main__":
    # Guarded so that importing this module (for example from a test) does
    # not start the interactive workflow and block on the id prompt.
    prep_import_data(events_to_get_individual_json=list(range(1, 13)))
Oops, something went wrong.