Skip to content

Commit

Permalink
Create JSON_readDATA
Browse files Browse the repository at this point in the history
  • Loading branch information
Monika Barget authored Mar 29, 2021
1 parent c1c3fa9 commit 7e44023
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions JSON_readDATA
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
import os.path
from os.path import dirname, join
import json
import re

in_directory=("C:\\Users\\mbarg\\Google Drive\\ACADEMIA\\FeministDH for Susan\\6_JSON files Letters 1916-1923\\OPEN4")
out_directory=("C:\\Users\\mbarg\\Google Drive\\ACADEMIA\\FeministDH for Susan\\7_Transcriptions from JSON files\\\JSON_April27")

items=(os.listdir(in_directory))
for i in items:
filename=os.path.join(in_directory, i)
indata=open(filename,"r", encoding="utf-8", errors="ignore")
jsondata = json.loads(indata.read())
print(type(jsondata))
print(len(jsondata))

for n in range(len(jsondata)):
try:
outdata = (jsondata[n])
if outdata:
letter_content1=outdata.get('element')
if letter_content1:
letter_content2=json.loads(letter_content1)
letter_content3=letter_content2.get('pages')
for r in range(len(letter_content3)):
letter_content4=letter_content3[r]['transcription']

letter_ID=str(i+str(n)+"."+str(r))

if letter_content4:
print(letter_ID, ":", letter_content4)
cleanr = re.compile('<.*?>')
cleantext = re.sub(cleanr, ' ', letter_content4)
print(cleantext)

out_file_name=os.path.join(out_directory, str(letter_ID)+".txt")
out_file=open(out_file_name, "w", encoding="utf-8")

toFile=str(cleantext)
out_file.write(toFile)
out_file.close()
continue
else:
continue
else:
continue
except KeyError:
print("Not found.")
continue



0 comments on commit 7e44023

Please sign in to comment.