diff --git a/Jenkinsfile b/Jenkinsfile index 1aceeff..61c0e11 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -36,7 +36,7 @@ pipeline{ } } stage('Deploy UAT'){ - when { branch 'master' } + when { branch 'main' } steps { deployImage16("${VERSION}", "uat") } diff --git a/config.py b/config.py index e264569..0adf3e6 100644 --- a/config.py +++ b/config.py @@ -1,59 +1,83 @@ -""" -Flask settings for fecfile-ImageGenerator project. - -""" - -import os -import tempfile - -# Set the temporary directory -tempfile.tempdir = "temp" - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) - -BASE_DIR = os.path.dirname(os.path.dirname(__file__)) -DEBUG = os.environ.get("DEBUG", True) -ALLOWED_HOSTS = ["*"] - - -FORM_TEMPLATES_LOCATION = 'templates/forms/{}.pdf' -HTML_FORM_TEMPLATES_LOCATION = 'templates/forms/F99/{}.html' -FORMS_LOCATION = 'templates/forms/{}' -REQUEST_FILE_LOCATION = 'temp/json/{}.json' -# OUTPUT_FILE_LOCATION = 'output/pdf/{}.pdf' -OUTPUT_DIR_LOCATION = "output/pdf/{}/" - -# ATTACHMENT_FILE_LOCATION = 'temp/{}.pdf' - -# AWS settings - -# AWS SES Configuration Settings - -# AWS_ACCESS_KEY_ID = os.environ.get('ACCESS_KEY', None) -# AWS_SECRET_ACCESS_KEY = os.environ.get('SECRET_KEY', None) -# AWS_HOST_NAME = 'us-east-1' -# AWS_REGION = 'us-east-1' -AWS_SES_AUTO_THROTTLE = ( - 0.5 -) # (default; safety factor applied to rate limit, turn off automatic throttling, set this to None) - -# AWS FECFile components bucket name -AWS_FECFILE_COMPONENTS_BUCKET_NAME = "fecfile-dev-components" - -# if False it will create unique file names for every uploaded file -AWS_S3_FILE_OVERWRITE = True -# the url, that your uploaded JSON and print output will be available at -AWS_S3_FECFILE_COMPONENTS_DOMAIN = ( - "%s.s3.amazonaws.com" % AWS_FECFILE_COMPONENTS_BUCKET_NAME -) - -# the sub-directories of temp and output files -# TEMP_FILES_LOCATION = 'temp' -# OUTPUT_FILE_FOLDER = 'output' - -# TEMP_FILES_URL = "https://%s/%s/{}" % (AWS_S3_FECFILE_COMPONENTS_DOMAIN, TEMP_FILES_LOCATION) -PRINT_OUTPUT_FILE_URL = "https://%s/%s" % (AWS_S3_FECFILE_COMPONENTS_DOMAIN, OUTPUT_DIR_LOCATION) - -FECFILE_UTIL_PRINT_API_URL = os.environ.get('FECFILE_UTIL_URL', 'https://dev-efile-api.efdev.fec.gov/printpdf') -FECFILE_UTIL_API_VERSION = "/v1/fecfileutil" - +""" +Flask settings for fecfile-ImageGenerator project. + +""" + +import os +import tempfile + +# Set the temporary directory +tempfile.tempdir = "temp" + +# Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
+ +BASE_DIR = os.path.dirname(os.path.dirname(__file__)) +DEBUG = os.environ.get("DEBUG", True) +ALLOWED_HOSTS = ["*"] + + +FORM_TEMPLATES_LOCATION = 'templates/forms/{}.pdf' +HTML_FORM_TEMPLATES_LOCATION = 'templates/forms/F99/{}.html' +FORMS_LOCATION = 'templates/forms/{}' +REQUEST_FILE_LOCATION = 'temp/json/{}.json' +# OUTPUT_FILE_LOCATION = 'output/pdf/{}.pdf' +OUTPUT_DIR_LOCATION = "output/pdf/{}/" + +# ATTACHMENT_FILE_LOCATION = 'temp/{}.pdf' + +# AWS settings + +# AWS SES Configuration Settings + +# AWS_ACCESS_KEY_ID = os.environ.get('ACCESS_KEY', None) +# AWS_SECRET_ACCESS_KEY = os.environ.get('SECRET_KEY', None) +# AWS_HOST_NAME = 'us-east-1' +# AWS_REGION = 'us-east-1' +AWS_SES_AUTO_THROTTLE = ( + 0.5 +) # (default; safety factor applied to the rate limit; to turn off automatic throttling, set this to None) + +# AWS FECFile components bucket name + +AWS_FECFILE_OUTPUT_DIRECTORY = os.environ.get('OUTPUT_DIRECTORY', 'output') +AWS_FECFILE_COMPONENTS_BUCKET_NAME = "fecfile-dev-components" +AWS_FECFILE_TEMP_BUCKET_NAME = os.environ.get('TEMP_BUCKET', 'dev-efile-repo') +AWS_FECFILE_PDF_BUCKET_NAME = os.environ.get('PERM_BUCKET', 'fecfile-pdf') + + +# if False, it will create unique file names for every uploaded file +AWS_S3_FILE_OVERWRITE = True +# the URL at which your uploaded JSON and print output will be available +AWS_S3_FECFILE_COMPONENTS_DOMAIN = ( + "%s.s3.amazonaws.com" % AWS_FECFILE_COMPONENTS_BUCKET_NAME +) +# the URL at which your uploaded JSON and print output will be available +AWS_S3_PAGINATION_COMPONENTS_DOMAIN = ( + "%s.s3.amazonaws.com" % AWS_FECFILE_TEMP_BUCKET_NAME +) + +S3_FILE_URL = "https://%s/%s/" % (AWS_S3_FECFILE_COMPONENTS_DOMAIN, AWS_FECFILE_OUTPUT_DIRECTORY) + +# the sub-directories of temp and output files +# TEMP_FILES_LOCATION = 'temp' +# OUTPUT_FILE_FOLDER = 'output' + +# TEMP_FILES_URL = "https://%s/%s/{}" % (AWS_S3_FECFILE_COMPONENTS_DOMAIN, TEMP_FILES_LOCATION) +PRINT_OUTPUT_FILE_URL = "https://%s/%s" % (AWS_S3_FECFILE_COMPONENTS_DOMAIN, OUTPUT_DIR_LOCATION) + +FECFILE_UTIL_PRINT_API_URL = os.environ.get('FECFILE_UTIL_URL', 'https://dev-efile-api.efdev.fec.gov/printpdf') +FECFILE_UTIL_API_VERSION = "/v1/fecfileutil" + +NXG_FEC_PARSER_API_URL = os.environ.get('PARSER_URL', 'https://dev-efile-api.efdev.fec.gov/receiver') +# NXG_FEC_PARSER_API_URL = os.environ.get('PARSER_URL', 'http://localhost:8090') +NXG_FEC_PARSER_API_VERSION = "/v1" + + +# SQS Details +DATA_RECEIVER_SQS_QUEUE = os.environ.get('DATA_RECEIVER_SQS_QUEUE', 'datareceiver-dev') +VALIDATION_SQS_QUEUE = os.environ.get('VALIDATION_SQS_QUEUE', 'validator-dev') +IMAGE_NUMBER_SQS_QUEUE = os.environ.get('IMAGE_NUMBER_SQS_QUEUE', 'imaging') +# IMAGE_NUMBER_SQS_QUEUE = os.environ.get('IMAGE_NUMBER_SQS_QUEUE', 'validator-dev') +IMAGE_GENERATOR_SQS_QUEUE = os.environ.get('IMAGE_GENERATOR_SQS_QUEUE', 'imaging-generator') +# Timeout for the message in the queue; after this interval other processes will see it again +MESSAGE_VISIBILITY_TIMEOUT = 30 # set this to 0 for testing diff --git a/routes/src/common.py b/routes/src/common.py index 20d690d..aad9260 100644 --- a/routes/src/common.py +++ b/routes/src/common.py @@ -11,7 +11,7 @@ LOGGER = logging.getLogger() -def get_return_envelope(success="true", message="", data=None): +def get_return_envelope(success=True, message="", data=None): """ Builds and returns a 'return envelope' :param str success: Should be 'true' or 'false' diff --git a/routes/src/controllers.py index b4532f9..e9e1f78 100644 --- a/routes/src/controllers.py +++ 
b/routes/src/controllers.py @@ -1,10 +1,16 @@ import logging import flask +import boto3 +import config as cfg +import urllib.request +import json +import requests from flask import request from flask_cors import CORS -from routes.src import tmoflask, form99, form1m, form24 +from routes.src import tmoflask, form99, form1m, form24, common from routes.src.f3x import form3x +from flask_api import status logger = logging.getLogger() @@ -41,46 +47,78 @@ def print_pdf(): """ form_type = request.form["form_type"] if form_type == "F99": - return form99.print_f99_pdftk_html("") + return form99.print_f99_pdftk_html() elif form_type == "F3X": - return form3x.print_pdftk("") - # return form3x_copy.print_pdftk('') + return form3x.print_pdftk() elif form_type == "F1M": - return form1m.print_pdftk("") + return form1m.print_pdftk() elif form_type == "F24": - return form24.print_pdftk("") + return form24.print_pdftk() -@app.route("/paginate", methods=["POST"]) -def paginate_pdf(): - """ - This function is being invoked from FECFile and Vendors - HTTP request needs to have form_type, file, and begin_image_num - form_type : F99 - json_file: please refer to below sample JSON - :return: return JSON response - sample: - { - "message": "", - "results": { - "total_pages": 35, - "txn_img_json: { - "xxx0200804xxxxxxx": 29, - "xxx0200123xxxxxxx": 4, - } - }, - success": "true" - } - """ - form_type = request.form["form_type"] +# @app.route("/paginate", methods=["POST"]) +# def paginate_pdf(): +# """ +# This function is being invoked from FECFile and Vendors +# HTTP request needs to have form_type, json_file_name, and begin_image_num +# form_type : F99 +# json_file: please refer to below sample JSON +# begin_image_num: 1 +# :return: return JSON response +# sample: +# { +# "message": "", +# "results": { +# "total_pages": 35, +# "txn_img_json: { +# "xxx0200804xxxxxxx": 1, +# "xxx0200123xxxxxxx": 2, +# } +# }, +# success": "true" +# } +# """ +# form_type = request.json["form_type"] +# if form_type == "F99": +# return form99.print_f99_pdftk_html(paginate=True) +# elif form_type == "F3X": +# return form3x.print_pdftk(paginate=True) +# elif form_type == "F1M": +# return form1m.paginate() +# elif form_type == "F24": +# return form24.paginate() + +# attachment_file_content is for only F99 +def _paginate_pdf( + form_type=None, + file_content=None, + begin_image_num=None, + attachment_file_content=None, +): if form_type == "F99": - return form99.print_f99_pdftk_html("") + response, status = form99.print_f99_pdftk_html( + paginate=True, + file_content=file_content, + begin_image_num=begin_image_num, + attachment_file_content=attachment_file_content, + ) elif form_type == "F3X": - return form3x.print_pdftk("", paginate=True) + response, status = form3x.print_pdftk( + paginate=True, file_content=file_content, begin_image_num=begin_image_num + ) elif form_type == "F1M": - return form1m.print_pdftk("") + response, status = form1m.paginate( + file_content=file_content, begin_image_num=begin_image_num + ) elif form_type == "F24": - return form24.print_pdftk("") + response, status = form24.paginate( + file_content=file_content, begin_image_num=begin_image_num + ) + + response = response.json.get("results") + if status != 400: + return response.get("total_pages"), response.get("txn_img_json") + return None, None @app.route("/stamp_print", methods=["POST"]) @@ -103,3 +141,387 @@ def stamp_print_pdf(): form_type = request.form["form_type"] if form_type == "F99": return form99.print_f99_pdftk_html("stamp") + + +def 
page_count_pdf(form_type=None, file_content=None): + if form_type == "F99": + response, status = form99.print_f99_pdftk_html( + page_count=True, file_content=file_content + ) + elif form_type == "F3X": + response, status = form3x.print_pdftk( + page_count=True, file_content=file_content + ) + elif form_type == "F1M": + response, status = form1m.print_pdftk( + page_count=True, file_content=file_content + ) + elif form_type == "F24": + response, status = form24.print_pdftk( + page_count=True, file_content=file_content + ) + + response = response.json.get("results") + return response.get("total_pages") if status != 400 else None + + +def _print_pdf( + form_type=None, + file_content=None, + begin_image_num=None, + silent_print=False, + filing_timestamp=None, + attachment_file_content=None, + rep_id=None, +): + if form_type == "F99": + response, status = form99.print_f99_pdftk_html( + paginate=False, + file_content=file_content, + begin_image_num=begin_image_num, + attachment_file_content=attachment_file_content, + silent_print=silent_print, + filing_timestamp=filing_timestamp, + rep_id=rep_id, + ) + elif form_type == "F3X": + response, status = form3x.print_pdftk( + paginate=False, + file_content=file_content, + begin_image_num=begin_image_num, + silent_print=silent_print, + filing_timestamp=filing_timestamp, + rep_id=rep_id, + ) + elif form_type == "F1M": + response, status = form1m.print_pdftk( + file_content=file_content, + begin_image_num=begin_image_num, + silent_print=silent_print, + filing_timestamp=filing_timestamp, + rep_id=rep_id, + ) + elif form_type == "F24": + response, status = form24.print_pdftk( + file_content=file_content, + begin_image_num=begin_image_num, + silent_print=silent_print, + filing_timestamp=filing_timestamp, + rep_id=rep_id, + ) + + return response, status + + +@app.route("/parse_next_in_image_number_queue", methods=["GET"]) +def parse_next_in_image_number_queue(): + """***************************************************************************************************************** + Function to run manually from UI/Postman + *****************************************************************************************************************""" + if request.method == "GET": + res = parse_next_filing_from_image_number_queue() + return res + + +def parse_next_filing_from_image_number_queue(): + """***************************************************************************************************************** + Function will get the next report to image from the imaging queue and process it + ******************************************************************************************************************** + 1. The imaging worker will call this function on a set interval, say every 15 seconds + 2. This function will read one message at a time from SQS and set its visibility timeout to N minutes + so that other workers will not see it. 
+ + Sample Message + message_attributes = {'submissionId': {'StringValue': submission_id, 'DataType': 'String'}, + 'committeeId': {'StringValue': committee_id, 'DataType': 'String'}, + 'fileName': {'StringValue': file_name, 'DataType': 'String'}, + 'receivedTime': {'StringValue': str(upload_time), 'DataType': 'String'}, + 'beginImageNumber': {'StringValue': begin_image_number, 'DataType': 'String'} + } + *****************************************************************************************************************""" + sqs = boto3.resource("sqs") + queue = sqs.get_queue_by_name(QueueName=cfg.IMAGE_NUMBER_SQS_QUEUE) + try: + # Getting one message at a time + messages = queue.receive_messages( + MaxNumberOfMessages=1, + MessageAttributeNames=["All"], + VisibilityTimeout=cfg.MESSAGE_VISIBILITY_TIMEOUT, + ) + except Exception as e: + envelope = common.get_return_envelope( + "false", "unable to read message from image number queue" + ) + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST + # messages = {} + # next_imaging = [] + # next_imaging.append({"submissionId":"fab1d0fc-0089-4b47-8b80-0a1f3f970066","committeeId":"C00337733", + # "fileName": "C00337733_fab1d0fc00894b478b800a1f3f970066.json","beginImageNumber":""}) + # image_number = image_number_data(next_imaging[0]) + # message.delete() + # print(image_number) + + if len(messages) > 0: + # Getting the first message + for message in messages: + receipt_handle = message.receipt_handle + # process the messages + msg_body = message.body + next_imaging = [] + print( + "***************************************************************************************************" + ) + print("Getting Message from the SQS: " + str(msg_body)) + print(message.message_attributes) + print( + "***************************************************************************************************" + ) + if message.message_attributes is not None: + next_imaging.append( + { + "submissionId": message.message_attributes.get( + "submissionId" + ).get("StringValue"), + "committeeId": message.message_attributes.get( + "committeeId" + ).get("StringValue"), + "fileName": message.message_attributes.get("fileName").get( + "StringValue" + ), + "beginImageNumber": message.message_attributes.get( + "beginImageNumber" + ).get("StringValue"), + } + ) + # Parsing the data + image_number = image_number_data(next_imaging[0]) + message.delete() + print(image_number) + # return res + res = flask.jsonify({"result": [{"beginImageNum": str(image_number)}]}) + return res + + else: + print("Nothing to process - Message Queue is empty") + envelope = common.get_return_envelope( + "true", "Nothing to process - Message Queue is empty" + ) + # next_imaging = [] + # next_imaging.append( + # { + # "submissionId": "374d7ea3-1718-4763-b9f3-fee88acecc3c", + # "committeeId": "C00024679", + # "fileName": "C00024679_374d7ea317184763b9f3fee88acecc3c.json", + # "beginImageNumber": "20201109000000" + # } + # ) + # image_number_data(next_imaging) + + return flask.jsonify(**envelope), status.HTTP_200_OK + + +def image_number_data(next_imaging=None): + print(next_imaging) + submission_id = next_imaging["submissionId"] + committee_id = next_imaging["committeeId"] + json_file_name = next_imaging["fileName"] + begin_image_number = next_imaging["beginImageNumber"] + # image number should not be null, temporarily assigning a dummy image number + if begin_image_number == "": + begin_image_number = "20201109000000" + + file_url = ( + "https://" + cfg.AWS_S3_PAGINATION_COMPONENTS_DOMAIN + "/" + 
json_file_name + ) + # file_url = "https://dev-efile-repo.s3.amazonaws.com/" + file_name + print(file_url) + + file_content = None + json_data = None + try: + with urllib.request.urlopen(file_url) as url: + file_content = url.read().decode() + json_data = json.loads(file_content) + except Exception as e: + print(e) + + if json_data.get("data"): + data = json_data.get("data") + total_pages = page_count_pdf(data.get("formType"), file_content) + # call parser to update begin image number + data_obj = { + "imageType": "EFILING", + "candCmteId": committee_id, + "formType": data.get("formType"), + "reportType": data.get("reportCode"), + "cvgStartDate": data.get("coverageStartDate"), + "cvgEndDate": data.get("coverageEndDate"), + "submissionId": submission_id, + "totalPages": total_pages, + } + ## data_obj = json.dumps(data_obj) + begin_image_number_object = requests.post( + cfg.NXG_FEC_PARSER_API_URL + + cfg.NXG_FEC_PARSER_API_VERSION + + "/image_number", + data=data_obj, + ) + begin_image_number_json = begin_image_number_object.json() + begin_image_num = begin_image_number_json["beginImageNumber"] + # begin_image_num = 20201109000000 + total_pages, txn_img_json = _paginate_pdf( + data.get("formType"), file_content, begin_image_num + ) + txn_img_json = json.dumps(txn_img_json) + print(total_pages, txn_img_json) + + # Call parser to update JSON tran file + data_obj = {"submissionId": submission_id, "imageJsonText": txn_img_json} + requests.put( + cfg.NXG_FEC_PARSER_API_URL + + cfg.NXG_FEC_PARSER_API_VERSION + + "/image_number", + data=data_obj, + ) + return begin_image_num + + +@app.route("/parse_next_in_image_generator_queue", methods=["GET"]) +def parse_next_in_image_generator_queue(): + """***************************************************************************************************************** + Function to run manually from UI/Postman + *****************************************************************************************************************""" + if request.method == "GET": + res = parse_next_in_image_generator_queue() + return res + + +def parse_next_in_image_generator_queue(): + """***************************************************************************************************************** + Function will get the next report to stamp the image number on the PDF + ******************************************************************************************************************** + 1. The image generator worker will call this function on a set interval, say every 15 seconds + 2. This function will read one message at a time from SQS and set its visibility timeout to N minutes + so that other workers will not see it. 
+ + Sample Message + message_attributes = {'submissionId': {'StringValue': submission_id, 'DataType': 'String'}, + 'committeeId': {'StringValue': committee_id, 'DataType': 'String'}, + 'fileName': {'StringValue': file_name, 'DataType': 'String'}, + 'receivedTime': {'StringValue': str(upload_time), 'DataType': 'String'}, + 'beginImageNumber': {'StringValue': begin_image_number, 'DataType': 'String'} + } + *****************************************************************************************************************""" + sqs = boto3.resource("sqs") + queue = sqs.get_queue_by_name(QueueName=cfg.IMAGE_GENERATOR_SQS_QUEUE) + try: + # Getting One message at a time + messages = queue.receive_messages( + MaxNumberOfMessages=1, + MessageAttributeNames=["All"], + VisibilityTimeout=cfg.MESSAGE_VISIBILITY_TIMEOUT, + ) + except Exception as e: + envelope = common.get_return_envelope( + "false", "unable to read message from image generator queue" + ) + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST + + # next_image_generator = [] + # next_image_generator.append("one") + # res = image_generator_data(next_image_generator) + # print(res) + # return res + if len(messages) > 0: + # Getting the first message + for message in messages: + receipt_handle = message.receipt_handle + # process the messages + msg_body = message.body + next_image_generator = [] + print( + "***************************************************************************************************" + ) + print("Getting Message from the SQS: " + str(msg_body)) + print(message.message_attributes) + print( + "***************************************************************************************************" + ) + if message.message_attributes is not None: + next_image_generator.append( + { + "submissionId": message.message_attributes.get( + "submissionId" + ).get("StringValue"), + "committeeId": message.message_attributes.get( + "committeeId" + ).get("StringValue"), + "fileName": message.message_attributes.get("fileName").get( + "StringValue" + ), + "beginImageNumber": message.message_attributes.get( + "beginImageNumber" + ).get("StringValue"), + "receivedTime": message.message_attributes.get( + "receivedTime" + ).get("StringValue"), + } + ) + # Parsing the data + res = image_generator_data(next_image_generator[0]) + message.delete() + print(res) + return res + else: + print("Nothing to process - Message Queue is empty") + envelope = common.get_return_envelope( + "true", "Nothing to process - Message Queue is empty" + ) + return flask.jsonify(**envelope), status.HTTP_200_OK + + +def image_generator_data(next_image_generator=None): + print(next_image_generator) + submission_id = next_image_generator["submissionId"] + committee_id = next_image_generator["committeeId"] + json_file_name = next_image_generator["fileName"] + begin_image_number = next_image_generator["beginImageNumber"] + filing_timestamp = next_image_generator["receivedTime"] + rep_id = json_file_name[0 : json_file_name.index(".json")] + # rep_id = '8' + # print(rep_id) + + # image number should not be null, temporarily assigning summy image number + # if not begin_image_number: + # begin_image_number = "20201109000000" + # begin_image_number = "20201109000000" + # filing_timestamp = "11/25/2020 1:32PM" + file_url = ( + "https://" + cfg.AWS_S3_FECFILE_COMPONENTS_DOMAIN + "/output/" + json_file_name + ) + # file_url = "https://dev-efile-repo.s3.amazonaws.com/" + file_name + # file_url = 
"https://dev-efile-repo.s3.amazonaws.com/C00000935_4498f6f2b355426ca127708551e34f2f.json" + # file_url = 'https://fecfile-dev-components.s3.amazonaws.com/output/8.json' + # print(file_url) + + file_content = None + json_data = None + try: + with urllib.request.urlopen(file_url) as url: + file_content = url.read().decode() + json_data = json.loads(file_content) + except Exception as e: + print(e) + + if json_data.get("data"): + data = json_data.get("data") + # Stamp PDF + return _print_pdf( + data.get("formType"), + file_content, + begin_image_number, + True, + filing_timestamp, + None, + rep_id, + ) diff --git a/routes/src/f3x/form3x.py b/routes/src/f3x/form3x.py index fd7852b..7d40f4f 100644 --- a/routes/src/f3x/form3x.py +++ b/routes/src/f3x/form3x.py @@ -7,6 +7,7 @@ import shutil import sys import traceback +import urllib.request from collections import OrderedDict from os import path @@ -16,10 +17,15 @@ from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger from PyPDF2.generic import BooleanObject, NameObject, IndirectObject from routes.src import tmoflask, utils, common, form -from routes.src.utils import md5_for_file, directory_files, merge -from routes.src.f3x.helper import calculate_page_count, map_txn_img_num +from routes.src.utils import md5_for_text, md5_for_file, directory_files, merge, error, delete_directory +from routes.src.f3x.helper import ( + calculate_page_count, + calculate_sh3_page_count, + map_txn_img_num, + map_sh3_txn_img_num, +) -# importing prcoess schedule +# importing process schedule from routes.src.schedules.sa_schedule import print_sa_line from routes.src.schedules.sb_schedule import print_sb_line from routes.src.schedules.sc_schedule import print_sc_line @@ -45,46 +51,84 @@ ) -# Error handling -def error(msg): - if flask.request.method == "POST": - envelope = common.get_return_envelope("false", msg) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code - - # stamp_print is a flag that will be passed at the time of submitting a report. 
-def print_pdftk(stamp_print, paginate=False): - # check if json_file is in the request +def print_pdftk( + stamp_print="", + paginate=False, + begin_image_num=None, + page_count=False, + file_content=None, + silent_print=False, + filing_timestamp=None, + rep_id=None, +): + + # check if json_file_name is in the request try: - if "json_file" in request.files: - json_file = request.files.get("json_file") - page_count = ( - True - if request.form.get("page_count") - and request.form.get("page_count").lower() in ["true", "1"] - else False - ) - silent_print = ( - True - if request.form.get("silent_print") - and request.form.get("silent_print").lower() in ["true", "1"] - else False - ) - txn_img_num = None + silent_print = silent_print + txn_img_num = begin_image_num + filing_timestamp = filing_timestamp + + if ( + (page_count and file_content) + or ((paginate or silent_print) and file_content and begin_image_num) + or (not paginate and "json_file" in request.files) + ): + + if page_count and file_content: + json_file_md5 = md5_for_text(file_content) + f3x_json = json.loads(file_content) + + elif (paginate or silent_print) and file_content and begin_image_num: + # using md5_for_text as input is content + json_file_md5 = md5_for_text(file_content) + f3x_json = json.loads(file_content) + + elif not paginate and "json_file" in request.files: + json_file = request.files.get("json_file") + silent_print = ( + True + if request.form.get("silent_print") + and request.form.get("silent_print").lower() in ["true", "1"] + else False + ) + page_count = ( + True + if request.form.get("page_count") + and request.form.get("page_count").lower() in ["true", "1"] + else False + ) - if paginate or silent_print: - txn_img_num = request.form.get("begin_image_num") + if silent_print: + txn_img_num = request.form.get("begin_image_num") - if not txn_img_num: - if flask.request.method == "POST": - envelope = common.get_return_envelope( - "false", "begin_image_num is missing from your request" - ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code + if not txn_img_num: + if flask.request.method == "POST": + envelope = common.get_return_envelope( + "false", "begin_image_num is missing from your request" + ) + status_code = status.HTTP_400_BAD_REQUEST + return flask.jsonify(**envelope), status_code + txn_img_num = int(txn_img_num) + + filing_timestamp = request.form.get("filing_timestamp", None) + + json_file_md5 = md5_for_file(json_file) + json_file.stream.seek(0) + + # save json file as md5 file name + json_file.save( + current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5) + ) - txn_img_num = int(txn_img_num) + # load json file + f3x_json = json.load( + open( + current_app.config["REQUEST_FILE_LOCATION"].format( + json_file_md5 + ) + ) + ) total_no_of_pages = 0 page_no = 1 @@ -127,12 +171,11 @@ def print_pdftk(stamp_print, paginate=False): # generate md5 for json file # FIXME: check if PDF already exist with md5, if exist return pdf instead of re-generating PDF file. 
- json_file_md5 = md5_for_file(json_file) - json_file.stream.seek(0) md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format( json_file_md5 ) + # checking if server has already generated pdf for same json file # if os.path.isdir(md5_directory) and path.isfile(md5_directory + 'all_pages.pdf'): # # push output file to AWS @@ -154,22 +197,14 @@ def print_pdftk(stamp_print, paginate=False): # return flask.jsonify(**envelope), status_code # + # deleting directory if it exists and has any content + delete_directory(md5_directory) if not page_count and not paginate: os.makedirs(md5_directory, exist_ok=True) infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("F3X") - - # save json file as md5 file name - json_file.save( - current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5) - ) outfile = md5_directory + json_file_md5 + "_temp.pdf" - # load json file - f3x_json = json.load( - open(current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5)) - ) - # setting timestamp and imgno to empty as these needs to show up after submission if stamp_print != "stamp": f3x_json["FILING_TIMESTAMP"] = "" @@ -227,18 +262,26 @@ def print_pdftk(stamp_print, paginate=False): f3x_data["efStamp"] = "[Electronically Filed]" # checking if json contains summary details, for individual transactions print there wouldn't be summary - if f3x_summary: + if f3x_data or f3x_summary: total_no_of_pages = 5 if silent_print or paginate: + txn_img_num = int(txn_img_num) txn_img_num += 5 - f3x_data_summary_array = [f3x_data, f3x_summary] - # if 'memoText' in f3x_data and f3x_data['memoText']: + f3x_data_summary_array = [] + + if f3x_data: + f3x_data_summary_array.append(f3x_data) + if f3x_summary: + f3x_data_summary_array.append(f3x_summary) + if f3x_data.get("memoText"): total_no_of_pages += 1 if silent_print or paginate: + txn_img_num = int(txn_img_num) txn_img_num += 1 - else: - f3x_data_summary_array = [f3x_data] + # else: + # f3x_data_summary_array = [f3x_data] + f3x_data_summary = { i: j for x in f3x_data_summary_array for i, j in x.items() } @@ -254,19 +297,35 @@ def print_pdftk(stamp_print, paginate=False): txn_img_num, ) - # print("Total pages:", total_no_of_pages) + if paginate: + summary = {} + if txn_img_json: + summary["begin_image_num"] = min(txn_img_json.values()) + summary["end_image_num"] = max(txn_img_json.values()) + else: + summary["begin_image_num"] = begin_image_num + summary["end_image_num"] = txn_img_num + summary["committeeId"] = f3x_data.get("committeeId", None) + txn_img_json["summary"] = summary for key, value in process_output.items(): schedule_dict[key][0] = value - if f3x_summary and not page_count and not paginate: + if f3x_data_summary and not page_count and not paginate: f3x_data_summary["PAGESTR"] = ( "PAGE " + str(page_no) + " / " + str(total_no_of_pages) ) if silent_print: - f3x_data_summary["IMGNO"] = txn_img_num - 5 - txn_img_num += 1 + subtract_num = 6 if f3x_data.get("memoText") else 5 + f3x_data_summary["IMGNO"] = txn_img_num - subtract_num + f3x_data_summary["IMGNO_FOR_PAGE2"] = txn_img_num - subtract_num + 1 + f3x_data_summary["IMGNO_FOR_PAGE3"] = txn_img_num - subtract_num + 2 + f3x_data_summary["IMGNO_FOR_PAGE4"] = txn_img_num - subtract_num + 3 + f3x_data_summary["IMGNO_FOR_PAGE5"] = txn_img_num - subtract_num + 4 + + if filing_timestamp and page_no == 1: + f3x_data_summary["FILING_TIMESTAMP"] = filing_timestamp pypdftk.fill_form(infile, f3x_data_summary, outfile) shutil.copy(outfile, md5_directory + "F3X_Summary.pdf") @@ -286,7 +345,7 @@ def 
print_pdftk(stamp_print, paginate=False): memo_dict["memoDescription_1"] = f3x_data_summary["memoText"] if silent_print: - memo_dict["IMGNO"] = txn_img_num - 6 + memo_dict["IMGNO"] = txn_img_num - subtract_num + 5 txn_img_num += 1 memo_dict["PAGESTR"] = ( @@ -350,7 +409,7 @@ def print_pdftk(stamp_print, paginate=False): for key in schedule_key_list: if key == "has_sa_schedules" and schedule_dict[key][0]: shutil.move( - md5_directory + schedule_dict[key][1] + "SA/all_pages.pdf", + md5_directory + schedule_dict[key][1] + "/all_pages.pdf", md5_directory + "all_pages.pdf", ) shutil.rmtree(md5_directory + schedule_dict[key][1]) @@ -386,48 +445,82 @@ def print_pdftk(stamp_print, paginate=False): # elif key in ["has_sla_schedules", "has_slb_schedules"]: # os.remove(md5_directory + schedule_dict[key][1] + '/all_pages.pdf') - if not page_count and not paginate: - # push output file to AWS - s3 = boto3.client('s3') - s3.upload_file(md5_directory + 'all_pages.pdf', current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], - md5_directory + 'all_pages.pdf', - ExtraArgs={'ContentType': "application/pdf", 'ACL': "public-read"}) - - response = { - # 'file_name': '{}.pdf'.format(json_file_md5), - "pdf_url": current_app.config["PRINT_OUTPUT_FILE_URL"].format( - json_file_md5 - ) - + "all_pages.pdf", - "total_pages": total_no_of_pages, - } + # if flask.request.method == "POST": + # 'file_name': '{}.pdf'.format(json_file_md5), - elif page_count and not paginate: - response = { - "total_pages": total_no_of_pages, - } - elif not page_count and paginate: - response = { - "total_pages": total_no_of_pages, - "txn_img_json": txn_img_json - } + response = { + "total_pages": total_no_of_pages, + } - # return response - if flask.request.method == "POST": - envelope = common.get_return_envelope(data=response) - status_code = ( - status.HTTP_201_CREATED if not page_count else status.HTTP_200_OK - ) - return flask.jsonify(**envelope), status_code + if paginate: + response["txn_img_json"] = txn_img_json + else: + if not page_count: + s3 = boto3.client("s3") + extraArgs = {"ContentType": "application/pdf", "ACL": "public-read"} - else: + if silent_print: + response["pdf_url"] = current_app.config['S3_FILE_URL'] + rep_id + '.pdf' + s3.upload_file( + md5_directory + 'all_pages.pdf', + current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], + current_app.config['AWS_FECFILE_OUTPUT_DIRECTORY'] + '/' + + str(rep_id) + '.pdf', + ExtraArgs=extraArgs) + + s3.upload_file( + md5_directory + "all_pages.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + rep_id + ".pdf", + ExtraArgs=extraArgs, + ) + + else: + response["pdf_url"] = ( + current_app.config["PRINT_OUTPUT_FILE_URL"].format( + json_file_md5 + ) + + "all_pages.pdf", + ) - if flask.request.method == "POST": + s3.upload_file( + md5_directory + "all_pages.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + md5_directory + "all_pages.pdf", + ExtraArgs=extraArgs, + ) + + envelope = common.get_return_envelope(data=response) + status_code = ( + status.HTTP_200_OK + if page_count or paginate + else status.HTTP_201_CREATED + ) + + return flask.jsonify(**envelope), status_code + + # elif page_count and not paginate: + # return True, { + # "total_pages": total_no_of_pages, + # } + # elif not page_count and paginate: + # return True, { + # "total_pages": total_no_of_pages, + # "txn_img_json": txn_img_json, + # } + # elif silent_print: + # return True, { + # "total_pages": total_no_of_pages, + # } + else: + if paginate or page_count or silent_print: + 
envelope = common.get_return_envelope(False, "") + else: + # elif flask.request.method == "POST": envelope = common.get_return_envelope( - "false", "JSON file is missing from your request" + False, "json_file is missing from your request" ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST except Exception as e: traceback.print_exception(*sys.exc_info()) return error("Error generating print preview, error message: " + str(e)) @@ -568,7 +661,7 @@ def process_schedules_pages( # building object for all SA line numbers sa_line_numbers_dict = OrderedDict() - sa_line_numbers_dict["11A1"] = { + sa_line_numbers_dict["11AI"] = { "data": [], "page_cnt": 0, "memo_page_cnt": 0, @@ -614,17 +707,22 @@ def process_schedules_pages( "memo_page_cnt": 0, } + temp_sa_schedules = [] # process for each Schedule A for schedule in sa_schedules: + temp_sa_schedules.append(schedule) if schedule.get("child"): child_schedules = schedule["child"] for child_schedule in child_schedules: if child_schedule["lineNumber"] in sb_line_numbers: - sb_schedules.append(child_schedule) + if not schedules.get("SB"): + schedules["SB"] = [] + schedules["SB"].append(child_schedule) else: - sa_schedules.append(child_schedule) + temp_sa_schedules.append(child_schedule) + sa_schedules = temp_sa_schedules for schedule in sa_schedules: process_sa_line_numbers(sa_line_numbers_dict, schedule) @@ -1094,7 +1192,14 @@ def process_schedules_pages( total_no_of_pages += len(value["data"]) if paginate: + map_txn_img_num( + schedules=value["data"], + num=1, + txn_img_json=txn_img_json, + image_num=txn_img_num, + ) txn_img_num += len(value["data"]) + # print("H1", total_no_of_pages) elif key == "H2": value["start_page"] = total_no_of_pages @@ -1113,25 +1218,29 @@ def process_schedules_pages( image_num=txn_img_num, ) txn_img_num += value["page_cnt"] + # print("H2", total_no_of_pages) elif key == "18A": value["start_page"] = total_no_of_pages if not page_count and not paginate: os.makedirs(md5_directory + "SH3", exist_ok=True) - value["page_cnt"], _ = calculate_page_count( - schedules=value["data"], num=1 + + # using custom method for page count + value["page_cnt"] = calculate_sh3_page_count( + schedules=value["data"] ) total_no_of_pages += value["page_cnt"] if paginate: - map_txn_img_num( + map_sh3_txn_img_num( schedules=value["data"], - num=1, txn_img_json=txn_img_json, image_num=txn_img_num, ) txn_img_num += value["page_cnt"] + # print("18A", total_no_of_pages) + elif key == "18B": value["start_page"] = total_no_of_pages if not page_count and not paginate: @@ -1154,6 +1263,7 @@ def process_schedules_pages( txn_img_num += ( value["page_cnt"] + value["memo_page_cnt"] ) + # print("18B", total_no_of_pages) elif key == "21A": value["start_page"] = total_no_of_pages @@ -1177,6 +1287,7 @@ def process_schedules_pages( txn_img_num += ( value["page_cnt"] + value["memo_page_cnt"] ) + # print("21A", total_no_of_pages) elif key == "30A": value["start_page"] = total_no_of_pages @@ -1202,6 +1313,7 @@ def process_schedules_pages( txn_img_num += ( value["page_cnt"] + value["memo_page_cnt"] ) + # print("30A", total_no_of_pages) # print("sh total_no_of_pages: ", total_no_of_pages) @@ -1640,6 +1752,6 @@ def process_schedules_pages( if paginate: return output_data, total_no_of_pages, txn_img_json - return output_data, total_no_of_pages, None + return output_data, total_no_of_pages, {} except: traceback.print_exception(*sys.exc_info()) diff --git 
a/routes/src/f3x/helper.py b/routes/src/f3x/helper.py index 71ff641..1006181 100644 --- a/routes/src/f3x/helper.py +++ b/routes/src/f3x/helper.py @@ -123,6 +123,46 @@ def calculate_page_count(schedules, num): return sch_page_count, memo_sch_page_count +def make_sh3_dict(schedules): + schedule_dict = {} + + for schedule in schedules: + hash_check = schedule.get("accountName") + '@@' + schedule.get("receiptDate") + if hash_check in schedule_dict: + event_type = schedule.get('activityEventType') + if event_type in schedule_dict[hash_check]: + schedule_dict[hash_check][event_type] = schedule_dict[hash_check][event_type] + [schedule] + else: + schedule_dict[hash_check][event_type] = [schedule] + else: + schedule_dict[hash_check] = {} + event_type = schedule.get('activityEventType') + schedule_dict[hash_check][event_type] = [schedule] + + return schedule_dict + + +def get_sh3_page_count(schedule_list): + count = 0 + for event, event_list in schedule_list.items(): + if event not in ['DC', 'DF']: + if len(event_list) > count: + count = len(event_list) + else: + if math.ceil(len(event_list) / 2) > count: + count = math.ceil(len(event_list) / 2) + return count + + +def calculate_sh3_page_count(schedules): + schedule_dict = make_sh3_dict(schedules) + page_count = 0 + for item in schedule_dict.values(): + page_count += get_sh3_page_count(item) + + return page_count + + def build_memo_page( memo_array, md5_directory, @@ -205,7 +245,6 @@ def build_memo_page( def map_txn_img_num(schedules, num, txn_img_json, image_num): sch_count = memo_sch_count = 0 - sch_page_count = memo_sch_page_count = 0 for schedule in schedules: sch_count += 1 @@ -217,13 +256,48 @@ def map_txn_img_num(schedules, num, txn_img_json, image_num): memo_sch_count += 1 if sch_count == num: - sch_page_count += 1 - memo_sch_page_count += math.ceil(memo_sch_count / 2) - - # increase the image_num_count - image_num += sch_page_count + memo_sch_page_count + image_num += 1 + image_num += math.ceil(memo_sch_count / 2) sch_count = 0 memo_sch_count = 0 - return txn_img_json \ No newline at end of file + return txn_img_json + + + +def map_sh3_txn_img_num(schedules, txn_img_json, image_num): + sh3_dict = make_sh3_dict(schedules) + + for item_value in sh3_dict.values(): + total_pages = get_sh3_page_count(item_value) + + event_type_dict = { + 'AD': {'current_index': 0}, + 'GV': {'current_index': 0}, + 'EA': {'current_index': 0}, + 'DC': {'current_index': 0}, + 'DF': {'current_index': 0}, + 'PC': {'current_index': 0} + } + + while total_pages: + image_num += 1 + total_pages -= 1 + + for event_type, value_list in item_value.items(): + current_index = event_type_dict[event_type]['current_index'] + + if current_index < len(value_list): + txn_img_json[value_list[current_index][ + "transactionId" + ]] = image_num + + event_type_dict[event_type]['current_index'] += 1 + + if event_type in ['DC', 'DF'] and current_index + 1 < len(value_list): + current_index += 1 + txn_img_json[value_list[current_index][ + "transactionId" + ]] = image_num + event_type_dict[event_type]['current_index'] += 1 \ No newline at end of file diff --git a/routes/src/f3x/line_numbers.py b/routes/src/f3x/line_numbers.py index 6310d62..747d583 100644 --- a/routes/src/f3x/line_numbers.py +++ b/routes/src/f3x/line_numbers.py @@ -3,7 +3,7 @@ def process_sa_line_numbers(sa_line_numbers_dict, sa_obj): line_num_val = ( - "11A1" + "11AI" if sa_obj.get("lineNumber") in ["11A", "11AI", "11AII"] else sa_obj.get("lineNumber") ) diff --git a/routes/src/form1m.py b/routes/src/form1m.py index 
ce936e2..dda214f 100644 --- a/routes/src/form1m.py +++ b/routes/src/form1m.py @@ -5,125 +5,272 @@ import os.path import pypdftk import shutil +import urllib.request from os import path from flask import json from flask import request, current_app from flask_api import status -from routes.src import tmoflask, utils, common, form -from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger -from PyPDF2.generic import BooleanObject, NameObject, IndirectObject +from routes.src import common +from routes.src.utils import md5_for_text, md5_for_file, error, delete_directory -# return the list of files in a directory -def directory_files(directory): - files_list = [] - file_names = sorted(os.listdir(directory)) - for file_name in file_names: - files_list.append(directory + file_name) - return files_list +# stamp_print is a flag that will be passed at the time of submitting a report. +def print_pdftk( + stamp_print="", + page_count=False, + file_content=None, + begin_image_num=None, + silent_print=False, + filing_timestamp=None, + rep_id=None, +): + # check if json_file_name is in the request + try: + silent_print = silent_print + txn_img_num = begin_image_num + filing_timestamp = filing_timestamp + if "json_file" in request.files or (page_count and file_content): + if "json_file" in request.files: + json_file = request.files.get("json_file") + silent_print = ( + True + if request.form.get("silent_print") + and request.form.get("silent_print").lower() in ["true", "1"] + else False + ) + page_count = ( + True + if request.form.get("page_count") + and request.form.get("page_count").lower() in ["true", "1"] + else False + ) -# merge two dictionaries and return -def merge(dict1, dict2): - res = {**dict1, **dict2} - return res + if silent_print: + txn_img_num = request.form.get("begin_image_num", None) + if not txn_img_num: + if flask.request.method == "POST": + envelope = common.get_return_envelope( + "false", "begin_image_num is missing from your request" + ) + status_code = status.HTTP_400_BAD_REQUEST + return flask.jsonify(**envelope), status_code + txn_img_num = int(txn_img_num) -# Error handling -def error(msg): - if flask.request.method == "POST": - envelope = common.get_return_envelope( - 'false', msg - ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code + filing_timestamp = request.form.get("filing_timestamp", None) + json_file_md5 = md5_for_file(json_file) + json_file.stream.seek(0) -# stamp_print is a flag that will be passed at the time of submitting a report. -def print_pdftk(stamp_print): - # check if json_file is in the request - try: - if 'json_file' in request.files: - - json_file = request.files.get('json_file') + # save json file as md5 file name + json_file.save( + current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5) + ) + + # load json file + f1m_json = json.load( + open( + current_app.config["REQUEST_FILE_LOCATION"].format( + json_file_md5 + ) + ) + ) + + # if page_count is True then return from here + elif page_count and file_content: + response = {"total_pages": 1} - # generate md5 for json file - # FIXME: check if PDF already exist with md5, if exist return pdf instead of re-generating PDF file. 
- json_file_md5 = utils.md5_for_file(json_file) - json_file.stream.seek(0) + # if flask.request.method == "POST": + envelope = common.get_return_envelope(data=response) + return flask.jsonify(**envelope), status.HTTP_200_OK + + elif silent_print and begin_image_num and file_content: + json_file_md5 = md5_for_text(file_content) + f1m_json = json.loads(file_content) + + md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format( + json_file_md5 + ) + + # deleting directory if it exists and has any content + delete_directory(md5_directory) - md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(json_file_md5) os.makedirs(md5_directory, exist_ok=True) - infile = current_app.config['FORM_TEMPLATES_LOCATION'].format('F1M') - # save json file as md5 file name - json_file.save(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5)) - outfile = md5_directory + json_file_md5 + '_temp.pdf' - # load json file - f1m_json = json.load(open(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))) + infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("F1M") + outfile = md5_directory + json_file_md5 + "_temp.pdf" + # setting timestamp and imgno to empty as these needs to show up after submission - if stamp_print != 'stamp': - f1m_json['FILING_TIMESTAMP'] = '' - f1m_json['IMGNO'] = '' + if stamp_print != "stamp": + f1m_json["FILING_TIMESTAMP"] = "" + f1m_json["IMGNO"] = "" # read data from json file - f1m_data = f1m_json['data'] + f1m_data = f1m_json["data"] + + # adding txn_img_num if silent_print is True + if silent_print: + f1m_data["IMGNO"] = txn_img_num + if filing_timestamp: + f1m_data["FILING_TIMESTAMP"] = filing_timestamp + + name_list = ["LastName", "FirstName", "MiddleName", "Prefix", "Suffix"] # build treasurer name to map it to PDF template - treasurer_full_name = [] - treasurer_list = ['treasurerLastName', 'treasurerFirstName', 'treasurerMiddleName', 'treasurerPrefix', 'treasurerSuffix'] - for item in treasurer_list: - if f1m_data[item] not in [None, '', "", " "]: - treasurer_full_name.append(f1m_data[item]) - f1m_data['treasurerFullName'] = ", ".join(map(str, treasurer_full_name)) - f1m_data['treasurerName'] = f1m_data['treasurerLastName'] + ", " + f1m_data['treasurerFirstName'] - f1m_data['efStamp'] = '[Electronically Filed]' - if 'candidates' in f1m_data: - for candidate in f1m_data['candidates']: - candidate_full_name = [] - list_check = ['candidateLastName', 'candidateFirstName', 'candidateMiddleName', 'candidatePrefix', 'candidateSuffix'] - for item in list_check: - if candidate[item]: - candidate_full_name.append(candidate[item]) - f1m_data['candidateName' + str(candidate['candidateNumber'])] = ", ".join(map(str, candidate_full_name)) - f1m_data['candidateOffice' + str(candidate['candidateNumber'])] = candidate['candidateOffice'] - f1m_data['candidateStateDist' + str(candidate['candidateNumber'])] = "/ ".join(map(str, [candidate['candidateState'], candidate['candidateDistrict']])) - f1m_data['contributionDate' + str(candidate['candidateNumber'])] = candidate['contributionDate'] - - os.makedirs(md5_directory + str(f1m_data['reportId']) + '/', exist_ok=True) - infile = current_app.config['FORM_TEMPLATES_LOCATION'].format('F1M') - print(infile) - print(f1m_data) - print(outfile) + treasurerFullName = "" + for item in name_list: + item = "treasurer" + item + if f1m_data.get(item): + treasurerFullName += f1m_data.get(item) + " " + f1m_data["treasurerFullName"] = treasurerFullName[:-1] + + f1m_data["treasurerName"] = ( + 
f1m_data.get("treasurerLastName", "") + + ", " + + f1m_data.get("treasurerFirstName", "") + ) + f1m_data["treasurerName"] = ( + f1m_data["treasurerName"].strip().rstrip(",").strip() + ) + + f1m_data["efStamp"] = "[Electronically Filed]" + + if "candidates" in f1m_data: + for candidate in f1m_data["candidates"]: + + candidateFullName = "" + for item in name_list: + item = "candidate" + item + if f1m_data.get(item): + candidateFullName += f1m_data.get(item) + " " + f1m_data[ + "candidateName" + str(candidate["candidateNumber"]) + ] = candidateFullName[:-1] + + f1m_data[ + "candidateOffice" + str(candidate["candidateNumber"]) + ] = candidate["candidateOffice"] + + f1m_data[ + "candidateStateDist" + str(candidate["candidateNumber"]) + ] = "/ ".join( + map( + str, + [ + candidate["candidateState"], + candidate["candidateDistrict"], + ], + ) + ) + + f1m_data[ + "contributionDate" + str(candidate["candidateNumber"]) + ] = candidate["contributionDate"] + + os.makedirs(md5_directory + str(f1m_data["reportId"]) + "/", exist_ok=True) + infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("F1M") + pypdftk.fill_form(infile, f1m_data, outfile) - shutil.copy(outfile, md5_directory + str(f1m_data['reportId']) + '/F1M.pdf') + shutil.copy(outfile, md5_directory + str(f1m_data["reportId"]) + "/F1M.pdf") os.remove(outfile) - - # push output file to AWSss - s3 = boto3.client('s3') - s3.upload_file(md5_directory + str(f1m_data['reportId']) + '/F1M.pdf', current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], - md5_directory + 'F1M.pdf', - ExtraArgs={'ContentType': "application/pdf", 'ACL': "public-read"}) - response = { - # 'file_name': '{}.pdf'.format(json_file_md5), - 'pdf_url': current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5) + 'F1M.pdf' - } - # return response - if flask.request.method == "POST": - envelope = common.get_return_envelope( - data=response - ) - status_code = status.HTTP_201_CREATED - return flask.jsonify(**envelope), status_code + # 'file_name': '{}.pdf'.format(json_file_md5), + response = {"total_pages": 1} - else: + if not page_count: + s3 = boto3.client("s3") + extraArgs = {"ContentType": "application/pdf", "ACL": "public-read"} - if flask.request.method == "POST": + if silent_print: + response["pdf_url"] = ( + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + rep_id + ".pdf", + ) + + s3.upload_file( + md5_directory + str(f1m_data["reportId"]) + "/F1M.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + rep_id + ".pdf", + ExtraArgs=extraArgs, + ) + else: + response["pdf_url"] = ( + current_app.config["PRINT_OUTPUT_FILE_URL"].format( + json_file_md5 + ) + + "F1M.pdf", + ) + + s3.upload_file( + md5_directory + str(f1m_data["reportId"]) + "/F1M.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + md5_directory + "F1M.pdf", + ExtraArgs=extraArgs, + ) + + # return response + # if flask.request.method == "POST": + envelope = common.get_return_envelope(data=response) + status_code = status.HTTP_201_CREATED + return flask.jsonify(**envelope), status_code + # elif silent_print: + # return True, {} + else: + if page_count or silent_print: + envelope = common.get_return_envelope(False, "") + else: + # elif flask.request.method == "POST": envelope = common.get_return_envelope( - 'false', 'JSON file is missing from your request' + False, "json_file is missing from your request" ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST 
except Exception as e: - return error('Error generating print preview, error message: ' + str(e)) \ No newline at end of file + return error("Error generating print preview, error message: " + str(e)) + + +def paginate(file_content=None, begin_image_num=None): + if file_content and begin_image_num: + # if "json_file_name" in request.json: + # # json_file_name = request.json.get("json_file_name") + + # txn_img_num = request.json.get("begin_image_num") + # if not txn_img_num: + # if flask.request.method == "POST": + # envelope = common.get_return_envelope( + # "false", "begin_image_num is missing from your request" + # ) + # status_code = status.HTTP_400_BAD_REQUEST + # return flask.jsonify(**envelope), status_code + + # file_url = current_app.config["AWS_S3_FECFILE_COMPONENTS_DOMAIN"] + "/" + json_file_name + ".json" + # file_url = "https://dev-efile-repo.s3.amazonaws.com/" + json_file_name + ".json" + + # with urllib.request.urlopen(file_url) as url: + # file_content = url.read().decode() + + f1m_json = json.loads(file_content) + data = f1m_json["data"] + + txn_img_json = { + "summary": { + "committeeId": data.get("committeeId", None), + "begin_image_num": begin_image_num, + "end_image_num": begin_image_num + } + } + total_no_of_pages = 1 + + # return True, {"total_pages": total_no_of_pages, "txn_img_json": txn_img_json} + response = {"total_pages": total_no_of_pages, "txn_img_json": txn_img_json} + + # if flask.request.method == "POST": + envelope = common.get_return_envelope(data=response) + status_code = status.HTTP_200_OK + return flask.jsonify(**envelope), status_code + else: + # if flask.request.method == "POST": + envelope = common.get_return_envelope( + False, "json_file_name is missing from your request" + ) + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST diff --git a/routes/src/form24.py b/routes/src/form24.py index 96d64b4..b5c7e53 100644 --- a/routes/src/form24.py +++ b/routes/src/form24.py @@ -5,255 +5,494 @@ import os.path import pypdftk import shutil +import urllib.request from os import path from flask import json from flask import request, current_app from flask_api import status -from routes.src import tmoflask, utils, common, form -from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger -from PyPDF2.generic import BooleanObject, NameObject, IndirectObject +from routes.src import common +from routes.src.utils import md5_for_text, md5_for_file, error, delete_directory +from routes.src.f3x.helper import calculate_page_count, map_txn_img_num -# return the list of files in a directory -def directory_files(directory): - files_list = [] - file_names = sorted(os.listdir(directory)) - for file_name in file_names: - files_list.append(directory + file_name) - return files_list +name_list = ["LastName", "FirstName", "MiddleName", "Prefix", "Suffix"] -# merge two dictionaries and return -def merge(dict1, dict2): - res = {**dict1, **dict2} - return res +# stamp_print is a flag that will be passed at the time of submitting a report. 
+def print_pdftk( + stamp_print="", + page_count=False, + file_content=None, + begin_image_num=None, + silent_print=False, + filing_timestamp=None, + rep_id=None, +): + # try: + silent_print = silent_print + txn_img_num = begin_image_num + filing_timestamp = filing_timestamp + if "json_file" in request.files or (page_count and file_content): + # check if json_file_name is in the request + if "json_file" in request.files: + json_file = request.files.get("json_file") + page_count = page_count + silent_print = ( + True + if request.form.get("silent_print") + and request.form.get("silent_print").lower() in ["true", "1"] + else False + ) + page_count = ( + True + if request.form.get("page_count") + and request.form.get("page_count").lower() in ["true", "1"] + else False + ) -# Error handling -def error(msg): - if flask.request.method == "POST": - envelope = common.get_return_envelope( - 'false', msg + if silent_print: + txn_img_num = request.form.get("begin_image_num", None) + + if not txn_img_num: + if flask.request.method == "POST": + envelope = common.get_return_envelope( + "false", "begin_image_num is missing from your request" + ) + status_code = status.HTTP_400_BAD_REQUEST + return flask.jsonify(**envelope), status_code + txn_img_num = int(txn_img_num) + + filing_timestamp = request.form.get("filing_timestamp", None) + + json_file_md5 = md5_for_file(json_file) + json_file.stream.seek(0) + + # save json file as md5 file name + json_file.save( + current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5) + ) + + # load json file + f24_json = json.load( + open(current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5)) + ) + + # if page_count is True then return from here + elif page_count and file_content: + f24_json = json.loads(file_content) + # return {"total_pages": get_total_pages(f24_json.get("data"))} + + response = {"total_pages": get_total_pages(f24_json.get("data"))} + + # if flask.request.method == "POST": + envelope = common.get_return_envelope(data=response) + return flask.jsonify(**envelope), status.HTTP_200_OK + elif silent_print and begin_image_num and file_content: + json_file_md5 = md5_for_text(file_content) + f24_json = json.loads(file_content) + + md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format(json_file_md5) + + # deleting directory if it exists and has any content + delete_directory(md5_directory) + + os.makedirs(md5_directory, exist_ok=True) + + # setting timestamp and imgno to empty as these needs to show up after submission + output = {} + if stamp_print != "stamp": + output["FILING_TIMESTAMP"] = "" + output["IMGNO"] = "" + + # read data from json file + f24_data = f24_json["data"] + reportId = str(f24_data["reportId"]) + + os.makedirs(md5_directory + reportId + "/", exist_ok=True) + + output["committeeId"] = f24_data["committeeId"] + output["committeeName"] = f24_data["committeeName"] + output["reportType"] = f24_data["reportType"] + output["amendIndicator"] = f24_data["amendIndicator"] + + output["efStamp"] = "[Electronically Filed]" + if output["amendIndicator"] == "A": + if f24_data["amendDate"]: + amend_date_array = f24_data["amendDate"].split("/") + output["amendDate_MM"] = amend_date_array[0] + output["amendDate_DD"] = amend_date_array[1] + output["amendDate_YY"] = amend_date_array[2] + + se_count = 0 + # Calculating total number of pages + if not f24_data["schedules"].get("SE"): + output["PAGENO"] = 1 + output["TOTALPAGES"] = 1 + else: + se_count = len(f24_data["schedules"]["SE"]) + output["TOTALPAGES"] = 
get_total_pages(f24_data) + + # make it true when filing_timestamp has been passed for the first time + is_file_timestamp = False + + # Printing report memo text page + if f24_data.get("memoText") and f24_data.get("reportPrint"): + memo_dict = { + "scheduleName_1": "F3X" + f24_data["amendIndicator"], + "memoDescription_1": f24_data["memoText"], + "PAGESTR": "PAGE " + str(1) + " / " + str(output["TOTALPAGES"]), + } + + if silent_print: + memo_dict["IMGNO"] = txn_img_num + txn_img_num += 1 + if filing_timestamp: + memo_dict["FILING_TIMESTAMP"] = filing_timestamp + is_file_timestamp = True + + print_summary(memo_dict, 1, reportId, json_file_md5) + + if f24_data.get("filedDate"): + filed_date_array = f24_data["filedDate"].split("/") + output["filedDate_MM"] = filed_date_array[0] + output["filedDate_DD"] = filed_date_array[1] + output["filedDate_YY"] = filed_date_array[2] + + # build treasurer name to map it to PDF template + treasurerFullName = "" + for item in name_list: + item = "treasurer" + item + if f24_data.get(item): + treasurerFullName += f24_data.get(item) + ", " + output["treasurerFullName"] = treasurerFullName[:-2] + + output["treasurerName"] = ( + f24_data.get("treasurerLastName", "") + + ", " + + f24_data.get("treasurerFirstName", "") + ) + output["treasurerName"] = ( + output["treasurerName"].strip().rstrip(",").strip() + ) + + if f24_data["schedules"].get("SE"): + page_index = ( + 2 if f24_data.get("memoText") and f24_data.get("reportPrint") else 1 + ) + page_dict = {} + sub_total = 0 + total = 0 + for i, se in enumerate(f24_data["schedules"]["SE"]): + index = (i % 2) + 1 + if se.get("payeeLastName"): + payeeName = "" + for item in name_list: + item = "payee" + item + if se.get(item): + payeeName += se.get(item) + ", " + + page_dict["payeeName_" + str(index)] = payeeName[:-2] + elif se.get("payeeOrganizationName"): + page_dict["payeeName_" + str(index)] = se["payeeOrganizationName"] + + page_dict["memoCode_" + str(index)] = se["memoCode"] + page_dict["memoDescription_" + str(index)] = se["memoDescription"] + page_dict["payeeStreet1_" + str(index)] = se["payeeStreet1"] + page_dict["payeeStreet2_" + str(index)] = se["payeeStreet2"] + page_dict["payeeCity_" + str(index)] = se["payeeCity"] + page_dict["payeeState_" + str(index)] = se["payeeState"] + page_dict["payeeZipCode_" + str(index)] = se["payeeZipCode"] + page_dict["expenditureAmount_" + str(index)] = "{:.2f}".format( + se["expenditureAmount"] + ) + page_dict["transactionId_" + str(index)] = se["transactionId"] + page_dict["expenditurePurpose_" + str(index)] = se[ + "expenditurePurposeDescription" + ] + page_dict["supportOppose_" + str(index)] = se["support/opposeCode"] + page_dict["candidateOffice_" + str(index)] = se["candidateOffice"] + page_dict["candidateState_" + str(index)] = se["candidateState"] + page_dict["candidateDistrict_" + str(index)] = se["candidateDistrict"] + page_dict["electionType_" + str(index)] = se["electionCode"][:1] + page_dict["electionYear_" + str(index)] = se["electionCode"][1:] + page_dict["electionOtherDescription_" + str(index)] = se[ + "electionOtherDescription" + ] + page_dict["calendarYTD_" + str(index)] = "{:.2f}".format( + se["calendarYTDPerElectionForOffice"] + ) + + if se.get("disseminationDate"): + dissem_date_array = se["disseminationDate"].split("/") + page_dict["disseminationDate_MM_" + str(index)] = dissem_date_array[ + 0 + ] + page_dict["disseminationDate_DD_" + str(index)] = dissem_date_array[ + 1 + ] + page_dict["disseminationDate_YY_" + str(index)] = dissem_date_array[ + 2 + ] + + 
if se.get("disbursementDate"): + disburse_date_array = se["disbursementDate"].split("/") + page_dict[ + "disbursementDate_MM_" + str(index) + ] = disburse_date_array[0] + page_dict[ + "disbursementDate_DD_" + str(index) + ] = disburse_date_array[1] + page_dict[ + "disbursementDate_YY_" + str(index) + ] = disburse_date_array[2] + + candidateName = "" + for item in name_list: + item = "candidate" + item + if se.get(item): + candidateName += se.get(item) + ", " + + if candidateName: + page_dict["candidateName_" + str(index)] = candidateName[:-2] + else: + page_dict["candidateName_" + str(index)] = "" + + # if se[item]: candidate_name_list.append(se[item]) + # page_dict["candidateName_" + str(index)] = " ".join(candidate_name_list) + + if se.get("memoCode") != "X": + sub_total += se["expenditureAmount"] + total += se["expenditureAmount"] + + # print and reset + if index % 2 == 0 or i == se_count - 1: + page_dict["PAGENO"] = page_index + page_dict["subTotal"] = "{:.2f}".format(sub_total) + + if silent_print: + page_dict["IMGNO"] = txn_img_num + txn_img_num += 1 + if filing_timestamp and not is_file_timestamp: + page_dict["FILING_TIMESTAMP"] = filing_timestamp + is_file_timestamp = True + + if i == se_count - 1: + page_dict["total"] = "{:.2f}".format(total) + + print_dict = {**output, **page_dict} + print_f24(print_dict, page_index, reportId, json_file_md5) + page_index += 1 + + memo_dict = {} + for xir in range(1, 3): + if page_dict.get("memoDescription_{}".format(xir)): + memo_dict["scheduleName_{}".format(xir)] = "SE" + memo_dict["memoDescription_{}".format(xir)] = page_dict[ + "memoDescription_{}".format(xir) + ] + memo_dict["transactionId_{}".format(xir)] = page_dict[ + "transactionId_{}".format(xir) + ] + memo_dict["PAGESTR"] = ( + "PAGE " + + str(page_index) + + " / " + + str(output["TOTALPAGES"]) + ) + + if silent_print: + memo_dict["IMGNO"] = txn_img_num + + if silent_print: + txn_img_num += 1 + + if memo_dict: + print_summary(memo_dict, page_index, reportId, json_file_md5) + page_index += 1 + page_dict = {} + sub_total = 0 + else: + output["subTotal"] = "0.00" + output["total"] = "0.00" + print_f24(output, 1, reportId, json_file_md5) + + # Concatenating all generated pages + for i in range(1, output["TOTALPAGES"] + 1, 1): + if path.isfile(md5_directory + reportId + "/F24_temp.pdf"): + pypdftk.concat( + [ + md5_directory + reportId + "/F24_temp.pdf", + md5_directory + reportId + "/F24_{}.pdf".format(i), + ], + md5_directory + reportId + "/concat_F24.pdf", + ) + os.rename( + md5_directory + reportId + "/concat_F24.pdf", + md5_directory + reportId + "/F24_temp.pdf", + ) + os.remove(md5_directory + reportId + "/F24_{}.pdf".format(i)) + else: + os.rename( + md5_directory + reportId + "/F24_{}.pdf".format(i), + md5_directory + reportId + "/F24_temp.pdf", + ) + os.rename( + md5_directory + reportId + "/F24_temp.pdf", + md5_directory + reportId + "/F24.pdf", ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code + response = { + "total_pages": output["TOTALPAGES"], + } + + if not page_count: + s3 = boto3.client("s3") + extraArgs = {"ContentType": "application/pdf", "ACL": "public-read"} + + if silent_print: + response["pdf_url"] = ( + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + rep_id + ".pdf", + ) + + s3.upload_file( + md5_directory + reportId + "/F24.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + rep_id + ".pdf", + ExtraArgs=extraArgs, + ) + else: + response["pdf_url"] = ( + 
current_app.config["PRINT_OUTPUT_FILE_URL"].format(json_file_md5) + + "F24.pdf", + ) + + s3.upload_file( + md5_directory + reportId + "/F24.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + md5_directory + "F24.pdf", + ExtraArgs=extraArgs, + ) + + # if flask.request.method == "POST": + envelope = common.get_return_envelope(data=response) + return flask.jsonify(**envelope), status.HTTP_201_CREATED + else: + if page_count or silent_print: + envelope = common.get_return_envelope(False, "") + elif flask.request.method == "POST": + envelope = common.get_return_envelope( + False, "json_file is missing from your request" + ) + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST + + +# except Exception as e: +# return error('Error generating print preview, error message: ' + str(e)) -# stamp_print is a flag that will be passed at the time of submitting a report. -def print_pdftk(stamp_print): - # check if json_file is in the request - # try: - if 'json_file' in request.files: - json_file = request.files.get('json_file') - - # generate md5 for json file - # FIXME: check if PDF already exist with md5, if exist return pdf instead of re-generating PDF file. - json_file_md5 = utils.md5_for_file(json_file) - json_file.stream.seek(0) - - md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(json_file_md5) - os.makedirs(md5_directory, exist_ok=True) - # save json file as md5 file name - json_file.save(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5)) - # load json file - f24_json = json.load(open(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))) - # setting timestamp and imgno to empty as these needs to show up after submission - output = {} - if stamp_print != 'stamp': - output['FILING_TIMESTAMP'] = '' - output['IMGNO'] = '' - - # read data from json file - f24_data = f24_json['data'] - reportId = str(f24_data['reportId']) - os.makedirs(md5_directory + reportId + '/', exist_ok=True) - output['committeeId'] = f24_data['committeeId'] - output['committeeName'] = f24_data['committeeName'] - output['reportType'] = f24_data['reportType'] - output['amendIndicator'] = f24_data['amendIndicator'] - - # checking report memo text - report_memo_flag = True if f24_data.get('memoText') else False - - output['efStamp'] = '[Electronically Filed]' - if output['amendIndicator'] == 'A': - if f24_data['amendDate']: - amend_date_array = f24_data['amendDate'].split("/") - output['amendDate_MM'] = amend_date_array[0] - output['amendDate_DD'] = amend_date_array[1] - output['amendDate_YY'] = amend_date_array[2] - - # Calculating total number of pages - if not f24_data['schedules'].get('SE'): - output['PAGENO'] = page_index - output['TOTALPAGES'] = page_index - else: - if len(f24_data['schedules']['SE']) % 2 == 0: - output['TOTALPAGES'] = len(f24_data['schedules']['SE'])//2 - else: - output['TOTALPAGES'] = (len(f24_data['schedules']['SE'])//2)+1 - full_counter = 0 - for page in range(0, output['TOTALPAGES'], 1): - counter = 0 - for i in range(0,2,1): - if 2*(page)+i < len(f24_data['schedules']['SE']): - item = f24_data['schedules'].get('SE')[2*(page)+i] - if item.get("memoDescription"): - counter = 1 - full_counter = full_counter + counter - if report_memo_flag and f24_data['reportPrint']: full_counter += 1 - output['TOTALPAGES'] += full_counter - - # Printing report memo text page - if report_memo_flag and f24_data['reportPrint']: - memo_dict = {'scheduleName_1' : 'F3X' + f24_data['amendIndicator'], - 'memoDescription_1' : f24_data['memoText'], - 'PAGESTR' 
: "PAGE " + str(1) + " / " + str(output['TOTALPAGES'])} - print_summ(memo_dict, 1, reportId, json_file_md5) - - if f24_data.get('filedDate'): - filed_date_array = f24_data['filedDate'].split("/") - output['filedDate_MM'] = filed_date_array[0] - output['filedDate_DD'] = filed_date_array[1] - output['filedDate_YY'] = filed_date_array[2] - # build treasurer name to map it to PDF template - treasurer_list = ['treasurerLastName', 'treasurerFirstName', 'treasurerMiddleName', 'treasurerPrefix', 'treasurerSuffix'] - output['treasurerFullName'] = "" - for item in treasurer_list: - output['treasurerFullName'] += f24_data.get(item, "")+',' - output['treasurerFullName'] = output['treasurerFullName'][:-1] - output['treasurerName'] = f24_data['treasurerLastName'] + ", " + f24_data['treasurerFirstName'] - - if f24_data['schedules'].get('SE'): - page_index = 2 if report_memo_flag and f24_data['reportPrint'] else 1 - page_dict = {} - sub_total = 0 - total = 0 - for i, se in enumerate(f24_data['schedules']['SE']): - index = (i%2)+1 - if 'payeeLastName' in se and se['payeeLastName']: - page_dict["payeeName_" + str(index)] = "" - for item in ['payeeLastName', 'payeeFirstName', 'payeeMiddleName', 'payeePrefix', 'payeeSuffix']: - page_dict["payeeName_" + str(index)] += se.get(item,"")+',' - page_dict["payeeName_" + str(index)] = page_dict["payeeName_" + str(index)][:-1] - elif 'payeeOrganizationName' in se: - page_dict["payeeName_" + str(index)] = se['payeeOrganizationName'] - page_dict["memoCode_" + str(index)] = se['memoCode'] - page_dict["memoDescription_" + str(index)] = se['memoDescription'] - page_dict["payeeStreet1_" + str(index)] = se['payeeStreet1'] - page_dict["payeeStreet2_" + str(index)] = se['payeeStreet2'] - page_dict["payeeCity_" + str(index)] = se['payeeCity'] - page_dict["payeeState_" + str(index)] = se['payeeState'] - page_dict["payeeZipCode_" + str(index)] = se['payeeZipCode'] - page_dict["expenditureAmount_" + str(index)] = "{:.2f}".format(se['expenditureAmount']) - page_dict["transactionId_" + str(index)] = se['transactionId'] - page_dict["expenditurePurpose_" + str(index)] = se['expenditurePurposeDescription'] - page_dict["supportOppose_" + str(index)] = se['support/opposeCode'] - page_dict["candidateOffice_" + str(index)] = se['candidateOffice'] - page_dict["candidateState_" + str(index)] = se['candidateState'] - page_dict["candidateDistrict_" + str(index)] = se['candidateDistrict'] - page_dict["electionType_" + str(index)] = se['electionCode'][:1] - page_dict["electionYear_" + str(index)] = se['electionCode'][1:] - page_dict["electionOtherDescription_" + str(index)] = se['electionOtherDescription'] - page_dict["calendarYTD_" + str(index)] = "{:.2f}".format(se['calendarYTDPerElectionForOffice']) - if se['disseminationDate']: - dissem_date_array = se['disseminationDate'].split("/") - page_dict["disseminationDate_MM_" + str(index)] = dissem_date_array[0] - page_dict["disseminationDate_DD_" + str(index)] = dissem_date_array[1] - page_dict["disseminationDate_YY_" + str(index)] = dissem_date_array[2] - if se['disbursementDate']: - disburse_date_array = se['disbursementDate'].split("/") - page_dict["disbursementDate_MM_" + str(index)] = disburse_date_array[0] - page_dict["disbursementDate_DD_" + str(index)] = disburse_date_array[1] - page_dict["disbursementDate_YY_" + str(index)] = disburse_date_array[2] - page_dict["candidateName_" + str(index)] = "" - for item in ['candidateLastName', 'candidateFirstName', 'candidateMiddleName', 'candidatePrefix', 'candidateSuffix']: - 
page_dict["candidateName_" + str(index)] += se.get(item,"")+',' - page_dict["candidateName_" + str(index)] = page_dict["candidateName_" + str(index)][:-1] - # if se[item]: candidate_name_list.append(se[item]) - # page_dict["candidateName_" + str(index)] = " ".join(candidate_name_list) - if se.get('memoCode') != 'X': - sub_total += se['expenditureAmount'] - total += se['expenditureAmount'] - # print and reset - if (index%2 == 0 or i == (len(f24_data['schedules']['SE'])-1)): - page_dict['PAGENO'] = page_index - page_dict["subTotal"] = "{:.2f}".format(sub_total) - if i == (len(f24_data['schedules']['SE'])-1): page_dict["total"] = "{:.2f}".format(total) - print_dict = {**output, **page_dict} - print_f24(print_dict, page_index, reportId, json_file_md5) - page_index += 1 - memo_dict = {} - for xir in range(1, 3): - if page_dict.get("memoDescription_{}".format(xir)): - memo_dict["scheduleName_{}".format(xir)] = 'SE' - memo_dict["memoDescription_{}".format(xir)] = page_dict["memoDescription_{}".format(xir)] - memo_dict["transactionId_{}".format(xir)] = page_dict["transactionId_{}".format(xir)] - memo_dict['PAGESTR'] = "PAGE " + str(page_index) + " / " + str(output['TOTALPAGES']) - if memo_dict: - print_summ(memo_dict, page_index, reportId, json_file_md5) - page_index += 1 - page_dict = {} - sub_total = 0 - else: - output["subTotal"] = "0.00" - output["total"] = "0.00" - print_f24(output, 1, reportId, json_file_md5) - - # Concatinating all pages generated - for i in range(1, output['TOTALPAGES']+1, 1): - if path.isfile(md5_directory + reportId + '/F24_temp.pdf'): - pypdftk.concat([md5_directory + reportId + '/F24_temp.pdf', md5_directory + reportId + '/F24_{}.pdf'.format(i)], - md5_directory + reportId + '/concat_F24.pdf') - os.rename(md5_directory + reportId + '/concat_F24.pdf', md5_directory + reportId + '/F24_temp.pdf') - os.remove(md5_directory + reportId + '/F24_{}.pdf'.format(i)) - else: - os.rename(md5_directory + reportId + '/F24_{}.pdf'.format(i), md5_directory + reportId + '/F24_temp.pdf') - os.rename(md5_directory + reportId + '/F24_temp.pdf', md5_directory + reportId + '/F24.pdf') - - # push output file to AWS - s3 = boto3.client('s3') - s3.upload_file(md5_directory + reportId + '/F24.pdf', current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], - md5_directory + 'F24.pdf', - ExtraArgs={'ContentType': "application/pdf", 'ACL': "public-read"}) - response = { - 'pdf_url': current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5) + 'F24.pdf' - } - # response = {'yes':True} - if flask.request.method == "POST": - envelope = common.get_return_envelope( - data=response - ) - status_code = status.HTTP_201_CREATED - return flask.jsonify(**envelope), status_code - else: - if flask.request.method == "POST": - envelope = common.get_return_envelope( - 'false', 'JSON file is missing from your request' - ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code - # except Exception as e: - # return error('Error generating print preview, error message: ' + str(e)) def print_f24(print_dict, page_index, reportId, json_file_md5): - try: - md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(json_file_md5) - infile = current_app.config['FORM_TEMPLATES_LOCATION'].format('F24') - outfile = md5_directory + json_file_md5 + '_temp.pdf' - pypdftk.fill_form(infile, print_dict, outfile) - shutil.copy(outfile, md5_directory + reportId + '/F24_{}.pdf'.format(page_index)) - os.remove(outfile) - except Exception as e: - return error('print_f24 error, error 
message: ' + str(e)) - -def print_summ(print_dict, page_index, reportId, json_file_md5): - try: - md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(json_file_md5) - infile = current_app.config['FORM_TEMPLATES_LOCATION'].format('TEXT') - outfile = md5_directory + json_file_md5 + '_temp.pdf' - pypdftk.fill_form(infile, print_dict, outfile) - shutil.copy(outfile, md5_directory + reportId + '/F24_{}.pdf'.format(page_index)) - os.remove(outfile) - except Exception as e: - return error('print_f24_summ error, error message: ' + str(e)) \ No newline at end of file + try: + md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format(json_file_md5) + infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("F24") + outfile = md5_directory + json_file_md5 + "_temp.pdf" + pypdftk.fill_form(infile, print_dict, outfile) + shutil.copy( + outfile, md5_directory + reportId + "/F24_{}.pdf".format(page_index) + ) + os.remove(outfile) + except Exception as e: + return error("print_f24 error, error message: " + str(e)) + + +def print_summary(print_dict, page_index, reportId, json_file_md5): + try: + md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format(json_file_md5) + infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("TEXT") + outfile = md5_directory + json_file_md5 + "_temp.pdf" + pypdftk.fill_form(infile, print_dict, outfile) + shutil.copy( + outfile, md5_directory + reportId + "/F24_{}.pdf".format(page_index) + ) + os.remove(outfile) + except Exception as e: + return error("print_f24_summ error, error message: " + str(e)) + + +def paginate(file_content=None, begin_image_num=None): + if file_content and begin_image_num: + txn_img_num = begin_image_num + # if "json_file_name" in request.json: + # json_file_name = request.json.get("json_file_name") + + # txn_img_num = request.json.get("begin_image_num") + # if not txn_img_num: + # if flask.request.method == "POST": + # envelope = common.get_return_envelope( + # "false", "begin_image_num is missing from your request" + # ) + # status_code = status.HTTP_400_BAD_REQUEST + # return flask.jsonify(**envelope), status_code + + # # file_url = current_app.config["AWS_S3_FECFILE_COMPONENTS_DOMAIN"] + "/" + json_file_name + ".json" + # file_url = "https://dev-efile-repo.s3.amazonaws.com/" + json_file_name + ".json" + + # with urllib.request.urlopen(file_url) as url: + # file_content = url.read().decode() + + f24_json = json.loads(file_content) + data = f24_json["data"] + + txn_img_json = {} + total_no_of_pages = 0 + + if not data.get("memoText") or not data.get("reportPrint"): + txn_img_num -= 1 + + if data.get("schedules") and data["schedules"].get("SE"): + map_txn_img_num( + schedules=data["schedules"]["SE"], + num=2, + txn_img_json=txn_img_json, + image_num=txn_img_num, + ) + total_no_of_pages = get_total_pages(data) + + summary = {} + if txn_img_json: + summary["begin_image_num"] = min(txn_img_json.values()) + summary["end_image_num"] = max(txn_img_json.values()) + else: + summary["begin_image_num"] = begin_image_num + summary["end_image_num"] = txn_img_num + + summary["committeeId"] = data.get("committeeId", None) + txn_img_json["summary"] = summary + # return True, {"total_pages": total_no_of_pages, "txn_img_json": txn_img_json} + response = {"total_pages": total_no_of_pages, "txn_img_json": txn_img_json} + + # if flask.request.method == "POST": + envelope = common.get_return_envelope(data=response) + return flask.jsonify(**envelope), status.HTTP_200_OK + else: + # return False, None + # if flask.request.method == 
"POST": + envelope = common.get_return_envelope( + False, "json_file_name is missing from your request" + ) + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST + + +def get_total_pages(data): + total_pages = 0 + if data.get("schedules") and data["schedules"].get("SE"): + page_cnt, memo_page_cnt = calculate_page_count( + schedules=data["schedules"]["SE"], num=2 + ) + total_pages = page_cnt + memo_page_cnt + + if data.get("memoText") and data.get("reportPrint"): + total_pages += 1 + + return total_pages diff --git a/routes/src/form99.py b/routes/src/form99.py index 07f67d1..72a96e2 100644 --- a/routes/src/form99.py +++ b/routes/src/form99.py @@ -6,622 +6,426 @@ import shutil import pdfkit import bs4 +import urllib.request +import sys +import traceback from flask import json from flask import request, current_app from flask_api import status -from routes.src import tmoflask, utils, common, form -from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger -from PyPDF2.generic import BooleanObject, NameObject, IndirectObject - - -def split_f99_text_pages(json_data): - f99_page_data = {} - f99_additional_page_data = [] - f99_text = json_data['MISCELLANEOUS_TEXT'] - lines_count = 0 - main_page_data = '' - additional_pages = 0 - additional_page_data = '' - match_count = 0 - for line in f99_text.splitlines(True): - lines_count += 1 - if len(line) > 117: - (line, match_count) = re.subn("(.{1,117})( +|$)\n?|(.{117})", "\\1\\3^\n", line, match_count, re.DOTALL) - lines_count += match_count - if lines_count <= 17: - line = line.replace('^\n', ' ') - main_page_data = main_page_data + line - else: - temp_lines_count = lines_count - match_count - if temp_lines_count <= 17: - - for temp_line in line.splitlines(True): - temp_lines_count += 1 - if temp_lines_count <= 17: - temp_line = temp_line.replace('^\n', ' ') - main_page_data = main_page_data + temp_line - else: - additional_page_data = additional_page_data + temp_line - else: - additional_page_data = additional_page_data + line - f99_page_data["main_page"] = main_page_data - if lines_count > 17: - additional_pages_reminder = (lines_count - 17) % 49 - if additional_pages_reminder != 0: - additional_pages = ((lines_count - 17) // 49) + 1 - additional_lines = additional_page_data.splitlines(True) - - for additional_page_number in range(additional_pages): - start = (49 * (additional_page_number)) - end = (49 * (additional_page_number + 1)) - 1 - additional_lines_str = "".join(map(str, additional_lines[start:end])) - additional_lines_str = additional_lines_str.replace('^\n', ' ') - additional_page_dict = {additional_page_number: additional_lines_str} - f99_additional_page_data.append(additional_page_dict) - - f99_page_data["additional_pages"] = f99_additional_page_data - # convert to json data - f99_page_data_json = json.dumps(f99_page_data) - return f99_page_data_json - - -def split_f99_text_pages_html(json_data): - f99_page_data = {} - f99_additional_page_data = [] - f99_text = json_data['MISCELLANEOUS_TEXT'] - lines_count = 0 - main_page_data = '' - additional_pages = 0 - additional_page_data = '' - match_count = 0 - for line in f99_text.splitlines(True): - lines_count += 1 - if len(line) > 117: - (line, match_count) = re.subn("(.{1,117})( +|$)
?|(.{117})", "\\1\\3^
", line, match_count, re.DOTALL) - lines_count += match_count - if lines_count <= 17: - line = line.replace('^
', ' ') - main_page_data = main_page_data + line - else: - temp_lines_count = lines_count - match_count - if temp_lines_count <= 17: - - for temp_line in line.splitlines(True): - temp_lines_count += 1 - if temp_lines_count <= 17: - temp_line = temp_line.replace('^
', ' ') - main_page_data = main_page_data + temp_line - else: - additional_page_data = additional_page_data + temp_line - else: - additional_page_data = additional_page_data + line - f99_page_data["main_page"] = main_page_data - if lines_count > 17: - additional_pages_reminder = (lines_count - 17) % 49 - if additional_pages_reminder != 0: - additional_pages = ((lines_count - 17) // 49) + 1 - additional_lines = additional_page_data.splitlines(True) - - for additional_page_number in range(additional_pages): - start = (49 * (additional_page_number)) - end = (49 * (additional_page_number + 1)) - 1 - additional_lines_str = "".join(map(str, additional_lines[start:end])) - additional_lines_str = additional_lines_str.replace('^
', ' ') - additional_page_dict = {additional_page_number: additional_lines_str} - f99_additional_page_data.append(additional_page_dict) - - f99_page_data["additional_pages"] = f99_additional_page_data - # convert to json data - f99_page_data_json = json.dumps(f99_page_data) - return f99_page_data_json - -# def split_f99_text_pages(json_data): -# f99_page_data = {} -# f99_additional_page_data = [] -# f99_text = json_data['MISCELLANEOUS_TEXT'] -# lines_count = 0 -# main_page_data = '' -# additional_pages = 0 -# additional_page_data = '' -# match_count = 0 -# for line in f99_text.splitlines(True): -# lines_count += 1 -# if len(line) > 117: -# lines_count += 1 -# (line, match_count) = re.subn("(.{1,117})( +|$)\n?|(.{117})", "\\1^\n", line, match_count, re.DOTALL) -# lines_count += match_count -# if lines_count > 17: -# temp_lines_count = 0 -# for temp_line in line.splitlines(True): -# temp_lines_count += 1 -# if temp_lines_count <= 17: -# temp_line = temp_line.replace('^\n', ' ') -# main_page_data = main_page_data + temp_line -# else: -# additional_page_data = additional_page_data + temp_line -# if lines_count <= 17: -# line = line.replace('^\n', ' ') -# main_page_data = main_page_data + line -# else: -# additional_page_data = additional_page_data + line -# f99_page_data["main_page"] = main_page_data -# if lines_count > 17: -# additional_pages_reminder = (lines_count - 17) % 49 -# if additional_pages_reminder != 0: -# additional_pages = ((lines_count - 17) // 49) + 1 -# additional_lines = additional_page_data.splitlines(True) -# -# for additional_page_number in range(additional_pages): -# start = (49 * (additional_page_number)) -# end = (49 * (additional_page_number + 1)) - 1 -# additional_lines_str = "".join(map(str, additional_lines[start:end])) -# additional_lines_str = additional_lines_str.replace('^\n', ' ') -# additional_page_dict = {additional_page_number: additional_lines_str} -# f99_additional_page_data.append(additional_page_dict) -# -# f99_page_data["additional_pages"] = f99_additional_page_data -# # convert to json data -# f99_page_data_json = json.dumps(f99_page_data) -# return f99_page_data_json - -def print_f99(): - """ - This function is being invoked internally from controllers - HTTP request needs to have form_type, file, and attachment_file - form_type : F99 - json_file: please refer to below sample JSON - attachment_file: It is a PDF file that will be merged to the generated PDF file. - sample: - { - "REASON_TYPE":"MST", - "COMMITTEE_NAME":"DONALD J. TRUMP FOR PRESIDENT, INC.", - "FILER_FEC_ID_NUMBER":"C00580100", - "IMGNO":"201812179143565008", - "FILING_TIMESTAMP":"12/17/2018 17 : 09", - "STREET_1":"725 FIFTH AVENUE", - "STREET_2":"", - "CITY":"NEW YORK", - "STATE":"NY", - "ZIP":"10022", - "TREASURER_FULL_NAME":"CRATE, BRADLEY, , ,", - "TREASURER_NAME":"CRATE, BRADLEY, , ,", - "EF_STAMP":"[Electronically Filed]", - "DATE_SIGNED_MM":"01", - "DATE_SIGNED_DD":"28", - "DATE_SIGNED_YY":"2019", - "MISCELLANEOUS_TEXT":"This statement is in response to the Commission's letter to the Committee - dated November 12, 2018, regarding two items related to the - above-referenced report ('the Original Report')." 
- } - :return: return JSON response - sample: - { - "message": "", - "results": { - "file_name": "bd78435a70a70d656145dae89e0e22bb.pdf", - "file_url": "https://fecfile-dev-components.s3.amazonaws.com/output/bd78435a70a70d656145dae89e0e22bb.pdf" - }, - "success": "true" - } - """ - if 'json_file' in request.files: - json_file = request.files.get('json_file') - json_file_md5 = utils.md5_for_file(json_file) - json_file.stream.seek(0) - - infile = current_app.config['FORM_TEMPLATES_LOCATION'].format('F99') - json_file.save(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5)) - outfile = current_app.config['OUTPUT_FILE_LOCATION'].format(json_file_md5) - - json_data = json.load(open(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))) - - # json_data['FILER_FEC_ID_NUMBER'] = json_data['FILER_FEC_ID_NUMBER'][1:] - - if json_data['REASON_TYPE'] == 'MST': - reason_type_data = {"REASON_TYPE_MST": "/MST"} - - if json_data['REASON_TYPE'] == 'MSM': - reason_type_data = {"REASON_TYPE_MSM": "/MSM"} - - if json_data['REASON_TYPE'] == 'MSI': - reason_type_data = {"REASON_TYPE_MSI": "/MSI"} - - if json_data['REASON_TYPE'] == 'MSW': - reason_type_data = {"REASON_TYPE_MSW": "/MSW"} - # open the input file - - input_stream = open(infile, "rb") - - pdf_reader = PdfFileReader(input_stream, strict=True) - - if "/AcroForm" in pdf_reader.trailer["/Root"]: - pdf_reader.trailer["/Root"]["/AcroForm"].update( - - {NameObject("/NeedAppearances"): BooleanObject(True)}) - - pdf_writer = PdfFileWriter() - - form.set_need_appearances_writer(pdf_writer) - - if "/AcroForm" in pdf_writer._root_object: - pdf_writer._root_object["/AcroForm"].update( - {NameObject("/NeedAppearances"): BooleanObject(True)}) - - for page_num in range(pdf_reader.numPages): - page_obj = pdf_reader.getPage(page_num) - - pdf_writer.addPage(page_obj) - - form.update_checkbox_values(page_obj, reason_type_data) - - pdf_writer.updatePageFormFieldValues(page_obj, json_data) - - # Add the F99 attachment - if 'attachment_file' in request.files: - # reading Attachment title file - attachment_title_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('Attachment_Title') - attachment_title_stream = open(attachment_title_file, "rb") - attachment_title_reader = PdfFileReader(attachment_title_stream, strict=True) - attachment_stream = request.files.get('attachment_file') - attachment_reader = PdfFileReader(attachment_stream, strict=True) - - for attachment_page_num in range(attachment_reader.numPages): - attachment_page_obj = attachment_reader.getPage(attachment_page_num) - if attachment_page_num == 0: - attachment_page_obj.mergePage(attachment_title_reader.getPage(0)) - - pdf_writer.addPage(attachment_page_obj) - - output_stream = open(outfile, "wb") - - pdf_writer.write(output_stream) - - input_stream.close() - - output_stream.close() - - # push output file to AWS - s3 = boto3.client('s3') - s3.upload_file(outfile, current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], - current_app.config['OUTPUT_FILE_LOCATION'].format(json_file_md5), - ExtraArgs={'ContentType': "application/pdf", 'ACL': "public-read"}) - - response = { - # 'file_name': '{}.pdf'.format(json_file_md5), - 'pdf_url': current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5) - } - - if flask.request.method == "POST": - envelope = common.get_return_envelope( - data=response +from routes.src import common, form + +from routes.src.utils import md5_for_text, md5_for_file, directory_files, error + + +def print_f99_pdftk_html( + stamp_print="", + 
paginate=False, + begin_image_num=None, + page_count=False, + file_content=None, + silent_print=False, + filing_timestamp=None, + rep_id=None, + attachment_file_content=None, +): + # check if json_file is in the request + # HTML("templates/forms/test.html").write_pdf("output/pdf/test/test.pdf") + # HTML(string='''
... This is bold text ... This is underline text ... This is italics text ... Title ... 1. one  2. two  3. three ...
''').write_pdf("output/pdf/test/test.pdf") + # pdfkit.from_file("templates/forms/test.html", "output/pdf/test/test.pdf") + # pypdftk.stamp(current_app.config['FORM_TEMPLATES_LOCATION'].format('F99'), "output/pdf/test/test.pdf", "output/pdf/test/output.pdf") + try: + silent_print = silent_print + txn_img_num = begin_image_num + filing_timestamp = filing_timestamp + + if ( + (page_count and file_content) + or ((paginate or silent_print) and file_content and begin_image_num) + or (not paginate and "json_file" in request.files) + ): + + if page_count and file_content: + json_file_md5 = md5_for_text(file_content) + json_data = json.loads(file_content) + + elif (paginate or silent_print) and file_content and begin_image_num: + # generate md5 for file_content + json_file_md5 = md5_for_text(file_content) + json_data = json.loads(file_content) + + elif not paginate and "json_file" in request.files: + json_file = request.files.get("json_file") + silent_print = ( + True + if request.form.get("silent_print") + and request.form.get("silent_print").lower() in ["true", "1"] + else False + ) + page_count = ( + True + if request.form.get("page_count") + and request.form.get("page_count").lower() in ["true", "1"] + else False + ) + + if silent_print: + txn_img_num = request.form.get("begin_image_num", None) + + if not txn_img_num: + if flask.request.method == "POST": + envelope = common.get_return_envelope( + "false", "begin_image_num is missing from your request" + ) + status_code = status.HTTP_400_BAD_REQUEST + return flask.jsonify(**envelope), status_code + txn_img_num = int(txn_img_num) + + filing_timestamp = request.form.get("filing_timestamp", None) + + json_file_md5 = md5_for_file(json_file) + json_file.stream.seek(0) + + # save json file as md5 file name + json_file.save( + current_app.config["REQUEST_FILE_LOCATION"].format(json_file_md5) + ) + + # load json file + json_data = json.load( + open( + current_app.config["REQUEST_FILE_LOCATION"].format( + json_file_md5 + ) + ) + ) + + md5_directory = current_app.config["OUTPUT_DIR_LOCATION"].format( + json_file_md5 ) - status_code = status.HTTP_201_CREATED - return flask.jsonify(**envelope), status_code - else: - if flask.request.method == "POST": - envelope = common.get_return_envelope( - 'false', 'JSON file is missing from your request' + # if paginate or page_count is True and directory exist then don't remove it + is_dir_exist = False + if os.path.isdir(md5_directory): + is_dir_exist = True + + os.makedirs(md5_directory, exist_ok=True) + # os.makedirs(md5_directory + "images", exist_ok=True) + if not os.path.exists(md5_directory + "images"): + shutil.copytree("templates/forms/F99/images", md5_directory + "images") + shutil.copyfile( + "templates/forms/F99/form-text.css", md5_directory + "form-text.css" ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code - - -def directory_files(directory): - files_list = [] - file_names = sorted(os.listdir(directory)) - for file_name in file_names: - files_list.append(directory+file_name) - return files_list - - -def print_f99_pdftk(stamp_print): - # check if json_file is in the request - - if 'json_file' in request.files: - total_no_of_pages = 1 - page_no = 1 - json_file = request.files.get('json_file') - # generate md5 for json file - json_file_md5 = utils.md5_for_file(json_file) - json_file.stream.seek(0) - md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(json_file_md5) - os.makedirs(md5_directory, exist_ok=True) - infile = 
current_app.config['FORM_TEMPLATES_LOCATION'].format('F99') - # save json file as md5 file name - json_file.save(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5)) - outfile = md5_directory+json_file_md5+'_temp.pdf' - json_data = json.load(open(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))) - # setting timestamp and imgno to empty as these needs to show up after submission - if stamp_print != 'stamp': - json_data['FILING_TIMESTAMP'] = '' - json_data['IMGNO'] = '' - - f99_pages_text_json = json.loads(split_f99_text_pages(json_data)) - json_data['MISCELLANEOUS_TEXT'] = f99_pages_text_json['main_page'] - total_no_of_pages += len(f99_pages_text_json['additional_pages']) - # checking if attachment_file exist - if 'attachment_file' in request.files: - # reading Attachment title file - attachment_title_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('Attachment_Title') - attachment_file = request.files.get('attachment_file') - attachment_file.save(os.path.join(md5_directory + 'attachment_temp.pdf')) - os.makedirs(md5_directory + 'attachment', exist_ok=True) - os.makedirs(md5_directory + 'final_attachment', exist_ok=True) - pypdftk.split(md5_directory + 'attachment_temp.pdf', md5_directory+'attachment') - os.remove(md5_directory + 'attachment/doc_data.txt') - attachment_no_of_pages = pypdftk.get_num_pages(os.path.join(md5_directory + 'attachment_temp.pdf')) - attachment_page_no = total_no_of_pages - total_no_of_pages += attachment_no_of_pages - - # we are doing this to assign page numbers to attachment file - for filename in os.listdir(md5_directory+'attachment'): - attachment_page_no += 1 - pypdftk.fill_form(attachment_title_file, {"PAGESTR": "PAGE " + str(attachment_page_no) + " / " + str(total_no_of_pages)}, - md5_directory +'attachment/attachment_page_'+str(attachment_page_no)+'.pdf') - pypdftk.stamp(md5_directory+'attachment/'+filename, md5_directory + - 'attachment/attachment_page_'+str(attachment_page_no)+'.pdf', md5_directory + - 'final_attachment/attachment_'+str(attachment_page_no)+'.pdf') - pypdftk.concat(directory_files(md5_directory +'final_attachment/'), md5_directory + 'attachment.pdf') - os.remove(md5_directory + 'attachment_temp.pdf') - shutil.rmtree(md5_directory + 'attachment') - shutil.rmtree(md5_directory + 'final_attachment') - - json_data['PAGESTR'] = "PAGE " + str(page_no) + " / " + str(total_no_of_pages) - - pypdftk.fill_form(infile, json_data, outfile, flatten=False) - additional_page_counter = 0 - if len(f99_pages_text_json['additional_pages']) > 0: - continuation_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('F99_CONT') - os.makedirs(md5_directory + 'merge', exist_ok=True) - for additional_page in f99_pages_text_json['additional_pages']: - page_no += 1 - continuation_outfile = md5_directory + 'merge/' + str(additional_page_counter)+'.pdf' - pypdftk.fill_form(continuation_file, {"PAGESTR": "PAGE "+str(page_no)+" / " + str(total_no_of_pages), - "CONTINOUS_TEXT": additional_page[str(additional_page_counter)]}, continuation_outfile) - pypdftk.concat([outfile, continuation_outfile], md5_directory + json_file_md5 + '_all_pages_temp.pdf') - shutil.copy(md5_directory + json_file_md5 + '_all_pages_temp.pdf', outfile) - additional_page_counter += 1 - os.remove(md5_directory + json_file_md5 + '_all_pages_temp.pdf') - - # Add the F99 attachment - if 'attachment_file' in request.files: - pypdftk.concat([outfile, md5_directory + 'attachment.pdf'], md5_directory + 'all_pages.pdf') - os.remove(md5_directory + 
'attachment.pdf') - else: - shutil.copy(outfile, md5_directory + 'all_pages.pdf') - os.remove(md5_directory + json_file_md5 +'_temp.pdf') - # push output file to AWS - s3 = boto3.client('s3') - s3.upload_file(md5_directory + 'all_pages.pdf', current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], - md5_directory+'all_pages.pdf',ExtraArgs={'ContentType': "application/pdf", 'ACL': "public-read"}) - response = { - # 'file_name': '{}.pdf'.format(json_file_md5), - 'pdf_url': current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5)+'all_pages.pdf' - } - - if flask.request.method == "POST": - envelope = common.get_return_envelope( - data=response + infile = current_app.config["HTML_FORM_TEMPLATES_LOCATION"].format( + "template" ) - status_code = status.HTTP_201_CREATED - return flask.jsonify(**envelope), status_code - - else: - - if flask.request.method == "POST": - envelope = common.get_return_envelope( - 'false', 'JSON file is missing from your request' + outfile = md5_directory + json_file_md5 + ".html" + + form99_json_data = json_data["data"] + + with open(infile) as inf: + txt = inf.read() + soup = bs4.BeautifulSoup(txt, features="html5lib") + soup.find( + "label", attrs={"id": "committeeName"} + ).string = form99_json_data["committeeName"] + soup.find("label", attrs={"id": "street1"}).string = form99_json_data[ + "street1" + ] + soup.find("label", attrs={"id": "street2"}).string = form99_json_data[ + "street2" + ] + soup.find("label", attrs={"id": "city"}).string = form99_json_data[ + "city" + ] + soup.find("label", attrs={"id": "state"}).string = form99_json_data[ + "state" + ] + soup.find("label", attrs={"id": "zipCode"}).string = form99_json_data[ + "zipCode" + ] + soup.find( + "span", attrs={"id": "committeeId"} + ).string = form99_json_data["committeeId"] + + name_list = ["LastName", "FirstName", "MiddleName", "Prefix", "Suffix"] + + treasurerFullName = "" + for item in name_list: + item = "treasurer" + item + if form99_json_data.get(item): + treasurerFullName += form99_json_data.get(item) + ", " + soup.find( + "label", attrs={"id": "treasurerFullName"} + ).string = treasurerFullName[:-2] + + soup.find("label", attrs={"id": "treasurerName"}).string = ( + ( + form99_json_data.get("treasurerLastName", "") + + ", " + + form99_json_data.get("treasurerFirstName", "") + ) + .strip() + .rstrip(",") + .strip() + ) + + f99_html_data = form99_json_data["text"] + soup.find("label", attrs={"id": "text"}).string = f99_html_data + soup.find( + "label", attrs={"id": form99_json_data["reason"]} + ).string = "X" + + date_array = form99_json_data["dateSigned"].split("/") + soup.find("span", attrs={"id": "dateSignedMonth"}).string = str( + date_array[0] + ) + soup.find("span", attrs={"id": "dateSignedDate"}).string = str( + date_array[1] + ) + soup.find("span", attrs={"id": "dateSignedYear"}).string = str( + date_array[2] + ) + + with open(outfile, "w") as output_file: + output_file.write( + str(soup).replace("<", "<").replace(">", ">") + ) + + # F99 PDF page padding options + options = { + "margin-top": "0.40in", + "margin-right": "0.20in", + "margin-bottom": "0.40in", + "margin-left": "0.20in", + } + + # HTML(outfile).write_pdf(md5_directory + json_file_md5 + '.pdf', stylesheets=[CSS(current_app.config['FORMS_LOCATION'].format('F99.css'))]) + pdfkit.from_file( + outfile, md5_directory + json_file_md5 + ".pdf", options=options + ) + # pdfkit.from_file(outfile, md5_directory + json_file_md5 + '.pdf') + + total_no_of_pages = pypdftk.get_num_pages( + md5_directory + json_file_md5 + ".pdf" + ) + 
+ # checking if attachment_file exist + if ((paginate or page_count) and attachment_file_content) or ( + not paginate and "attachment_file" in request.files + ): + # reading Attachment title file + attachment_title_file = current_app.config[ + "FORM_TEMPLATES_LOCATION" + ].format("Attachment_Title") + + if (paginate or page_count) and attachment_file_content: + attachment_file = json.loads(attachment_file_content) + else: + attachment_file = request.files.get("attachment_file") + + attachment_file.save( + os.path.join(md5_directory + "attachment_temp.pdf") + ) + os.makedirs(md5_directory + "attachment", exist_ok=True) + os.makedirs(md5_directory + "final_attachment", exist_ok=True) + pypdftk.split( + md5_directory + "attachment_temp.pdf", md5_directory + "attachment" + ) + os.remove(md5_directory + "attachment/doc_data.txt") + attachment_no_of_pages = pypdftk.get_num_pages( + os.path.join(md5_directory + "attachment_temp.pdf") + ) + attachment_page_no = total_no_of_pages + total_no_of_pages += attachment_no_of_pages + + # we are doing this to assign page numbers to attachment file + for filename in os.listdir(md5_directory + "attachment"): + attachment_page_no += 1 + page_dict = {} + page_dict["PAGESTR"] = ( + "PAGE " + + str(attachment_page_no) + + " / " + + str(total_no_of_pages) + ) + + if silent_print: + page_dict["IMGNO"] = txn_img_num + attachment_page_no + + pypdftk.fill_form( + attachment_title_file, + md5_directory + + "attachment/attachment_page_" + + str(attachment_page_no) + + ".pdf", + ) + pypdftk.stamp( + md5_directory + "attachment/" + filename, + md5_directory + + "attachment/attachment_page_" + + str(attachment_page_no) + + ".pdf", + md5_directory + + "final_attachment/attachment_" + + str(attachment_page_no) + + ".pdf", + ) + pypdftk.concat( + directory_files(md5_directory + "final_attachment/"), + md5_directory + "attachment.pdf", + ) + os.remove(md5_directory + "attachment_temp.pdf") + + os.makedirs(md5_directory + "pages", exist_ok=True) + os.makedirs(md5_directory + "final_pages", exist_ok=True) + pypdftk.split( + md5_directory + json_file_md5 + ".pdf", md5_directory + "pages" + ) + os.remove(md5_directory + "pages/doc_data.txt") + + f99_page_no = 1 + for filename in os.listdir(md5_directory + "pages"): + page_dict = {} + page_dict["PAGESTR"] = ( + "PAGE " + str(f99_page_no) + " / " + str(total_no_of_pages) + ) + + if silent_print: + page_dict["IMGNO"] = txn_img_num + txn_img_num += 1 + # need to print timestamp on first page only + if filing_timestamp and f99_page_no == 1: + page_dict["FILING_TIMESTAMP"] = filing_timestamp + + page_number_file = current_app.config["FORM_TEMPLATES_LOCATION"].format( + "Page_Number" + ) + pypdftk.fill_form( + page_number_file, + page_dict, + md5_directory + + "pages/page_number_" + + str(f99_page_no).zfill(6) + + ".pdf", + ) + pypdftk.stamp( + md5_directory + + "pages/page_number_" + + str(f99_page_no).zfill(6) + + ".pdf", + md5_directory + "pages/" + filename, + md5_directory + + "final_pages/page_" + + str(f99_page_no).zfill(6) + + ".pdf", + ) + f99_page_no += 1 + + pypdftk.concat( + directory_files(md5_directory + "final_pages/"), + json_file_md5 + "_temp.pdf", ) - status_code = status.HTTP_400_BAD_REQUEST - return flask.jsonify(**envelope), status_code - -def print_f99_pdftk_html(stamp_print): - # check if json_file is in the request - # HTML("templates/forms/test.html").write_pdf("output/pdf/test/test.pdf") - # HTML(string='''
... This is bold text ... This is underline text ... This is italics text ... Title ... 1. one  2. two  3. three ...
''').write_pdf("output/pdf/test/test.pdf") - # pdfkit.from_file("templates/forms/test.html", "output/pdf/test/test.pdf") - # pypdftk.stamp(current_app.config['FORM_TEMPLATES_LOCATION'].format('F99'), "output/pdf/test/test.pdf", "output/pdf/test/output.pdf") + if ((paginate or page_count) and attachment_file_content) or ( + not paginate and "attachment_file" in request.files + ): + pypdftk.concat( + [json_file_md5 + "_temp.pdf", md5_directory + "attachment.pdf"], + md5_directory + "all_pages.pdf", + ) + shutil.rmtree(md5_directory + "attachment") + shutil.rmtree(md5_directory + "final_attachment") + os.remove(md5_directory + "attachment.pdf") + else: + shutil.move( + json_file_md5 + "_temp.pdf", md5_directory + "all_pages.pdf" + ) + # clean up task + shutil.rmtree(md5_directory + "pages") + shutil.rmtree(md5_directory + "final_pages") + os.remove(md5_directory + json_file_md5 + ".pdf") + # if flask.request.method == "POST": - if 'json_file' in request.files: - total_no_of_pages = 1 - page_no = 1 - json_file = request.files.get('json_file') - # generate md5 for json file - json_file_md5 = utils.md5_for_file(json_file) - json_file.stream.seek(0) - md5_directory = current_app.config['OUTPUT_DIR_LOCATION'].format(json_file_md5) - os.makedirs(md5_directory, exist_ok=True) - # os.makedirs(md5_directory + "images", exist_ok=True) - if not os.path.exists(md5_directory + "images"): - shutil.copytree("templates/forms/F99/images", md5_directory + "images") - shutil.copyfile("templates/forms/F99/form-text.css", md5_directory + "form-text.css") - infile = current_app.config['HTML_FORM_TEMPLATES_LOCATION'].format('template') - json_file.save(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5)) - outfile = md5_directory + json_file_md5 + '.html' - json_data = json.load(open(current_app.config['REQUEST_FILE_LOCATION'].format(json_file_md5))) - form99_json_data = json_data['data'] - # load the file - with open(infile) as inf: - txt = inf.read() - soup = bs4.BeautifulSoup(txt) - soup.find('label', attrs={'id': 'committeeName'}).string = form99_json_data['committeeName'] - soup.find('label', attrs={'id': 'street1'}).string = form99_json_data['street1'] - soup.find('label', attrs={'id': 'street2'}).string = form99_json_data['street2'] - soup.find('label', attrs={'id': 'city'}).string = form99_json_data['city'] - soup.find('label', attrs={'id': 'state'}).string = form99_json_data['state'] - soup.find('label', attrs={'id': 'zipCode'}).string = form99_json_data['zipCode'] - soup.find('span', attrs={'id': 'committeeId'}).string = form99_json_data['committeeId'] - soup.find('label', attrs={'id': 'treasurerFullName'}).string = form99_json_data['treasurerLastName'] + \ - ', ' + form99_json_data['treasurerFirstName'] \ - + ', ' + form99_json_data['treasurerMiddleName'] \ - + ', ' + form99_json_data['treasurerPrefix'] \ - + ', ' + form99_json_data['treasurerSuffix'] - soup.find('label', attrs={'id': 'treasurerName'}).string = form99_json_data['treasurerLastName'] + \ - ', ' + form99_json_data['treasurerFirstName'] - f99_html_data = form99_json_data['text'] - soup.find('label', attrs={'id': 'text'}).string = f99_html_data - soup.find('label', attrs={'id': form99_json_data['reason']}).string = 'X' - - date_array = form99_json_data['dateSigned'].split("/") - soup.find('span', attrs={'id': 'dateSignedMonth'}).string = str(date_array[0]) - soup.find('span', attrs={'id': 'dateSignedDate'}).string = str(date_array[1]) - soup.find('span', attrs={'id': 'dateSignedYear'}).string = str(date_array[2]) - - - with 
open(outfile, "w") as output_file: - output_file.write(str(soup).replace("<", "<").replace(">", ">")) - - # F99 PDF page padding options - # options = { - # 'margin-top': '0.36in', - # 'margin-right': '0.25in', - # 'margin-bottom': '0.39in', - # 'margin-left': '0.25in' - # } - options = { - 'margin-top': '0.40in', - 'margin-right': '0.20in', - 'margin-bottom': '0.40in', - 'margin-left': '0.20in' + response = { + # 'file_name': ent_app.conf'{}.pdf'.format(json_file_md5), + "total_pages": total_no_of_pages, } - # HTML(outfile).write_pdf(md5_directory + json_file_md5 + '.pdf', stylesheets=[CSS(current_app.config['FORMS_LOCATION'].format('F99.css'))]) - pdfkit.from_file(outfile, md5_directory + json_file_md5 + '.pdf', options=options) - # pdfkit.from_file(outfile, md5_directory + json_file_md5 + '.pdf') - - total_no_of_pages = pypdftk.get_num_pages(md5_directory + json_file_md5 + '.pdf') - page_number_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('Page_Number') - - - - # checking if attachment_file exist - if 'attachment_file' in request.files: - # reading Attachment title file - attachment_title_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('Attachment_Title') - attachment_file = request.files.get('attachment_file') - attachment_file.save(os.path.join(md5_directory + 'attachment_temp.pdf')) - os.makedirs(md5_directory + 'attachment', exist_ok=True) - os.makedirs(md5_directory + 'final_attachment', exist_ok=True) - pypdftk.split(md5_directory + 'attachment_temp.pdf', md5_directory+'attachment') - os.remove(md5_directory + 'attachment/doc_data.txt') - attachment_no_of_pages = pypdftk.get_num_pages(os.path.join(md5_directory + 'attachment_temp.pdf')) - attachment_page_no = total_no_of_pages - total_no_of_pages += attachment_no_of_pages - - # we are doing this to assign page numbers to attachment file - for filename in os.listdir(md5_directory+'attachment'): - attachment_page_no += 1 - pypdftk.fill_form(attachment_title_file, {"PAGESTR": "PAGE " + str(attachment_page_no) + " / " + str(total_no_of_pages)}, - md5_directory +'attachment/attachment_page_'+str(attachment_page_no)+'.pdf') - pypdftk.stamp(md5_directory+'attachment/'+filename, md5_directory + - 'attachment/attachment_page_'+str(attachment_page_no)+'.pdf', md5_directory + - 'final_attachment/attachment_'+str(attachment_page_no)+'.pdf') - pypdftk.concat(directory_files(md5_directory +'final_attachment/'), md5_directory + 'attachment.pdf') - os.remove(md5_directory + 'attachment_temp.pdf') - # shutil.rmtree(md5_directory + 'attachment') - # shutil.rmtree(md5_directory + 'final_attachment') - # pypdftk.concat([md5_directory + json_file_md5 + '.pdf', md5_directory + 'attachment.pdf'], md5_directory + 'all_pages_temp.pdf') - # else: - # shutil.move(md5_directory + json_file_md5 + '.pdf', md5_directory + 'all_pages_temp.pdf') - os.makedirs(md5_directory + 'pages', exist_ok=True) - os.makedirs(md5_directory + 'final_pages', exist_ok=True) - pypdftk.split(md5_directory + json_file_md5 + '.pdf', md5_directory + 'pages') - os.remove(md5_directory + 'pages/doc_data.txt') - f99_page_no = 1 - for filename in os.listdir(md5_directory + 'pages'): - pypdftk.fill_form(page_number_file, - {"PAGESTR": "PAGE " + str(f99_page_no) + " / " + str(total_no_of_pages)}, - md5_directory + 'pages/page_number_' + str(f99_page_no) + '.pdf') - pypdftk.stamp(md5_directory + - 'pages/page_number_' + str(f99_page_no) + '.pdf', md5_directory + 'pages/' + filename, md5_directory + - 'final_pages/page_' + str(f99_page_no) + '.pdf') - f99_page_no += 
1 - - pypdftk.concat(directory_files(md5_directory + 'final_pages/'), json_file_md5 + '_temp.pdf') - - if 'attachment_file' in request.files: - pypdftk.concat([json_file_md5 + '_temp.pdf', md5_directory + 'attachment.pdf'], md5_directory + 'all_pages.pdf') - shutil.rmtree(md5_directory + 'attachment') - shutil.rmtree(md5_directory + 'final_attachment') - os.remove(md5_directory + 'attachment.pdf') - else: - shutil.move(json_file_md5 + '_temp.pdf', md5_directory + 'all_pages.pdf') - - # clean up task - shutil.rmtree(md5_directory + 'pages') - shutil.rmtree(md5_directory + 'final_pages') - # os.remove(md5_directory + json_file_md5 + '.html') - # shutil.rmtree(md5_directory + 'images') - # os.remove(md5_directory + 'form-text.css') - os.remove(md5_directory + json_file_md5 + '.pdf') - - - - # for f99_page_no in range(f99_no_of_pages): - # pypdftk.fill_form(page_number_file, - # {"PAGESTR": "PAGE " + str(f99_page_no+1) + " / " + str(total_no_of_pages)}, - # md5_directory + 'pages/page_' + str(f99_page_no+1) + '.pdf') - # pypdftk.stamp(md5_directory + json_file_md5 + '.pdf', md5_directory + - # 'pages/page_' + str(f99_page_no+1) + '.pdf', md5_directory + json_file_md5 + '_temp.pdf') - - # json_data['PAGESTR'] = "PAGE " + str(page_no) + " / " + str(total_no_of_pages) - - # json_data['MISCELLANEOUS_TEXT'] = '' - # xfdf_path = pypdftk.gen_xfdf(json_data) - # pypdftk.fill_form(infile, json_data, outfile) - - - # HTML(string='''
0: - # continuation_file = current_app.config['FORM_TEMPLATES_LOCATION'].format('F99_CONT') - # os.makedirs(md5_directory + 'merge', exist_ok=True) - # for additional_page in f99_pages_text_json['additional_pages']: - # page_no += 1 - # continuation_outfile = md5_directory + 'merge/' + str(additional_page_counter)+'.pdf' - # pypdftk.fill_form(continuation_file, {"PAGESTR": "PAGE "+str(page_no)+" / " + str(total_no_of_pages), - # "CONTINOUS_TEXT": additional_page[str(additional_page_counter)]}, continuation_outfile) - # pypdftk.concat([outfile, continuation_outfile], md5_directory + json_file_md5 + '_all_pages_temp.pdf') - # shutil.copy(md5_directory + json_file_md5 + '_all_pages_temp.pdf', outfile) - # additional_page_counter += 1 - # os.remove(md5_directory + json_file_md5 + '_all_pages_temp.pdf') - # - # # Add the F99 attachment - # if 'attachment_file' in request.files: - # pypdftk.concat([outfile, md5_directory + 'attachment.pdf'], md5_directory + 'all_pages.pdf') - # os.remove(md5_directory + 'attachment.pdf') - # else: - # shutil.copy(outfile, md5_directory + 'all_pages.pdf') - # os.remove(md5_directory + json_file_md5 +'_temp.pdf') - # push output file to AWS - s3 = boto3.client('s3') - s3.upload_file(md5_directory + 'all_pages.pdf', current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], - md5_directory+'all_pages.pdf',ExtraArgs={'ContentType': "application/pdf", 'ACL': "public-read"}) - response = { - # 'file_name': '{}.pdf'.format(json_file_md5), - 'pdf_url': current_app.config['PRINT_OUTPUT_FILE_URL'].format(json_file_md5)+'all_pages.pdf' - } - - if flask.request.method == "POST": - envelope = common.get_return_envelope( - data=response - ) - status_code = status.HTTP_201_CREATED - return flask.jsonify(**envelope), status_code - - else: - if flask.request.method == "POST": - envelope = common.get_return_envelope( - 'false', 'JSON file is missing from your request' + if not page_count and not paginate: + s3 = boto3.client("s3") + extraArgs = {"ContentType": "application/pdf", "ACL": "public-read"} + + if silent_print: + response["pdf_url"] = current_app.config['S3_FILE_URL'] + rep_id + '.pdf' + s3.upload_file( + md5_directory + 'all_pages.pdf', + current_app.config['AWS_FECFILE_COMPONENTS_BUCKET_NAME'], + current_app.config['AWS_FECFILE_OUTPUT_DIRECTORY'] + '/' + + str(rep_id) + '.pdf', + ExtraArgs=extraArgs) + else: + response["pdf_url"] = ( + current_app.config["PRINT_OUTPUT_FILE_URL"].format( + json_file_md5 + ) + + "all_pages.pdf", + ) + + s3.upload_file( + md5_directory + "all_pages.pdf", + current_app.config["AWS_FECFILE_COMPONENTS_BUCKET_NAME"], + md5_directory + "all_pages.pdf", + ExtraArgs=extraArgs, + ) + else: + if not is_dir_exist: + shutil.rmtree(md5_directory) + if paginate: + txn_img_json = { + "summary": { + "committeeId": form99_json_data.get("committeeId", None), + "begin_image_num": begin_image_num, + "end_image_num": txn_img_num + } + } + response["txn_img_json"] = txn_img_json + + envelope = common.get_return_envelope(data=response) + status_code = ( + status.HTTP_200_OK + if page_count or paginate + else status.HTTP_201_CREATED ) - status_code = status.HTTP_400_BAD_REQUEST return flask.jsonify(**envelope), status_code + # elif page_count or paginate: + # if not is_dir_exist: + # shutil.rmtree(md5_directory) + # response = { + # "total_pages": total_no_of_pages, + # } + # elif paginate: + # txn_img_json = { + # 'summary' : { + # 'committeeId': form99_json_data.get('committeeId', None) + # } + # } + # response['txn_img_json'] = txn_img_json + # return True, 
response + # elif silent_print and not flask.request.method == "POST": + # return True, {} + else: + if paginate or page_count or silent_print: + envelope = common.get_return_envelope(False, "") + else: + # elif flask.request.method == "POST": + envelope = common.get_return_envelope( + False, "json_file is missing from your request" + ) + return flask.jsonify(**envelope), status.HTTP_400_BAD_REQUEST + except Exception as e: + traceback.print_exception(*sys.exc_info()) + return error("Error generating print preview, error message: " + str(e)) diff --git a/routes/src/schedules/sb_schedule.py b/routes/src/schedules/sb_schedule.py index cf06fa8..31650f3 100644 --- a/routes/src/schedules/sb_schedule.py +++ b/routes/src/schedules/sb_schedule.py @@ -1,5 +1,8 @@ import os import pypdftk +import sys +import traceback +import logging from flask import current_app from os import path @@ -18,86 +21,95 @@ def print_sb_line( total_no_of_pages, image_num=None, ): + try: + if sb_list: + last_page_cnt = 3 if len(sb_list) % 3 == 0 else len(sb_list) % 3 + schedule_total = 0 + os.makedirs(md5_directory + "SB/" + line_number, exist_ok=True) + sb_infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("SB") + + for page_num in range(page_cnt): + current_page_num += 1 + memo_array = [] + last_page = False + schedule_page_dict = {} + schedule_page_dict["lineNumber"] = line_number + schedule_page_dict["pageNo"] = current_page_num + schedule_page_dict["totalPages"] = total_no_of_pages + + if image_num: + schedule_page_dict["IMGNO"] = image_num + image_num += 1 + + page_start_index = page_num * 3 + if page_num + 1 == page_cnt: + last_page = True + + # This call prepares data to render on PDF + build_sb_per_page_schedule_dict( + last_page, + last_page_cnt, + page_start_index, + schedule_page_dict, + sb_list, + memo_array, + ) + try: + schedule_total += float(schedule_page_dict["pageSubtotal"]) + + if page_cnt == page_num + 1: + schedule_page_dict["scheduleTotal"] = "{0:.2f}".format(schedule_total) + schedule_page_dict["committeeName"] = f3x_data["committeeName"] + sb_outfile = ( + md5_directory + "SB/" + line_number + "/page_" + str(page_num) + ".pdf" + ) + pypdftk.fill_form(sb_infile, schedule_page_dict, sb_outfile) + + # Memo text changes and build memo pages and return updated current_page_num + current_page_num, image_num = build_memo_page( + memo_array, + md5_directory, + line_number, + current_page_num, + page_num, + total_no_of_pages, + sb_outfile, + name="SB", + image_num=image_num, + ) - if sb_list: - last_page_cnt = 3 if len(sb_list) % 3 == 0 else len(sb_list) % 3 - schedule_total = 0 - os.makedirs(md5_directory + "SB/" + line_number, exist_ok=True) - sb_infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("SB") - - for page_num in range(page_cnt): - current_page_num += 1 - memo_array = [] - last_page = False - schedule_page_dict = {} - schedule_page_dict["lineNumber"] = line_number - schedule_page_dict["pageNo"] = current_page_num - schedule_page_dict["totalPages"] = total_no_of_pages - - if image_num: - schedule_page_dict["IMGNO"] = image_num - image_num += 1 - - page_start_index = page_num * 3 - if page_num + 1 == page_cnt: - last_page = True - - # This call prepares data to render on PDF - build_sb_per_page_schedule_dict( - last_page, - last_page_cnt, - page_start_index, - schedule_page_dict, - sb_list, - memo_array, - ) - - schedule_total += float(schedule_page_dict["pageSubtotal"]) - - if page_cnt == page_num + 1: - schedule_page_dict["scheduleTotal"] = "{0:.2f}".format(schedule_total) 
- schedule_page_dict["committeeName"] = f3x_data["committeeName"] - sb_outfile = ( - md5_directory + "SB/" + line_number + "/page_" + str(page_num) + ".pdf" - ) - pypdftk.fill_form(sb_infile, schedule_page_dict, sb_outfile) - - # Memo text changes and build memo pages and return updated current_page_num - current_page_num, image_num = build_memo_page( - memo_array, - md5_directory, - line_number, - current_page_num, - page_num, - total_no_of_pages, - sb_outfile, - name="SB", - image_num=image_num, - ) - - pypdftk.concat( - directory_files(md5_directory + "SB/" + line_number + "/"), - md5_directory + "SB/" + line_number + "/all_pages.pdf", - ) - if path.isfile(md5_directory + "SB/all_pages.pdf"): - pypdftk.concat( - [ - md5_directory + "SB/all_pages.pdf", + pypdftk.concat( + directory_files(md5_directory + "SB/" + line_number + "/"), md5_directory + "SB/" + line_number + "/all_pages.pdf", - ], - md5_directory + "SB/temp_all_pages.pdf", - ) - os.rename( - md5_directory + "SB/temp_all_pages.pdf", - md5_directory + "SB/all_pages.pdf", - ) - else: - os.rename( - md5_directory + "SB/" + line_number + "/all_pages.pdf", - md5_directory + "SB/all_pages.pdf", - ) - - return current_page_num, image_num + ) + if path.isfile(md5_directory + "SB/all_pages.pdf"): + pypdftk.concat( + [ + md5_directory + "SB/all_pages.pdf", + md5_directory + "SB/" + line_number + "/all_pages.pdf", + ], + md5_directory + "SB/temp_all_pages.pdf", + ) + os.rename( + md5_directory + "SB/temp_all_pages.pdf", + md5_directory + "SB/all_pages.pdf", + ) + else: + os.rename( + md5_directory + "SB/" + line_number + "/all_pages.pdf", + md5_directory + "SB/all_pages.pdf", + ) + except: + logging.error('**** Start - Error inside if condition ****') + # printing stack trace + traceback.print_exception(*sys.exc_info()) + logging.error('**** End - Error inside if condition ****') + + + return current_page_num, image_num + except: + # printing stack trace + traceback.print_exception(*sys.exc_info()) # This method builds data for individual SB page diff --git a/routes/src/schedules/sc1_schedule.py b/routes/src/schedules/sc1_schedule.py index 88c9d2c..3e3f408 100644 --- a/routes/src/schedules/sc1_schedule.py +++ b/routes/src/schedules/sc1_schedule.py @@ -45,7 +45,7 @@ def print_sc1_line( sc1_schedule_page_dict["loanIncurredDateDay"] = date_array[1] sc1_schedule_page_dict["loanIncurredDateYear"] = date_array[2] - if sc1.get("loanDueDate") != "": + if sc1.get("loanDueDate") not in ["none", "null", " ", "", None]: if "-" in sc1.get("loanDueDate"): date_array = sc1.get("loanDueDate").split("-") if len(date_array) == 3: diff --git a/routes/src/schedules/sc_schedule.py b/routes/src/schedules/sc_schedule.py index 6f2544c..191c157 100644 --- a/routes/src/schedules/sc_schedule.py +++ b/routes/src/schedules/sc_schedule.py @@ -370,7 +370,6 @@ def print_sc_line( if image_num: sc_schedule_page_dict["IMGNO"] = image_num - print("Bug", image_num) image_num += 1 if sc_schedules[len(sc_schedules) - 1].get( diff --git a/routes/src/schedules/sd_schedule.py b/routes/src/schedules/sd_schedule.py index 25e7d4c..8b9f1ac 100644 --- a/routes/src/schedules/sd_schedule.py +++ b/routes/src/schedules/sd_schedule.py @@ -38,9 +38,7 @@ def print_sd_line( concat_no = index % 3 + 1 if image_num: - print("Schedule D image number", image_num) sd_page_dict["IMGNO"] = image_num - image_num += 1 if ( "creditorOrganizationName" in sd_list[index] @@ -149,6 +147,8 @@ def print_sd_line( md5_directory + "SD/all_pages.pdf", ) page_count += 1 + if image_num: + image_num += 1 sd_sub_total 
= 0 return sd_total_balance, image_num diff --git a/routes/src/schedules/se_schedule.py b/routes/src/schedules/se_schedule.py index 58bd8a5..3f0c916 100644 --- a/routes/src/schedules/se_schedule.py +++ b/routes/src/schedules/se_schedule.py @@ -143,7 +143,6 @@ def print_se_line( md5_directory + "SE/" + line_number + "/all_pages.pdf", md5_directory + "SE/all_pages.pdf", ) - return current_page_num, image_num except: traceback.print_exception(*sys.exc_info()) diff --git a/routes/src/schedules/sh2_schedule.py b/routes/src/schedules/sh2_schedule.py index 3457588..ea9097a 100644 --- a/routes/src/schedules/sh2_schedule.py +++ b/routes/src/schedules/sh2_schedule.py @@ -97,11 +97,11 @@ def build_sh2_info_dict(index, key, schedule_dict, schedule_page_dict): try: for key in schedule_dict: if key in ["fundraising", "directCandidateSupport"]: - if key == 'fundraising' and schedule_dict[key]: - schedule_dict[key] = 'f' - if key == 'directCandidateSupport' and schedule_dict[key]: - schedule_dict[key] = 'd' - schedule_dict[key] = schedule_dict[key] + if key == "fundraising" and schedule_dict[key]: + schedule_dict[key] = "f" + if key == "directCandidateSupport" and schedule_dict[key]: + schedule_dict[key] = "d" + # schedule_dict[key] = schedule_dict[key] if key in ["federalPercent", "nonFederalPercent"]: schedule_dict[key] = "{:.2f}".format(float(schedule_dict[key])) diff --git a/routes/src/schedules/sh3_schedule.py b/routes/src/schedules/sh3_schedule.py index 92e5a85..c92155d 100644 --- a/routes/src/schedules/sh3_schedule.py +++ b/routes/src/schedules/sh3_schedule.py @@ -1,9 +1,11 @@ import os import pypdftk +from math import ceil from flask import current_app from os import path from routes.src.utils import directory_files +from routes.src.f3x.helper import get_sh3_page_count, make_sh3_dict def print_sh3_line( @@ -19,165 +21,165 @@ def print_sh3_line( if sh3_list: os.makedirs(md5_directory + "SH3/" + line_number, exist_ok=True) sh3_infile = current_app.config["FORM_TEMPLATES_LOCATION"].format("SH3") - sh3_line_dict = [] - sh3_line_transaction = [] - total_dict = {} - t_transfered = {} - # dc_subtotal = 0 - # df_subtotal = 0 - for sh3 in sh3_list: - a_n = sh3["accountName"] - hash_check = "%s-%s" % (sh3["accountName"], sh3["receiptDate"]) - if hash_check not in sh3_line_transaction: - sh3_line_transaction.append(hash_check) - ind = sh3_line_transaction.index(hash_check) - - if len(sh3_line_dict) <= ind: - sh3_line_dict.insert(ind, sh3) - - if sh3["activityEventType"] == "DF": - ind = sh3_line_transaction.index(hash_check) - if sh3_line_dict[ind].get("dfsubs"): - sh3_line_dict[ind]["dfsubs"].append(sh3) - sh3_line_dict[ind]["dftotal"] += sh3["transferredAmount"] - else: - sh3_line_dict[ind]["dfsubs"] = [sh3] - sh3_line_dict[ind]["dftotal"] = sh3["transferredAmount"] - elif sh3["activityEventType"] == "DC": - ind = sh3_line_transaction.index(hash_check) - if sh3_line_dict[ind].get("dcsubs"): - sh3_line_dict[ind]["dcsubs"].append(sh3) - sh3_line_dict[ind]["dctotal"] += sh3["transferredAmount"] - else: - sh3_line_dict[ind]["dcsubs"] = [sh3] - sh3_line_dict[ind]["dctotal"] = sh3["transferredAmount"] - else: - ind = sh3_line_transaction.index(hash_check) - if sh3_line_dict[ind].get("subs"): - sh3_line_dict[ind]["subs"].append(sh3) - else: - sh3_line_dict[ind]["subs"] = [sh3] - - if ind in t_transfered: - t_transfered[ind] += sh3["transferredAmount"] - else: - t_transfered[ind] = sh3["transferredAmount"] - - if a_n in total_dict and sh3["activityEventType"] in total_dict[a_n]: - 
total_dict[a_n][sh3["activityEventType"]] += sh3["transferredAmount"] - total_dict[a_n]["lastpage"] = ind - elif a_n in total_dict: - total_dict[a_n][sh3["activityEventType"]] = sh3["transferredAmount"] - total_dict[a_n]["lastpage"] = ind - else: - total_dict[a_n] = { - sh3["activityEventType"]: sh3["transferredAmount"], - "lastpage": ind, - } - - if sh3_line_page_cnt > 0: - sh3_line_start_page += 1 - for sh3_page_no, sh3_page in enumerate(sh3_line_dict): - # page_subtotal = 0.00 - last_page = False + + sh3_dict = make_sh3_dict(sh3_list) + + current_page_num = 0 + + for hash_check, hash_check_value in sh3_dict.items(): + hash_check_total_pages = get_sh3_page_count(hash_check_value) + account_name = hash_check.split("@@")[0] + receipt_date = None + + if len(hash_check.split("@@")[0]) > 1: + receipt_date = hash_check.split("@@")[1] + + event_type_dict = { + "AD": {"current_index": 0, "amount": 0}, + "GV": {"current_index": 0, "amount": 0}, + "EA": {"current_index": 0, "amount": 0}, + "DC": {"current_index": 0, "amount": 0}, + "DF": {"current_index": 0, "amount": 0}, + "PC": {"current_index": 0, "amount": 0}, + } + + total_amount = 0 + while hash_check_total_pages: + hash_check_total_pages -= 1 + current_page_total_amount = 0 + + current_page_num += 1 + sh3_schedule_page_dict = {} + sh3_schedule_page_dict["lineNumber"] = line_number - sh3_schedule_page_dict["pageNo"] = sh3_line_start_page + sh3_page_no + sh3_schedule_page_dict["pageNo"] = ( + sh3_line_start_page + current_page_num + ) sh3_schedule_page_dict["totalPages"] = total_no_of_pages - acc_name = sh3_page.get("accountName") - lastpage_c = total_dict[acc_name]["lastpage"] if image_num: sh3_schedule_page_dict["IMGNO"] = image_num image_num += 1 - # page_start_index = sh3_page_no * 1 - if sh3_page_no == lastpage_c: - last_page = True - # This call prepares data to render on PDF - # sh3_schedule_page_dict['adtransactionId'] = sh3_page['transactionId'] - # sh3_schedule_page_dict['adtransferredAmount'] = t_transfered[sh3_page_no] - sh3_schedule_page_dict["accountName"] = acc_name - sh3_schedule_page_dict["totalAmountTransferred"] = "{0:.2f}".format( - float(t_transfered[sh3_page_no]) - ) - - if "receiptDate" in sh3_page: + sh3_schedule_page_dict["accountName"] = account_name - date_array = sh3_page["receiptDate"].split("/") + if receipt_date: + date_array = receipt_date.split("/") sh3_schedule_page_dict["receiptDateMonth"] = date_array[0] sh3_schedule_page_dict["receiptDateDay"] = date_array[1] sh3_schedule_page_dict["receiptDateYear"] = date_array[2] - for sub_sh3 in sh3_page.get("subs", []): - s_ = sub_sh3["activityEventType"].lower() - sh3_schedule_page_dict[s_ + "transactionId"] = sub_sh3[ - "transactionId" - ] - sh3_schedule_page_dict[s_ + "transferredAmount"] = "{0:.2f}".format( - float(sub_sh3["transferredAmount"]) - ) + for event_type, value_list in hash_check_value.items(): + current_index = event_type_dict[event_type]["current_index"] + + if current_index < len(value_list): + sh3_schedule_page_dict[ + event_type.lower() + "transactionId" + ] = value_list[current_index]["transactionId"] + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount" + ] = "{0:.2f}".format( + float(value_list[current_index]["transferredAmount"]) + ) + current_page_total_amount += float( + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount" + ] + ) + + event_type_dict[event_type]["current_index"] += 1 + + if event_type in ["DC", "DF"]: + sub_transferred_amount = float( + sh3_schedule_page_dict[ + event_type.lower() + 
"transferredAmount" + ] + ) + event_type_dict[event_type]["amount"] += float( + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount" + ] + ) + + sh3_schedule_page_dict[ + event_type.lower() + "activityEventName" + ] = value_list[current_index]["activityEventName"] + + if current_index + 1 < len(value_list): + current_index += 1 + sh3_schedule_page_dict[ + event_type.lower() + "transactionId_1" + ] = value_list[current_index]["transactionId"] + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount_1" + ] = "{0:.2f}".format( + float( + value_list[current_index]["transferredAmount"] + ) + ) + sh3_schedule_page_dict[ + event_type.lower() + "activityEventName_1" + ] = value_list[current_index]["activityEventName"] + + sub_transferred_amount += float( + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount_1" + ] + ) + current_page_total_amount += float( + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount_1" + ] + ) + + event_type_dict[event_type]["current_index"] += 1 + event_type_dict[event_type]["amount"] += float( + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount_1" + ] + ) + + sh3_schedule_page_dict[ + event_type.lower() + "subtransferredAmount" + ] = "{0:.2f}".format(float(sub_transferred_amount)) + else: + event_type_dict[event_type]["amount"] += float( + sh3_schedule_page_dict[ + event_type.lower() + "transferredAmount" + ] + ) - df_inc = "" - - for sub_sh3 in sh3_page.get("dfsubs", []): - s_ = sub_sh3["activityEventType"].lower() - sh3_schedule_page_dict[s_ + "transactionId" + df_inc] = sub_sh3[ - "transactionId" - ] - sh3_schedule_page_dict[ - s_ + "transferredAmount" + df_inc - ] = "{0:.2f}".format(float(sub_sh3["transferredAmount"])) - sh3_schedule_page_dict[s_ + "activityEventName" + df_inc] = sub_sh3[ - "activityEventName" - ] - sh3_schedule_page_dict[ - s_ + "subtransferredAmount" - ] = "{0:.2f}".format(float(sh3_page.get(s_ + "total", ""))) - df_inc = "_1" - - dc_inc = "" - - for sub_sh3 in sh3_page.get("dcsubs", []): - s_ = sub_sh3["activityEventType"].lower() - sh3_schedule_page_dict[s_ + "transactionId" + dc_inc] = sub_sh3[ - "transactionId" - ] - sh3_schedule_page_dict[ - s_ + "transferredAmount" + dc_inc - ] = "{0:.2f}".format(float(sub_sh3["transferredAmount"])) - sh3_schedule_page_dict[s_ + "activityEventName" + dc_inc] = sub_sh3[ - "activityEventName" - ] - sh3_schedule_page_dict[ - s_ + "subtransferredAmount" - ] = "{0:.2f}".format(float(sh3_page.get(s_ + "total", ""))) - dc_inc = "_1" - - sh3_schedule_page_dict["committeeName"] = f3x_data["committeeName"] - if last_page: - total_dict[acc_name]["lastpage"] = 0 + sh3_schedule_page_dict["totalAmountTransferred"] = "{0:.2f}".format( + float(current_page_total_amount) + ) + total_amount += current_page_total_amount + + # condition for last page + if not hash_check_total_pages: sh3_schedule_page_dict["totalAmountPeriod"] = "{0:.2f}".format( - float(sum(total_dict[acc_name].values())) + float(total_amount) ) - for total_key in total_dict[acc_name]: + for key, value in event_type_dict.items(): sh3_schedule_page_dict[ - total_key.lower() + "total" - ] = "{0:.2f}".format(float(total_dict[acc_name][total_key])) + key.lower() + "total" + ] = "{0:.2f}".format(float(value["amount"])) sh3_outfile = ( md5_directory + "SH3/" + line_number + "/page_" - + str(sh3_page_no) + + str(current_page_num - 1) + ".pdf" ) pypdftk.fill_form(sh3_infile, sh3_schedule_page_dict, sh3_outfile) + pypdftk.concat( directory_files(md5_directory + "SH3/" + line_number + "/"), 
md5_directory + "SH3/" + line_number + "/all_pages.pdf", ) + if path.isfile(md5_directory + "SH3/all_pages.pdf"): pypdftk.concat( [ diff --git a/routes/src/utils.py b/routes/src/utils.py index e6a91e4..21bfab0 100644 --- a/routes/src/utils.py +++ b/routes/src/utils.py @@ -1,5 +1,12 @@ import hashlib import os +import flask + + +from shutil import rmtree +from flask_api import status +from routes.src import common + def md5_for_file(f, block_size=4096): md5 = hashlib.md5() @@ -11,6 +18,11 @@ def md5_for_file(f, block_size=4096): return md5.hexdigest() +def md5_for_text(text): + result = hashlib.md5(text.encode()) + return result.hexdigest() + + # return the list of files in a directory def directory_files(directory): files_list = [] @@ -26,3 +38,15 @@ def merge(dict1, dict2): return res +# Error handling +def error(msg): + if flask.request.method == "POST": + envelope = common.get_return_envelope("false", msg) + status_code = status.HTTP_400_BAD_REQUEST + return flask.jsonify(**envelope), status_code + + +# delete directory if it exists +def delete_directory(dir_name): + if os.path.isdir(dir_name) and os.listdir(dir_name): + rmtree(dir_name) \ No newline at end of file diff --git a/templates/forms/SD.pdf b/templates/forms/SD.pdf index 8d319e2..9437247 100644 Binary files a/templates/forms/SD.pdf and b/templates/forms/SD.pdf differ diff --git a/templates/forms/SH3.pdf b/templates/forms/SH3.pdf index 1767f0a..ff3678d 100644 Binary files a/templates/forms/SH3.pdf and b/templates/forms/SH3.pdf differ