From 0dfe8aac2d2fca548bb26ed36f2dff807a80d8b0 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 22 Aug 2024 20:01:46 -0400 Subject: [PATCH] feat(subscription): Removes logic to handle document as bytes objects This commit removes the redundant logic that handled tasks involving documents stored as bytes objects. --- bc/subscription/tasks.py | 58 ++++++++++---------------- bc/subscription/tests/test_tasks.py | 63 +++++++++-------------------- 2 files changed, 42 insertions(+), 79 deletions(-) diff --git a/bc/subscription/tasks.py b/bc/subscription/tasks.py index a0a3956c..6bd42559 100644 --- a/bc/subscription/tasks.py +++ b/bc/subscription/tasks.py @@ -39,7 +39,7 @@ def enqueue_posts_for_new_case( subscription: Subscription, - document: bytes | str | None = None, + document_url: str | None = None, check_sponsor_message: bool = False, initial_document: DocumentDict | None = None, ) -> None: @@ -49,21 +49,15 @@ def enqueue_posts_for_new_case( Args: subscription (Subscription): the new subscription object. - document (bytes | str | None): The document, either as raw bytes or as - a URL path to download the file. + document_url (str | None): URL path to download the docuement. check_sponsor_message (bool, optional): designates whether this method should check the sponsorships field and compute the sponsor_message for each channel. Defaults to False. """ files = None - # TODO: Refactor to remove isinstance check in the if statements - # This code was added to handle potential data already in the queue - # during deployment. - if isinstance(document, str): - document = download_pdf_from_cl(document) - - if document is not None and isinstance(document, bytes): + if document_url: + document = download_pdf_from_cl(document_url) files = get_thumbnails_from_range(document, "[1,2,3,4]") docket: DocketDict | None = None @@ -139,7 +133,7 @@ def enqueue_posts_for_new_case( def enqueue_posts_for_docket_alert( webhook_event: FilingWebhookEvent, - document: bytes | str | None = None, + document_url: str | None = None, check_sponsor_message: bool = False, ) -> None: """ @@ -148,12 +142,10 @@ def enqueue_posts_for_docket_alert( Args: webhook_event (FilingWebhookEvent): The FilingWebhookEvent record. - document (bytes | None, optional): document as bytes or the URL to - download the file. + document_url (str | None): URL to download the document. check_sponsor_message (bool, optional): designates whether this method - should check. - the sponsorships field and compute the sponsor_message for each channel. - Defaults to False. + should check. the sponsorships field and compute the sponsor_message + for each channel. Defaults to False. """ if not webhook_event.subscription: return @@ -171,7 +163,7 @@ def enqueue_posts_for_docket_alert( make_post_for_webhook_event, channel.pk, webhook_event.pk, - document, + document_url, sponsor_message, retry=Retry( max=settings.RQ_MAX_NUMBER_OF_RETRIES, @@ -244,10 +236,10 @@ def check_webhook_before_posting(fwe_pk: int): return filing_webhook_event # check if the document is available or there's a sponsorship to purchase it. - document = None + document_url = None cl_document = lookup_document_by_doc_id(filing_webhook_event.doc_id) if cl_document["filepath_local"]: - document = cl_document["filepath_local"] + document_url = cl_document["filepath_local"] else: sponsorship = check_active_sponsorships( filing_webhook_event.subscription.pk @@ -267,7 +259,7 @@ def check_webhook_before_posting(fwe_pk: int): return filing_webhook_event # Got the document or no sponsorship. Tweet and toot. - enqueue_posts_for_docket_alert(filing_webhook_event, document) + enqueue_posts_for_docket_alert(filing_webhook_event, document_url) return filing_webhook_event @@ -289,10 +281,10 @@ def check_initial_complaint_before_posting( """ subscription = Subscription.objects.get(pk=subscription_pk) - document = None + document_url = None cl_document = lookup_initial_complaint(subscription.cl_docket_id) if cl_document and cl_document["filepath_local"]: - document = download_pdf_from_cl(cl_document["filepath_local"]) + document_url = cl_document["filepath_local"] elif cl_document and cl_document["pacer_doc_id"]: sponsorship = check_active_sponsorships(subscription.pk) if sponsorship: @@ -303,7 +295,7 @@ def check_initial_complaint_before_posting( # Got the document or no sponsorship. Tweet and toot. enqueue_posts_for_new_case( - subscription, document, initial_document=cl_document + subscription, document_url, initial_document=cl_document ) return subscription @@ -352,7 +344,7 @@ def process_fetch_webhook_event( "The RECAP document doesn't have a path to download the file" ) - pdf_data = cl_document["filepath_local"] + pdf_path = cl_document["filepath_local"] sponsor_groups = get_sponsored_groups_per_subscription(subscription.pk) @@ -371,9 +363,9 @@ def process_fetch_webhook_event( log_purchase(sponsor_groups, subscription.pk, document) if record_type == "filing_webhook": - enqueue_posts_for_docket_alert(filing_webhook_event, pdf_data, True) + enqueue_posts_for_docket_alert(filing_webhook_event, pdf_path, True) else: - enqueue_posts_for_new_case(subscription, pdf_data, True, cl_document) + enqueue_posts_for_new_case(subscription, pdf_path, True, cl_document) return record_pk @@ -382,7 +374,7 @@ def process_fetch_webhook_event( def make_post_for_webhook_event( channel_pk: int, fwe_pk: int, - document: bytes | str | None, + document_url: str | None, sponsor_text: str | None = None, ) -> Post: """Post a new status in the given channel using the data of the given webhook @@ -391,8 +383,7 @@ def make_post_for_webhook_event( Args: channel_pk (int): The pk of the channel where the post will be created. fwe_pk (int): The PK of the FilingWebhookEvent record. - document (bytes | str | None): The document, either as raw bytes or as - a URL path to download the file. + document_url (str | None): URL path to download the document. sponsor_text (str | None): sponsor message to include in the thumbnails. Returns: @@ -424,13 +415,8 @@ def make_post_for_webhook_event( ) files = None - # TODO: Refactor to remove isinstance check in the if statements - # This code was added to handle potential data already in the queue - # during deployment. - if isinstance(document, str): - document = download_pdf_from_cl(document) - - if document is not None and isinstance(document, bytes): + if document_url: + document = download_pdf_from_cl(document_url) thumbnail_range = "[1,2,3]" if image else "[1,2,3,4]" files = get_thumbnails_from_range(document, thumbnail_range) diff --git a/bc/subscription/tests/test_tasks.py b/bc/subscription/tests/test_tasks.py index f85ebc25..f0d82a80 100644 --- a/bc/subscription/tests/test_tasks.py +++ b/bc/subscription/tests/test_tasks.py @@ -177,6 +177,7 @@ def test_can_create_post_after_purchase(self, mock_lookup, mock_enqueue): @patch("bc.subscription.tasks.add_sponsored_text_to_thumbnails") @patch("bc.subscription.tasks.get_thumbnails_from_range") @patch.object(Channel, "get_api_wrapper") +@patch("bc.subscription.tasks.download_pdf_from_cl") class MakePostForWebhookEventTest(TestCase): webhook_event = None webhook_minute_entry = None @@ -212,7 +213,7 @@ def mock_api_wrapper(self, status_id): return wrapper def test_can_create_full_post_no_document( - self, mock_api, mock_thumbnails, mock_add_sponsor_text + self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text ): mock_api.return_value = self.mock_api_wrapper(self.status_id) @@ -223,11 +224,12 @@ def test_can_create_full_post_no_document( self.assertEqual(post.object_id, self.status_id) self.assertIn("New filing", post.text) + mock_download.assert_not_called() mock_add_sponsor_text.assert_not_called() mock_thumbnails.assert_not_called() def test_can_create_minute_entry_no_document( - self, mock_api, mock_thumbnails, mock_add_sponsor_text + self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text ): mock_api.return_value = self.mock_api_wrapper(self.status_id) @@ -239,10 +241,10 @@ def test_can_create_minute_entry_no_document( self.assertEqual(post.object_id, self.status_id) self.assertIn("New minute entry", post.text) + mock_download.assert_not_called() mock_add_sponsor_text.assert_not_called() mock_thumbnails.assert_not_called() - @patch("bc.subscription.tasks.download_pdf_from_cl") def test_get_three_thumbnails_from_documents( self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text ): @@ -252,15 +254,6 @@ def test_get_three_thumbnails_from_documents( self.webhook_event.short_description = "" self.webhook_event.save(update_fields=["short_description"]) - # This test case handles scenarios where the document is provided - # directly as a binary object. - make_post_for_webhook_event( - self.channel.pk, self.webhook_event.pk, self.bin_object - ) - - mock_thumbnails.assert_called_with(self.bin_object, "[1,2,3]") - mock_add_sponsor_text.assert_not_called() - # This test case verifies that `make_post_for_webhook_event` can handle # document URLs as input, in contrast to the previous test that used # URLs. @@ -274,31 +267,34 @@ def test_get_three_thumbnails_from_documents( mock_add_sponsor_text.assert_not_called() def test_get_four_thumbnails_from_document( - self, mock_api, mock_thumbnails, mock_add_sponsor_text + self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text ): mock_api.return_value = self.mock_api_wrapper(self.status_id) + mock_download.return_value = self.bin_object make_post_for_webhook_event( - self.channel.pk, self.webhook_event.pk, self.bin_object + self.channel.pk, self.webhook_event.pk, self.fake_document_path ) - + mock_download.assert_called_once_with(self.fake_document_path) mock_thumbnails.assert_called_with(self.bin_object, "[1,2,3,4]") mock_add_sponsor_text.assert_not_called() def test_add_sponsor_text_to_thumbails( - self, mock_api, mock_thumbnails, mock_add_sponsor_text + self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text ): sponsor_text = "This document contributed by Free Law Project" mock_api.return_value = self.mock_api_wrapper(self.status_id) mock_thumbnails.return_value = [self.bin_object for _ in range(4)] + mock_download.return_value = self.bin_object make_post_for_webhook_event( self.channel.pk, self.webhook_event.pk, - self.bin_object, + self.fake_document_path, sponsor_text, ) + mock_download.assert_called_once_with(self.fake_document_path) mock_thumbnails.assert_called_with(self.bin_object, "[1,2,3,4]") mock_add_sponsor_text.assert_called_with( mock_thumbnails(), sponsor_text @@ -360,10 +356,8 @@ def test_can_create_new_post_wo_document( ) @patch("bc.subscription.tasks.enqueue_posts_for_new_case") - @patch("bc.subscription.tasks.download_pdf_from_cl") def test_can_download_initial_complaint( self, - mock_download, mock_enqueue, mock_api, mock_queue, @@ -377,15 +371,12 @@ def test_can_download_initial_complaint( "filepath_local": local_filepath, "pacer_doc_id": "051023651280", } - mock_download.return_value = faker.binary(3) check_initial_complaint_before_posting(self.subscription.pk) - mock_lookup.assert_called_once_with(self.subscription.cl_docket_id) - mock_download.assert_called_with(local_filepath) mock_enqueue.assert_called_once_with( self.subscription, - mock_download(), + local_filepath, initial_document=mock_lookup.return_value, ) @@ -500,24 +491,6 @@ def test_can_post_new_case_w_thumbnails( article_url=self.subscription_w_link.article_url, ) - # This test case ensures backward compatibility with legacy tasks using - # the old bytes document format, which is scheduled for deprecation. It - # verifies that the system can still process tasks enqueued before the - # format change. - enqueue_posts_for_new_case(self.subscription_w_link, document) - - # Since we passed the PDF as a bytes object, we don't expect the - # download function to be called. - mock_download_pdf.assert_not_called() - mock_thumbnails.assert_called_once_with(document, "[1,2,3,4]") - mock_queue.enqueue.assert_called_once_with( - api_wrapper.add_status, - message, - None, - [thumb_1, thumb_2], - retry=mock_retry(), - ) - # This test case verifies the handling of document URLs. # We provide a fake URL to the enqueue function and expect: # - The download PDF function to be called with the URL. @@ -538,8 +511,10 @@ def test_can_post_new_case_w_thumbnails( @patch("bc.subscription.tasks.add_sponsored_text_to_thumbnails") @patch("bc.subscription.tasks.get_thumbnails_from_range") + @patch("bc.subscription.tasks.download_pdf_from_cl") def test_can_create_post_w_sponsored_thumbnails( self, + mock_download_pdf, mock_thumbnails, mock_sponsored, mock_api, @@ -559,7 +534,9 @@ def test_can_create_post_w_sponsored_thumbnails( mock_lookup.return_value = None mock_docket_by_cl_id.return_value = None + fake_path = faker.url() document = faker.binary(2) + mock_download_pdf.return_value = document thumb_1 = faker.binary(4) thumb_2 = faker.binary(6) @@ -583,7 +560,7 @@ def test_can_create_post_w_sponsored_thumbnails( article_url=self.subscription_w_link.article_url, ) - enqueue_posts_for_new_case(self.subscription_w_link, document, True) + enqueue_posts_for_new_case(self.subscription_w_link, fake_path, True) expected_enqueue_calls = [ call( @@ -601,7 +578,7 @@ def test_can_create_post_w_sponsored_thumbnails( retry=mock_retry(), ), ] - + mock_download_pdf.assert_called_once_with(fake_path) mock_thumbnails.assert_called_once_with(document, "[1,2,3,4]") mock_sponsored.assert_called_with( [thumb_1, thumb_2], sponsorship.watermark_message