Skip to content

Commit

Permalink
Merge pull request #580 from freelawproject/feat-remove-logic-to-hand…
Browse files Browse the repository at this point in the history
…le-document-as-bytes
  • Loading branch information
mlissner authored Aug 23, 2024
2 parents 387153f + 0dfe8aa commit 8925bb1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 79 deletions.
58 changes: 22 additions & 36 deletions bc/subscription/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

def enqueue_posts_for_new_case(
subscription: Subscription,
document: bytes | str | None = None,
document_url: str | None = None,
check_sponsor_message: bool = False,
initial_document: DocumentDict | None = None,
) -> None:
Expand All @@ -49,21 +49,15 @@ def enqueue_posts_for_new_case(
Args:
subscription (Subscription): the new subscription object.
document (bytes | str | None): The document, either as raw bytes or as
a URL path to download the file.
document_url (str | None): URL path to download the docuement.
check_sponsor_message (bool, optional): designates whether this method
should check the sponsorships field and compute the sponsor_message
for each channel. Defaults to False.
"""

files = None
# TODO: Refactor to remove isinstance check in the if statements
# This code was added to handle potential data already in the queue
# during deployment.
if isinstance(document, str):
document = download_pdf_from_cl(document)

if document is not None and isinstance(document, bytes):
if document_url:
document = download_pdf_from_cl(document_url)
files = get_thumbnails_from_range(document, "[1,2,3,4]")

docket: DocketDict | None = None
Expand Down Expand Up @@ -139,7 +133,7 @@ def enqueue_posts_for_new_case(

def enqueue_posts_for_docket_alert(
webhook_event: FilingWebhookEvent,
document: bytes | str | None = None,
document_url: str | None = None,
check_sponsor_message: bool = False,
) -> None:
"""
Expand All @@ -148,12 +142,10 @@ def enqueue_posts_for_docket_alert(
Args:
webhook_event (FilingWebhookEvent): The FilingWebhookEvent record.
document (bytes | None, optional): document as bytes or the URL to
download the file.
document_url (str | None): URL to download the document.
check_sponsor_message (bool, optional): designates whether this method
should check.
the sponsorships field and compute the sponsor_message for each channel.
Defaults to False.
should check. the sponsorships field and compute the sponsor_message
for each channel. Defaults to False.
"""
if not webhook_event.subscription:
return
Expand All @@ -171,7 +163,7 @@ def enqueue_posts_for_docket_alert(
make_post_for_webhook_event,
channel.pk,
webhook_event.pk,
document,
document_url,
sponsor_message,
retry=Retry(
max=settings.RQ_MAX_NUMBER_OF_RETRIES,
Expand Down Expand Up @@ -244,10 +236,10 @@ def check_webhook_before_posting(fwe_pk: int):
return filing_webhook_event

# check if the document is available or there's a sponsorship to purchase it.
document = None
document_url = None
cl_document = lookup_document_by_doc_id(filing_webhook_event.doc_id)
if cl_document["filepath_local"]:
document = cl_document["filepath_local"]
document_url = cl_document["filepath_local"]
else:
sponsorship = check_active_sponsorships(
filing_webhook_event.subscription.pk
Expand All @@ -267,7 +259,7 @@ def check_webhook_before_posting(fwe_pk: int):
return filing_webhook_event

# Got the document or no sponsorship. Tweet and toot.
enqueue_posts_for_docket_alert(filing_webhook_event, document)
enqueue_posts_for_docket_alert(filing_webhook_event, document_url)

return filing_webhook_event

Expand All @@ -289,10 +281,10 @@ def check_initial_complaint_before_posting(
"""
subscription = Subscription.objects.get(pk=subscription_pk)

document = None
document_url = None
cl_document = lookup_initial_complaint(subscription.cl_docket_id)
if cl_document and cl_document["filepath_local"]:
document = download_pdf_from_cl(cl_document["filepath_local"])
document_url = cl_document["filepath_local"]
elif cl_document and cl_document["pacer_doc_id"]:
sponsorship = check_active_sponsorships(subscription.pk)
if sponsorship:
Expand All @@ -303,7 +295,7 @@ def check_initial_complaint_before_posting(

# Got the document or no sponsorship. Tweet and toot.
enqueue_posts_for_new_case(
subscription, document, initial_document=cl_document
subscription, document_url, initial_document=cl_document
)

return subscription
Expand Down Expand Up @@ -352,7 +344,7 @@ def process_fetch_webhook_event(
"The RECAP document doesn't have a path to download the file"
)

pdf_data = cl_document["filepath_local"]
pdf_path = cl_document["filepath_local"]

sponsor_groups = get_sponsored_groups_per_subscription(subscription.pk)

Expand All @@ -371,9 +363,9 @@ def process_fetch_webhook_event(
log_purchase(sponsor_groups, subscription.pk, document)

if record_type == "filing_webhook":
enqueue_posts_for_docket_alert(filing_webhook_event, pdf_data, True)
enqueue_posts_for_docket_alert(filing_webhook_event, pdf_path, True)
else:
enqueue_posts_for_new_case(subscription, pdf_data, True, cl_document)
enqueue_posts_for_new_case(subscription, pdf_path, True, cl_document)

return record_pk

Expand All @@ -382,7 +374,7 @@ def process_fetch_webhook_event(
def make_post_for_webhook_event(
channel_pk: int,
fwe_pk: int,
document: bytes | str | None,
document_url: str | None,
sponsor_text: str | None = None,
) -> Post:
"""Post a new status in the given channel using the data of the given webhook
Expand All @@ -391,8 +383,7 @@ def make_post_for_webhook_event(
Args:
channel_pk (int): The pk of the channel where the post will be created.
fwe_pk (int): The PK of the FilingWebhookEvent record.
document (bytes | str | None): The document, either as raw bytes or as
a URL path to download the file.
document_url (str | None): URL path to download the document.
sponsor_text (str | None): sponsor message to include in the thumbnails.
Returns:
Expand Down Expand Up @@ -424,13 +415,8 @@ def make_post_for_webhook_event(
)

files = None
# TODO: Refactor to remove isinstance check in the if statements
# This code was added to handle potential data already in the queue
# during deployment.
if isinstance(document, str):
document = download_pdf_from_cl(document)

if document is not None and isinstance(document, bytes):
if document_url:
document = download_pdf_from_cl(document_url)
thumbnail_range = "[1,2,3]" if image else "[1,2,3,4]"
files = get_thumbnails_from_range(document, thumbnail_range)

Expand Down
63 changes: 20 additions & 43 deletions bc/subscription/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def test_can_create_post_after_purchase(self, mock_lookup, mock_enqueue):
@patch("bc.subscription.tasks.add_sponsored_text_to_thumbnails")
@patch("bc.subscription.tasks.get_thumbnails_from_range")
@patch.object(Channel, "get_api_wrapper")
@patch("bc.subscription.tasks.download_pdf_from_cl")
class MakePostForWebhookEventTest(TestCase):
webhook_event = None
webhook_minute_entry = None
Expand Down Expand Up @@ -212,7 +213,7 @@ def mock_api_wrapper(self, status_id):
return wrapper

def test_can_create_full_post_no_document(
self, mock_api, mock_thumbnails, mock_add_sponsor_text
self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text
):
mock_api.return_value = self.mock_api_wrapper(self.status_id)

Expand All @@ -223,11 +224,12 @@ def test_can_create_full_post_no_document(

self.assertEqual(post.object_id, self.status_id)
self.assertIn("New filing", post.text)
mock_download.assert_not_called()
mock_add_sponsor_text.assert_not_called()
mock_thumbnails.assert_not_called()

def test_can_create_minute_entry_no_document(
self, mock_api, mock_thumbnails, mock_add_sponsor_text
self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text
):
mock_api.return_value = self.mock_api_wrapper(self.status_id)

Expand All @@ -239,10 +241,10 @@ def test_can_create_minute_entry_no_document(
self.assertEqual(post.object_id, self.status_id)
self.assertIn("New minute entry", post.text)

mock_download.assert_not_called()
mock_add_sponsor_text.assert_not_called()
mock_thumbnails.assert_not_called()

@patch("bc.subscription.tasks.download_pdf_from_cl")
def test_get_three_thumbnails_from_documents(
self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text
):
Expand All @@ -252,15 +254,6 @@ def test_get_three_thumbnails_from_documents(
self.webhook_event.short_description = ""
self.webhook_event.save(update_fields=["short_description"])

# This test case handles scenarios where the document is provided
# directly as a binary object.
make_post_for_webhook_event(
self.channel.pk, self.webhook_event.pk, self.bin_object
)

mock_thumbnails.assert_called_with(self.bin_object, "[1,2,3]")
mock_add_sponsor_text.assert_not_called()

# This test case verifies that `make_post_for_webhook_event` can handle
# document URLs as input, in contrast to the previous test that used
# URLs.
Expand All @@ -274,31 +267,34 @@ def test_get_three_thumbnails_from_documents(
mock_add_sponsor_text.assert_not_called()

def test_get_four_thumbnails_from_document(
self, mock_api, mock_thumbnails, mock_add_sponsor_text
self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text
):
mock_api.return_value = self.mock_api_wrapper(self.status_id)
mock_download.return_value = self.bin_object

make_post_for_webhook_event(
self.channel.pk, self.webhook_event.pk, self.bin_object
self.channel.pk, self.webhook_event.pk, self.fake_document_path
)

mock_download.assert_called_once_with(self.fake_document_path)
mock_thumbnails.assert_called_with(self.bin_object, "[1,2,3,4]")
mock_add_sponsor_text.assert_not_called()

def test_add_sponsor_text_to_thumbails(
self, mock_api, mock_thumbnails, mock_add_sponsor_text
self, mock_download, mock_api, mock_thumbnails, mock_add_sponsor_text
):
sponsor_text = "This document contributed by Free Law Project"
mock_api.return_value = self.mock_api_wrapper(self.status_id)
mock_thumbnails.return_value = [self.bin_object for _ in range(4)]
mock_download.return_value = self.bin_object

make_post_for_webhook_event(
self.channel.pk,
self.webhook_event.pk,
self.bin_object,
self.fake_document_path,
sponsor_text,
)

mock_download.assert_called_once_with(self.fake_document_path)
mock_thumbnails.assert_called_with(self.bin_object, "[1,2,3,4]")
mock_add_sponsor_text.assert_called_with(
mock_thumbnails(), sponsor_text
Expand Down Expand Up @@ -360,10 +356,8 @@ def test_can_create_new_post_wo_document(
)

@patch("bc.subscription.tasks.enqueue_posts_for_new_case")
@patch("bc.subscription.tasks.download_pdf_from_cl")
def test_can_download_initial_complaint(
self,
mock_download,
mock_enqueue,
mock_api,
mock_queue,
Expand All @@ -377,15 +371,12 @@ def test_can_download_initial_complaint(
"filepath_local": local_filepath,
"pacer_doc_id": "051023651280",
}
mock_download.return_value = faker.binary(3)

check_initial_complaint_before_posting(self.subscription.pk)

mock_lookup.assert_called_once_with(self.subscription.cl_docket_id)
mock_download.assert_called_with(local_filepath)
mock_enqueue.assert_called_once_with(
self.subscription,
mock_download(),
local_filepath,
initial_document=mock_lookup.return_value,
)

Expand Down Expand Up @@ -500,24 +491,6 @@ def test_can_post_new_case_w_thumbnails(
article_url=self.subscription_w_link.article_url,
)

# This test case ensures backward compatibility with legacy tasks using
# the old bytes document format, which is scheduled for deprecation. It
# verifies that the system can still process tasks enqueued before the
# format change.
enqueue_posts_for_new_case(self.subscription_w_link, document)

# Since we passed the PDF as a bytes object, we don't expect the
# download function to be called.
mock_download_pdf.assert_not_called()
mock_thumbnails.assert_called_once_with(document, "[1,2,3,4]")
mock_queue.enqueue.assert_called_once_with(
api_wrapper.add_status,
message,
None,
[thumb_1, thumb_2],
retry=mock_retry(),
)

# This test case verifies the handling of document URLs.
# We provide a fake URL to the enqueue function and expect:
# - The download PDF function to be called with the URL.
Expand All @@ -538,8 +511,10 @@ def test_can_post_new_case_w_thumbnails(

@patch("bc.subscription.tasks.add_sponsored_text_to_thumbnails")
@patch("bc.subscription.tasks.get_thumbnails_from_range")
@patch("bc.subscription.tasks.download_pdf_from_cl")
def test_can_create_post_w_sponsored_thumbnails(
self,
mock_download_pdf,
mock_thumbnails,
mock_sponsored,
mock_api,
Expand All @@ -559,7 +534,9 @@ def test_can_create_post_w_sponsored_thumbnails(
mock_lookup.return_value = None
mock_docket_by_cl_id.return_value = None

fake_path = faker.url()
document = faker.binary(2)
mock_download_pdf.return_value = document

thumb_1 = faker.binary(4)
thumb_2 = faker.binary(6)
Expand All @@ -583,7 +560,7 @@ def test_can_create_post_w_sponsored_thumbnails(
article_url=self.subscription_w_link.article_url,
)

enqueue_posts_for_new_case(self.subscription_w_link, document, True)
enqueue_posts_for_new_case(self.subscription_w_link, fake_path, True)

expected_enqueue_calls = [
call(
Expand All @@ -601,7 +578,7 @@ def test_can_create_post_w_sponsored_thumbnails(
retry=mock_retry(),
),
]

mock_download_pdf.assert_called_once_with(fake_path)
mock_thumbnails.assert_called_once_with(document, "[1,2,3,4]")
mock_sponsored.assert_called_with(
[thumb_1, thumb_2], sponsorship.watermark_message
Expand Down

0 comments on commit 8925bb1

Please sign in to comment.