From 27b28c5b0d5d5e46c6f8030803d950242e35d5ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Sep 2023 21:23:37 +0000 Subject: [PATCH 01/92] build(deps): bump aws-actions/configure-aws-credentials from 1 to 4 Bumps [aws-actions/configure-aws-credentials](https://github.com/aws-actions/configure-aws-credentials) from 1 to 4. - [Release notes](https://github.com/aws-actions/configure-aws-credentials/releases) - [Changelog](https://github.com/aws-actions/configure-aws-credentials/blob/main/CHANGELOG.md) - [Commits](https://github.com/aws-actions/configure-aws-credentials/compare/v1...v4) --- updated-dependencies: - dependency-name: aws-actions/configure-aws-credentials dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/bulid-and-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bulid-and-deploy.yml b/.github/workflows/bulid-and-deploy.yml index 887994b3..997f60ea 100644 --- a/.github/workflows/bulid-and-deploy.yml +++ b/.github/workflows/bulid-and-deploy.yml @@ -31,7 +31,7 @@ jobs: id: vars run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} From a4bfa880ebcd5b513b29eaf087ff118d8bf620ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Oct 2023 06:43:17 +0000 Subject: [PATCH 02/92] build(deps): bump docker/login-action from 2 to 3 Bumps [docker/login-action](https://github.com/docker/login-action) from 2 to 3. 
- [Release notes](https://github.com/docker/login-action/releases) - [Commits](https://github.com/docker/login-action/compare/v2...v3) --- updated-dependencies: - dependency-name: docker/login-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/bulid-and-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bulid-and-deploy.yml b/.github/workflows/bulid-and-deploy.yml index 887994b3..c22060fa 100644 --- a/.github/workflows/bulid-and-deploy.yml +++ b/.github/workflows/bulid-and-deploy.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Login to Docker Hub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} From 41b0a694c9755a02fcccabfc89be4523d6744022 Mon Sep 17 00:00:00 2001 From: mlissner Date: Tue, 10 Oct 2023 13:43:55 -0700 Subject: [PATCH 03/92] fix(docker): Include scripts in build --- .dockerignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.dockerignore b/.dockerignore index c71b92d2..d27061dd 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,6 +7,7 @@ !pyproject.toml !docker !manage.py +!scripts # But no matter what, ignore these things. 
bc/assets/static From 7ce49d3564f59d5d60f89a401eec91b0c9f21325 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 17 Oct 2023 18:58:46 -0400 Subject: [PATCH 04/92] feat(subscription): Add the FilingWebhookEvent model to the admin --- bc/subscription/admin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bc/subscription/admin.py b/bc/subscription/admin.py index 1b779be2..f3ca62b5 100644 --- a/bc/subscription/admin.py +++ b/bc/subscription/admin.py @@ -2,7 +2,7 @@ from bc.channel.models import Channel -from .models import Subscription +from .models import FilingWebhookEvent, Subscription class ChannelInline(admin.StackedInline): @@ -18,3 +18,4 @@ class SubscriptionAdmin(admin.ModelAdmin): admin.site.register(Subscription, SubscriptionAdmin) +admin.site.register(FilingWebhookEvent) From 7cbd351910ec7830dd4c212e554214dc9b582feb Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 17 Oct 2023 18:59:03 -0400 Subject: [PATCH 05/92] feat(subscription): Handle the MultipleObjectsReturned exception --- bc/subscription/api_views.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bc/subscription/api_views.py b/bc/subscription/api_views.py index 3d43f82a..abf92b08 100644 --- a/bc/subscription/api_views.py +++ b/bc/subscription/api_views.py @@ -116,6 +116,11 @@ def handle_recap_fetch_webhook(request: Request) -> Response: webhook_record = FilingWebhookEvent.objects.get( doc_id=data["payload"]["recap_document"] ) + except FilingWebhookEvent.MultipleObjectsReturned: + # we have received the same docket entry in different webhooks + webhook_record = FilingWebhookEvent.objects.filter( # type: ignore + doc_id=data["payload"]["recap_document"] + ).first() except FilingWebhookEvent.DoesNotExist: # if we dont have a filing webhook related to the document, It must be an initial complaint webhook_record = Subscription.objects.get( From 69cacb71d40724191b1017fd5dcd82a5cb657cd5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 23:50:47 +0000 Subject: [PATCH 06/92] build(deps): bump urllib3 from 1.26.14 to 1.26.18 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.14 to 1.26.18. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.14...1.26.18) --- updated-dependencies: - dependency-name: urllib3 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- poetry.lock | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index a2666ce0..faa107db 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2018,17 +2018,17 @@ files = [ [[package]] name = "urllib3" -version = "1.26.14" +version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, - {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, + {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, + {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] @@ -2103,6 +2103,16 @@ files = [ {file = 
"wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, + {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, + {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = 
"wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, From d7614502368f4ecac8bf98add0722fb18e6329e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 21:48:03 +0000 Subject: [PATCH 07/92] build(deps): bump django from 4.2.3 to 4.2.7 Bumps [django](https://github.com/django/django) from 4.2.3 to 4.2.7. - [Commits](https://github.com/django/django/compare/4.2.3...4.2.7) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- poetry.lock | 10 +++++----- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index faa107db..958fe22e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -542,13 +542,13 @@ files = [ [[package]] name = "django" -version = "4.2.3" +version = "4.2.7" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." 
optional = false python-versions = ">=3.8" files = [ - {file = "Django-4.2.3-py3-none-any.whl", hash = "sha256:f7c7852a5ac5a3da5a8d5b35cc6168f31b605971441798dac845f17ca8028039"}, - {file = "Django-4.2.3.tar.gz", hash = "sha256:45a747e1c5b3d6df1b141b1481e193b033fd1fdbda3ff52677dc81afdaacbaed"}, + {file = "Django-4.2.7-py3-none-any.whl", hash = "sha256:e1d37c51ad26186de355cbcec16613ebdabfa9689bbade9c538835205a8abbe9"}, + {file = "Django-4.2.7.tar.gz", hash = "sha256:8e0f1c2c2786b5c0e39fe1afce24c926040fad47c8ea8ad30aaf1188df29fc41"}, ] [package.dependencies] @@ -1916,7 +1916,7 @@ files = [ [package.dependencies] requests = "*" -requests-oauthlib = "*" +requests_oauthlib = "*" [[package]] name = "types-pyopenssl" @@ -2163,4 +2163,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "ad68126814650489517a02a4dcfedca377808b0293051c12078b794be97d1e21" +content-hash = "4fb1be1b7d05b27c779c6e8e7642ae6e4cd9e9e1ec42ea785f405f505b5e4485" diff --git a/pyproject.toml b/pyproject.toml index 01f52856..2af57e60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ psycopg2-binary = "^2.9.6" ansicolors = "^1.1.8" prettytable = "^3.8.0" courts-db = "^0.10.17" -django = "^4.2.3" +django = "^4.2.7" djangorestframework = "^3.14.0" django-environ = "^0.10.0" sentry-sdk = "^1.29.2" From c134dfade5a0c8b139ce43bfd70e04bdbcc56404 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sat, 2 Dec 2023 13:02:38 -0400 Subject: [PATCH 08/92] feat(channels): Register Bluesky as a service in the Channel class --- ...er_channel_service_alter_post_object_id.py | 27 +++++++++++++++++++ bc/channel/models.py | 4 ++- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 bc/channel/migrations/0008_alter_channel_service_alter_post_object_id.py diff --git a/bc/channel/migrations/0008_alter_channel_service_alter_post_object_id.py b/bc/channel/migrations/0008_alter_channel_service_alter_post_object_id.py new file mode 100644 index 00000000..e4b948a1 
--- /dev/null +++ b/bc/channel/migrations/0008_alter_channel_service_alter_post_object_id.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.7 on 2023-12-03 16:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("channel", "0007_group_border_color"), + ] + + operations = [ + migrations.AlterField( + model_name="channel", + name="service", + field=models.PositiveSmallIntegerField( + choices=[(1, "Twitter"), (2, "Mastodon"), (3, "Bluesky")], + help_text="Type of the service", + ), + ), + migrations.AlterField( + model_name="post", + name="object_id", + field=models.CharField( + help_text="The object's id returned by Twitter/Mastodon/etc's API" + ), + ), + ] diff --git a/bc/channel/models.py b/bc/channel/models.py index e15c1adb..6cfc0b36 100644 --- a/bc/channel/models.py +++ b/bc/channel/models.py @@ -64,9 +64,11 @@ class Channel(AbstractDateTimeModel): TWITTER = 1 MASTODON = 2 + BLUESKY = 3 CHANNELS = ( (TWITTER, "Twitter"), (MASTODON, "Mastodon"), + (BLUESKY, "Bluesky"), ) service = models.PositiveSmallIntegerField( help_text="Type of the service", @@ -148,7 +150,7 @@ class Post(AbstractDateTimeModel): channel = models.ForeignKey( "Channel", related_name="posts", on_delete=models.CASCADE ) - object_id = models.PositiveBigIntegerField( + object_id = models.CharField( help_text="The object's id returned by Twitter/Mastodon/etc's API", ) text = models.TextField( From fed0bca4ce849922ff08af7df36e534872c22d77 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 09:21:35 -0400 Subject: [PATCH 09/92] feat(channel): Add a Bluesky client class to handle post creation --- .../utils/connectors/bluesky_api/__init__.py | 0 .../utils/connectors/bluesky_api/client.py | 178 ++++++++++++++++++ .../utils/connectors/bluesky_api/types.py | 50 +++++ 3 files changed, 228 insertions(+) create mode 100644 bc/channel/utils/connectors/bluesky_api/__init__.py create mode 100644 
bc/channel/utils/connectors/bluesky_api/client.py create mode 100644 bc/channel/utils/connectors/bluesky_api/types.py diff --git a/bc/channel/utils/connectors/bluesky_api/__init__.py b/bc/channel/utils/connectors/bluesky_api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bc/channel/utils/connectors/bluesky_api/client.py b/bc/channel/utils/connectors/bluesky_api/client.py new file mode 100644 index 00000000..768a0e77 --- /dev/null +++ b/bc/channel/utils/connectors/bluesky_api/client.py @@ -0,0 +1,178 @@ +import re +from datetime import datetime, timezone + +import requests + +from .types import ImageBlob, RegexMatch, Session, TextAnnotation, Thumbnail + +_BASE_API_URL = "https://bsky.social/xrpc" +_DEFAULT_CONTENT_TYPE = "application/json" +DEFAULT_LANGUAGE_CODE1 = "en" + + +class BlueskyAPI: + def __init__( + self, identifier: str, password: str, timeout: int = 30 + ) -> None: + self._identifier = identifier + self._password = password + self._timeout = timeout + self._session = self._get_session() + + def _get_session(self) -> Session: + """ + Create an authentication session + + Returns: + Session: response with the "accessJwt", "refreshJwt", "handle" and "did" + """ + response = requests.post( + f"{_BASE_API_URL}/com.atproto.server.createSession", + headers={ + "Content-Type": _DEFAULT_CONTENT_TYPE, + }, + json={ + "identifier": self._identifier, + "password": self._password, + }, + timeout=self._timeout, + ) + return Session(**response.json()) + + def post_media(self, media: bytes, mime_type: str) -> ImageBlob: + """ + Upload bytes data (a "blob") using the given content type. + + Args: + media (bytes): The file to be attached. + mime_type (str): The MIME type of the content being uploaded. + + Returns: + ImageBlob: response with the size, $type, $ref of the file. + """ + # this size limit is specified in the app.bsky.embed.images lexicon + if len(media) > 1000000: + raise Exception( + f"image file size too large. 
1000000 bytes maximum, got: {len(media)}" + ) + + resp = requests.post( + f"{_BASE_API_URL}/com.atproto.repo.uploadBlob", + headers={ + "Content-Type": mime_type, + "Authorization": f"Bearer {self._session.accessJwt}", + }, + data=media, + timeout=self._timeout, + ) + resp.raise_for_status() + blob = resp.json()["blob"] + return blob + + def get_current_time_iso(self) -> str: + """Get current time in Server Timezone (UTC) and ISO format.""" + return datetime.now(timezone.utc).isoformat() + + def _parse_urls(self, text: str) -> list[RegexMatch]: + """ + Parses a URL from text. + + This helper function takes a string as input and attempts to extract + URLs from it. If any URLs are found, they are appended to a list of + URLs. If no URLs are found, an empty list is returned. + + Args: + text (str): The text to parse. + + Returns: + list[RegexMatch]: List of matches. + """ + spans = [] + # partial/naive URL regex based on: https://stackoverflow.com/a/3809435 + # tweaked to disallow some trailing punctuation + url_regex = rb"[$|\W](https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*[-a-zA-Z0-9@%_\+~#//=])?)" + text_bytes = text.encode("UTF-8") + for m in re.finditer(url_regex, text_bytes): + spans.append( + RegexMatch( + start=m.start(1), + end=m.end(1), + text=m.group(1).decode("UTF-8"), + ) + ) + return spans + + def _parse_text_facets(self, text) -> list[TextAnnotation]: + """ + Parses text and extracts text annotations (e.g., links and mentions) + + This method takes a text string as input and identifies various facets, + such as named entities (mentions) and links. It returns a list of text + annotations, where each annotation represents a specific facet identified + in the text. Each annotation includes the starting and ending byte + positions of the facet within the text, along with its type. + + Args: + text (str): The text string to parse. + + Returns: + List[TextAnnotation]: A list of text annotations. 
Each annotation + includes the starting and ending byte positions of the facet within + the text, along with its type. + """ + facets = [] + for u in self._parse_urls(text): + facets.append( + { + "index": { + "byteStart": u.start, + "byteEnd": u.end, + }, + "features": [ + { + "$type": "app.bsky.richtext.facet#link", + "uri": u.text, + } + ], + } + ) + return facets + + def post_status(self, text: str, media: list[Thumbnail]) -> dict[str, str]: + """ + Creates a new status post on Bluesky using the provided text and thumbnails. + + Args: + text: The text content of the status post. + media: A list of dicts representing the thumbnails for the new post. + + Returns: + dict[str, str]: Response including the cid and the uri of the record. + """ + # Fetch the current time + now = self.get_current_time_iso() + message_object = { + "$type": "app.bsky.feed.post", + "text": text, + "facets": self._parse_text_facets(text), + "createdAt": now, + } + + if media: + message_object["embed"] = { + "$type": "app.bsky.embed.images", + "images": media, + } + + response = requests.post( + f"{_BASE_API_URL}/com.atproto.repo.createRecord", + headers={"Authorization": f"Bearer {self._session.accessJwt}"}, + json={ + "repo": self._session.did, + "collection": "app.bsky.feed.post", + "record": message_object, + }, + timeout=self._timeout, + ) + + return response.json() diff --git a/bc/channel/utils/connectors/bluesky_api/types.py b/bc/channel/utils/connectors/bluesky_api/types.py new file mode 100644 index 00000000..374bbf32 --- /dev/null +++ b/bc/channel/utils/connectors/bluesky_api/types.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass +from typing import Dict, Literal, TypedDict + + +@dataclass +class Session: + accessJwt: str + did: str + handle: str + refreshJwt: str + didDoc: dict + email: str + emailConfirmed: bool + + +@dataclass +class RegexMatch: + start: int + end: int + text: str + + +class ByteSlice(TypedDict): + byteStart: int + byteEnd: int + + +ImageBlob = TypedDict( 
+ "ImageBlob", + { + "$type": Literal["blob"], + "mimeType": str, + "size": int, + "ref": TypedDict("ref", {"$link": str}), + }, +) + +LinkFacet = TypedDict( + "LinkFacet", {"$type": Literal["app.bsky.richtext.facet#link"], "uri": str} +) + + +class TextAnnotation(TypedDict): + index: ByteSlice + features: list[LinkFacet] + + +class Thumbnail(TypedDict): + alt_text: str + image: ImageBlob From edab82ef7831e2d9195e6b4d1f4b8259b9125b27 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 09:22:55 -0400 Subject: [PATCH 10/92] feat(channel): Add the BlueskyConnector interface --- bc/channel/utils/connectors/bluesky.py | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 bc/channel/utils/connectors/bluesky.py diff --git a/bc/channel/utils/connectors/bluesky.py b/bc/channel/utils/connectors/bluesky.py new file mode 100644 index 00000000..d8360c21 --- /dev/null +++ b/bc/channel/utils/connectors/bluesky.py @@ -0,0 +1,51 @@ +from bc.core.utils.images import TextImage + +from .alt_text_utils import text_image_alt_text, thumb_num_alt_text +from .base import ApiWrapper +from .bluesky_api.client import BlueskyAPI +from .bluesky_api.types import ImageBlob + + +class BlueskyConnector: + def __init__(self, identifier: str, password: str) -> None: + self.identifier = identifier + self.password = password + self.api = self.get_api_object() + + def get_api_object(self, _version=None) -> ApiWrapper: + return BlueskyAPI(self.identifier, self.password) + + def upload_media(self, media, alt_text) -> ImageBlob: + """Upload a new blob to be added to a post in a later request.""" + return self.api.post_media(media, mime_type="image/png") + + def add_status( + self, + message: str, + text_image: TextImage | None = None, + thumbnails: list[bytes] | None = None, + ) -> str: + """Send post with attached image.""" + media = [] + if text_image: + blob = self.upload_media(text_image.to_bytes(), None) + media.append( + { + "alt": 
text_image_alt_text(text_image.description), + "image": blob, + } + ) + + if thumbnails: + for idx, thumbnail in enumerate(thumbnails): + blob = self.upload_media(thumbnail, None) + media.append( + { + "alt": thumb_num_alt_text(idx), + "image": blob, + } + ) + + api_response = self.api.post_status(message, media) + + return api_response["cid"] From b0eac4774a519cc65e5d2bc0fadee25ffc5b3f11 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 09:24:20 -0400 Subject: [PATCH 11/92] feat(channel): Add the BlueskyConnector to the list of API wrappers --- bc/channel/models.py | 3 +++ bc/channel/utils/connectors/base.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/bc/channel/models.py b/bc/channel/models.py index 6cfc0b36..e5e7ff9f 100644 --- a/bc/channel/models.py +++ b/bc/channel/models.py @@ -7,6 +7,7 @@ from bc.users.models import User from .utils.connectors.base import BaseAPIConnector +from .utils.connectors.bluesky import BlueskyConnector from .utils.connectors.masto import ( MastodonConnector, get_server_url, @@ -117,6 +118,8 @@ def get_api_wrapper(self) -> BaseAPIConnector: return MastodonConnector( self.access_token, get_server_url(self.account) ) + case self.BLUESKY: + return BlueskyConnector(self.account_id, self.access_token) case _: raise NotImplementedError( f"No wrapper implemented for service: '{self.service}'." 
diff --git a/bc/channel/utils/connectors/base.py b/bc/channel/utils/connectors/base.py index 2a1116a7..5e4111db 100644 --- a/bc/channel/utils/connectors/base.py +++ b/bc/channel/utils/connectors/base.py @@ -5,7 +5,9 @@ from bc.core.utils.images import TextImage -ApiWrapper = Union[Mastodon, TwitterAPI] +from .bluesky_api.client import BlueskyAPI + +ApiWrapper = Union[Mastodon, TwitterAPI, BlueskyAPI] class BaseAPIConnector(Protocol): From de844ede9c71b401ac2a76848fe77dff90148d90 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 10:52:03 -0400 Subject: [PATCH 12/92] feat(core): Add a new exception for invalid templates --- bc/core/utils/status/base.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/bc/core/utils/status/base.py b/bc/core/utils/status/base.py index 19a5fc82..a411faa5 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -5,6 +5,10 @@ from ..images import TextImage +class InvalidTemplate(Exception): + pass + + class AlwaysBlankValueDict(dict): """Just return blank, regardless of the key""" @@ -18,6 +22,7 @@ class BaseTemplate: link_placeholders: list[str] max_characters: int border_color: tuple[int, ...] = (243, 195, 62) + is_valid: bool = True def __len__(self) -> int: """Returns the length of the template without the placeholders @@ -74,7 +79,11 @@ def format(self, *args, **kwargs) -> tuple[str, TextImage | None]: "…\n\n[full entry below 👇]", ) - return self.str_template.format(**kwargs), image + text = self.str_template.format(**kwargs) + + self.is_valid = len(text) <= self.max_characters + + return text, image @dataclass From 04b0b12566c2f5c9201cccef52aed85f996b313d Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 11:00:27 -0400 Subject: [PATCH 13/92] feat(subscription):Enhance validation logic for new case posts The enhanced logic verifies the validity of the content of new case posts, ensuring they adhere to character limits across all channels. 
--- bc/subscription/views.py | 47 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/bc/subscription/views.py b/bc/subscription/views.py index 024f43be..2fe8174e 100644 --- a/bc/subscription/views.py +++ b/bc/subscription/views.py @@ -1,6 +1,7 @@ from django.conf import settings from django.contrib.auth.mixins import LoginRequiredMixin from django.core.exceptions import ValidationError +from django.db import transaction from django.shortcuts import render from django.views import View from django_htmx.http import trigger_client_event @@ -10,7 +11,10 @@ from rest_framework.response import Response from rq import Retry +from bc.channel.models import Channel from bc.channel.selectors import get_channel_groups_per_user +from bc.core.utils.status.base import InvalidTemplate +from bc.core.utils.status.selectors import get_new_case_template from .forms import AddSubscriptionForm from .services import create_or_update_subscription_from_docket @@ -80,10 +84,45 @@ def post(self, request, *args, **kwargs): docket["case_name"] = cd["docket_name"] docket["case_summary"] = cd["case_summary"] docket["article_url"] = cd["article_url"] - subscription, created = create_or_update_subscription_from_docket( - docket - ) - channels = request.POST.getlist("channels") + try: + with transaction.atomic(): # Inner atomic block, create a savepoint + ( + subscription, + created, + ) = create_or_update_subscription_from_docket(docket) + channels = request.POST.getlist("channels") + + # Verify that all templates produce valid post content + for channel_id in channels: + channel = Channel.objects.get(pk=channel_id) + template = get_new_case_template( + channel.service, subscription.article_url + ) + + template.format( + docket=subscription.name_with_summary, + docket_link=subscription.cl_url, + docket_id=subscription.cl_docket_id, + article_url=subscription.article_url, + ) + + if not template.is_valid: + raise InvalidTemplate + except 
InvalidTemplate: + context = { + "docket_id": docket_id, + "form": form, + "channels": get_channel_groups_per_user(request.user.pk), + "error": ( + "The combination of name, summary and article URL exceeds " + f"the maximum character limit for {channel.get_service_display()} " + "posts. Please try reducing the number of characters in the inputs." + "You can use abbreviations, or remove unnecessary words. Once you " + "have made these changes, resubmit the form." + ), + } + template = "./includes/search_htmx/case-form.html" + return render(request, template, context) for channel_id in channels: subscription.channel.add(channel_id) From c4248102a041afb00638f03870b501e8521df770 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 11:04:41 -0400 Subject: [PATCH 14/92] feat(core): Add a class for Bluesky templates --- bc/core/utils/status/base.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/bc/core/utils/status/base.py b/bc/core/utils/status/base.py index a411faa5..66a7069b 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -110,3 +110,27 @@ def __len__(self) -> int: They count as 23 characters. """ return 23 * len(self.link_placeholders) + self.count_fixed_characters() + + +@dataclass +class BlueskyTemplate(BaseTemplate): + max_characters: int = 300 + + def __len__(self) -> int: + return self.count_fixed_characters() + + def _available_space(self, *args, **kwargs) -> int: + """This method overrides `Template._available_space`. + + Bluesky doesn't use a fixed length for links like Mastodon or + Twitter/X. 
+ """ + placeholder_characters = sum( + [ + len(str(val)) + for key, val in kwargs.items() + if key != "description" + ] + ) + + return self.max_characters - len(self) - placeholder_characters From 476c4705758542fa6a57ea333456ef64e422f23b Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 11:05:21 -0400 Subject: [PATCH 15/92] feat(core): Add Bluesky templates for new case and regular posts --- bc/core/utils/status/selectors.py | 16 +++++++++++ bc/core/utils/status/templates.py | 47 ++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/bc/core/utils/status/selectors.py b/bc/core/utils/status/selectors.py index 71914a88..1fc0da3f 100644 --- a/bc/core/utils/status/selectors.py +++ b/bc/core/utils/status/selectors.py @@ -1,6 +1,10 @@ from bc.channel.models import Channel from .templates import ( + BLUESKY_FOLLOW_A_NEW_CASE, + BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE, + BLUESKY_MINUTE_TEMPLATE, + BLUESKY_POST_TEMPLATE, MASTODON_FOLLOW_A_NEW_CASE, MASTODON_FOLLOW_A_NEW_CASE_W_ARTICLE, MASTODON_MINUTE_TEMPLATE, @@ -43,6 +47,12 @@ def get_template_for_channel( if document_number else MASTODON_MINUTE_TEMPLATE ) + case Channel.BLUESKY: + return ( + BLUESKY_POST_TEMPLATE + if document_number + else BLUESKY_MINUTE_TEMPLATE + ) case _: raise NotImplementedError( f"No wrapper implemented for service: '{service}'." @@ -77,6 +87,12 @@ def get_new_case_template( if article_url else MASTODON_FOLLOW_A_NEW_CASE ) + case Channel.BLUESKY: + return ( + BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE + if article_url + else BLUESKY_FOLLOW_A_NEW_CASE + ) case _: raise NotImplementedError( f"No template implemented for service: '{service}'." 
diff --git a/bc/core/utils/status/templates.py b/bc/core/utils/status/templates.py index 2f360dbb..66e397fb 100644 --- a/bc/core/utils/status/templates.py +++ b/bc/core/utils/status/templates.py @@ -1,6 +1,11 @@ import re -from .base import BaseTemplate, MastodonTemplate, TwitterTemplate +from .base import ( + BaseTemplate, + BlueskyTemplate, + MastodonTemplate, + TwitterTemplate, +) DO_NOT_POST = re.compile( r"""( @@ -104,3 +109,43 @@ #CL{docket_id}""", ) + + +BLUESKY_FOLLOW_A_NEW_CASE = TwitterTemplate( + link_placeholders=[], + str_template="""I'm now following {docket}: + +{docket_link} + +#CL{docket_id}""", +) + +BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE = TwitterTemplate( + link_placeholders=["article_url"], + str_template="""I'm now following {docket}: + +Docket: {docket_link} + +Context: {article_url} + +#CL{docket_id}""", +) + +BLUESKY_POST_TEMPLATE = TwitterTemplate( + link_placeholders=["pdf_link"], + str_template="""New filing: "{docket}" +Doc #{doc_num}: {description} + +PDF: {pdf_link} + +#CL{docket_id}""", +) + +BLUESKY_MINUTE_TEMPLATE = TwitterTemplate( + link_placeholders=["docket_link"], + str_template="""New minute entry in {docket}: {description} + +Docket: {docket_link} + +#CL{docket_id}""", +) From 92a15bde7a2c8107a9638de6afe75b4003357274 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 13:57:34 -0400 Subject: [PATCH 16/92] fix(test): Tweak setup method to cast status_id as a str --- bc/subscription/tests/test_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bc/subscription/tests/test_tasks.py b/bc/subscription/tests/test_tasks.py index a411cc22..f6e6a483 100644 --- a/bc/subscription/tests/test_tasks.py +++ b/bc/subscription/tests/test_tasks.py @@ -210,8 +210,8 @@ def setUpTestData(cls) -> None: cls.bin_object = b"\x68\x65\x6c\x6c\x6f" def setUp(self) -> None: - self.status_id = faker.pyint( - min_value=100_000_000, max_value=900_000_000 + self.status_id = str( + faker.pyint(min_value=100_000_000, 
max_value=900_000_000) ) def mock_api_wrapper(self, status_id): From 351004b1fa08695777cafef578726c5adc9dfa2e Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 14:33:52 -0400 Subject: [PATCH 17/92] feat(channel): Add type annotations for ImageBlobRef and Records --- .../utils/connectors/bluesky_api/types.py | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/bc/channel/utils/connectors/bluesky_api/types.py b/bc/channel/utils/connectors/bluesky_api/types.py index 374bbf32..c044865c 100644 --- a/bc/channel/utils/connectors/bluesky_api/types.py +++ b/bc/channel/utils/connectors/bluesky_api/types.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, Literal, TypedDict +from typing import Literal, NotRequired, TypedDict @dataclass @@ -25,16 +25,29 @@ class ByteSlice(TypedDict): byteEnd: int +ImageBlobRef = TypedDict("ImageBlobRef", {"$link": str}) + ImageBlob = TypedDict( "ImageBlob", { "$type": Literal["blob"], "mimeType": str, "size": int, - "ref": TypedDict("ref", {"$link": str}), + "ref": ImageBlobRef, }, ) + +class Thumbnail(TypedDict): + alt_text: str + image: ImageBlob + + +ImageEmbed = TypedDict( + "ImageEmbed", + {"$type": Literal["app.bsky.embed.images"], "images": list[Thumbnail]}, +) + LinkFacet = TypedDict( "LinkFacet", {"$type": Literal["app.bsky.richtext.facet#link"], "uri": str} ) @@ -45,6 +58,13 @@ class TextAnnotation(TypedDict): features: list[LinkFacet] -class Thumbnail(TypedDict): - alt_text: str - image: ImageBlob +Record = TypedDict( + "Record", + { + "$type": Literal["app.bsky.feed.post"], + "text": str, + "facets": list[TextAnnotation], + "createdAt": str, + "embed": NotRequired[ImageEmbed], + }, +) From c3868def7ab6b6f9c139ea03b019875437c6d1d0 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 14:35:45 -0400 Subject: [PATCH 18/92] feat(channel): Update type hints in the BaseAPIConnector protocol --- bc/channel/utils/connectors/base.py | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) diff --git a/bc/channel/utils/connectors/base.py b/bc/channel/utils/connectors/base.py index 5e4111db..c0b9e142 100644 --- a/bc/channel/utils/connectors/base.py +++ b/bc/channel/utils/connectors/base.py @@ -5,9 +5,9 @@ from bc.core.utils.images import TextImage -from .bluesky_api.client import BlueskyAPI +from .bluesky_api.types import ImageBlob -ApiWrapper = Union[Mastodon, TwitterAPI, BlueskyAPI] +ApiWrapper = Union[Mastodon, TwitterAPI] class BaseAPIConnector(Protocol): @@ -27,7 +27,7 @@ def get_api_object(self, version: str | None = None) -> ApiWrapper: """ ... - def upload_media(self, media: bytes, alt_text: str) -> int: + def upload_media(self, media: bytes, alt_text: str) -> int | ImageBlob: """ creates a media attachment to be used with a new status. @@ -49,7 +49,7 @@ def add_status( message: str, text_image: TextImage | None = None, thumbnails: list[bytes] | None = None, - ) -> int: + ) -> int | str: """ Creates a new status using the API wrapper object and returns the integer representation of the identifier for the new status. 
From 5c265285612c398a7d0c73f39de877939869d359 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Sun, 3 Dec 2023 14:36:19 -0400 Subject: [PATCH 19/92] feat(channel): Add variable annotations to avoid mypy complains --- bc/channel/utils/connectors/bluesky.py | 4 +- .../utils/connectors/bluesky_api/client.py | 39 +++++++++++-------- .../utils/connectors/bluesky_api/types.py | 2 +- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/bc/channel/utils/connectors/bluesky.py b/bc/channel/utils/connectors/bluesky.py index d8360c21..f24b9518 100644 --- a/bc/channel/utils/connectors/bluesky.py +++ b/bc/channel/utils/connectors/bluesky.py @@ -3,7 +3,7 @@ from .alt_text_utils import text_image_alt_text, thumb_num_alt_text from .base import ApiWrapper from .bluesky_api.client import BlueskyAPI -from .bluesky_api.types import ImageBlob +from .bluesky_api.types import ImageBlob, Thumbnail class BlueskyConnector: @@ -26,7 +26,7 @@ def add_status( thumbnails: list[bytes] | None = None, ) -> str: """Send post with attached image.""" - media = [] + media: list[Thumbnail] = [] if text_image: blob = self.upload_media(text_image.to_bytes(), None) media.append( diff --git a/bc/channel/utils/connectors/bluesky_api/client.py b/bc/channel/utils/connectors/bluesky_api/client.py index 768a0e77..724d82cf 100644 --- a/bc/channel/utils/connectors/bluesky_api/client.py +++ b/bc/channel/utils/connectors/bluesky_api/client.py @@ -3,7 +3,14 @@ import requests -from .types import ImageBlob, RegexMatch, Session, TextAnnotation, Thumbnail +from .types import ( + ImageBlob, + Record, + RegexMatch, + Session, + TextAnnotation, + Thumbnail, +) _BASE_API_URL = "https://bsky.social/xrpc" _DEFAULT_CONTENT_TYPE = "application/json" @@ -121,21 +128,21 @@ def _parse_text_facets(self, text) -> list[TextAnnotation]: the text, along with its type. 
""" facets = [] + annotation: TextAnnotation for u in self._parse_urls(text): - facets.append( - { - "index": { - "byteStart": u.start, - "byteEnd": u.end, - }, - "features": [ - { - "$type": "app.bsky.richtext.facet#link", - "uri": u.text, - } - ], - } - ) + annotation = { + "index": { + "byteStart": u.start, + "byteEnd": u.end, + }, + "features": [ + { + "$type": "app.bsky.richtext.facet#link", + "uri": u.text, + } + ], + } + facets.append(annotation) return facets def post_status(self, text: str, media: list[Thumbnail]) -> dict[str, str]: @@ -151,7 +158,7 @@ def post_status(self, text: str, media: list[Thumbnail]) -> dict[str, str]: """ # Fetch the current time now = self.get_current_time_iso() - message_object = { + message_object: Record = { "$type": "app.bsky.feed.post", "text": text, "facets": self._parse_text_facets(text), diff --git a/bc/channel/utils/connectors/bluesky_api/types.py b/bc/channel/utils/connectors/bluesky_api/types.py index c044865c..f74395cd 100644 --- a/bc/channel/utils/connectors/bluesky_api/types.py +++ b/bc/channel/utils/connectors/bluesky_api/types.py @@ -39,7 +39,7 @@ class ByteSlice(TypedDict): class Thumbnail(TypedDict): - alt_text: str + alt: str image: ImageBlob From 9a41dc20f725dfca448bfabf3cfa0bfbd592afde Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Mon, 4 Dec 2023 19:26:08 -0400 Subject: [PATCH 20/92] fix(subscription): Update the case-form.html template to remove typo --- bc/subscription/templates/includes/search_htmx/case-form.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bc/subscription/templates/includes/search_htmx/case-form.html b/bc/subscription/templates/includes/search_htmx/case-form.html index ef8e2727..dd91920d 100644 --- a/bc/subscription/templates/includes/search_htmx/case-form.html +++ b/bc/subscription/templates/includes/search_htmx/case-form.html @@ -1,6 +1,6 @@ {% if error %} {% endif %} From 3ebba38ce7f35562837f7f253d699d60ae29c179 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo 
Date: Tue, 5 Dec 2023 01:21:46 -0400 Subject: [PATCH 21/92] fix(subscription): Tweak the error message for invalid templates --- bc/subscription/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bc/subscription/views.py b/bc/subscription/views.py index 2fe8174e..fad20ab3 100644 --- a/bc/subscription/views.py +++ b/bc/subscription/views.py @@ -117,7 +117,7 @@ def post(self, request, *args, **kwargs): "The combination of name, summary and article URL exceeds " f"the maximum character limit for {channel.get_service_display()} " "posts. Please try reducing the number of characters in the inputs." - "You can use abbreviations, or remove unnecessary words. Once you " + "You can use abbreviations or remove unnecessary words. Once you " "have made these changes, resubmit the form." ), } From 3781300f32c396c48270f34c0de217a319045841 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 5 Dec 2023 01:25:16 -0400 Subject: [PATCH 22/92] fix(core): Remove a docstring typo in the BlueskyTemplate class --- bc/core/utils/status/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bc/core/utils/status/base.py b/bc/core/utils/status/base.py index 66a7069b..90eda947 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -122,8 +122,7 @@ def __len__(self) -> int: def _available_space(self, *args, **kwargs) -> int: """This method overrides `Template._available_space`. - Bluesky don't use a fixed length for links like mastodon or - Twitter/X. + Bluesky doesn't use a fixed length for links like mastodon or Twitter/X """ placeholder_characters = sum( [ From 3474751fe4a4d6cb2bd794c56e8fc9461d00ce79 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 5 Dec 2023 01:27:07 -0400 Subject: [PATCH 23/92] fix(templates): Use the BlueskyTemplate instead of TwitterTemplate. 
--- bc/core/utils/status/selectors.py | 9 +++++---- bc/core/utils/status/templates.py | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/bc/core/utils/status/selectors.py b/bc/core/utils/status/selectors.py index 1fc0da3f..5f241e84 100644 --- a/bc/core/utils/status/selectors.py +++ b/bc/core/utils/status/selectors.py @@ -13,6 +13,7 @@ TWITTER_FOLLOW_A_NEW_CASE_W_ARTICLE, TWITTER_MINUTE_TEMPLATE, TWITTER_POST_TEMPLATE, + BlueskyTemplate, MastodonTemplate, TwitterTemplate, ) @@ -20,7 +21,7 @@ def get_template_for_channel( service: int, document_number: int | None -) -> TwitterTemplate | MastodonTemplate: +) -> TwitterTemplate | MastodonTemplate | BlueskyTemplate: """Returns a template object that uses the data of a webhook to create a new status update in the given service. This method checks the document number to pick one of the templates available. @@ -31,7 +32,7 @@ def get_template_for_channel( event. Returns: - TwitterTemplate | MastodonTemplate: template object to create + TwitterTemplate | MastodonTemplate | BlueskyTemplate: template object to create a new post. """ match service: @@ -61,7 +62,7 @@ def get_template_for_channel( def get_new_case_template( service: int, article_url: str -) -> TwitterTemplate | MastodonTemplate: +) -> TwitterTemplate | MastodonTemplate | BlueskyTemplate: """Returns a template object that uses the data of a subscription to create a status update in the given service. this method checks the article URL to pick one of the templates available. @@ -71,7 +72,7 @@ def get_new_case_template( article_url (str): the article url of the new subscription Returns: - TwitterTemplate | MastodonTemplate: template object to create + TwitterTemplate | MastodonTemplate | BlueskyTemplate: template object to create a new post. 
""" match service: diff --git a/bc/core/utils/status/templates.py b/bc/core/utils/status/templates.py index 66e397fb..40ba5ee7 100644 --- a/bc/core/utils/status/templates.py +++ b/bc/core/utils/status/templates.py @@ -111,7 +111,7 @@ ) -BLUESKY_FOLLOW_A_NEW_CASE = TwitterTemplate( +BLUESKY_FOLLOW_A_NEW_CASE = BlueskyTemplate( link_placeholders=[], str_template="""I'm now following {docket}: @@ -120,7 +120,7 @@ #CL{docket_id}""", ) -BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE = TwitterTemplate( +BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE = BlueskyTemplate( link_placeholders=["article_url"], str_template="""I'm now following {docket}: @@ -131,7 +131,7 @@ #CL{docket_id}""", ) -BLUESKY_POST_TEMPLATE = TwitterTemplate( +BLUESKY_POST_TEMPLATE = BlueskyTemplate( link_placeholders=["pdf_link"], str_template="""New filing: "{docket}" Doc #{doc_num}: {description} @@ -141,7 +141,7 @@ #CL{docket_id}""", ) -BLUESKY_MINUTE_TEMPLATE = TwitterTemplate( +BLUESKY_MINUTE_TEMPLATE = BlueskyTemplate( link_placeholders=["docket_link"], str_template="""New minute entry in {docket}: {description} From f593d347cdf08563e07a317aa1844c77545e4058 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 5 Dec 2023 12:10:56 -0400 Subject: [PATCH 24/92] feat(core): Refactor logic to check output validity into a helper method --- bc/core/utils/status/base.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/bc/core/utils/status/base.py b/bc/core/utils/status/base.py index 90eda947..a8437001 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -1,3 +1,4 @@ +import re from dataclasses import dataclass from bc.core.utils.string_utils import trunc @@ -61,6 +62,18 @@ def _available_space(self, *args, **kwargs) -> int: return self.max_characters - len(self) - placeholder_characters + def _check_output_validity(self, text: str) -> bool: + """ + Checks whether the provided text exceeds the maximum allowed length. 
+ + Args: + text (str): The text to be evaluated. + + Returns: + bool: True if the text length is within the limit, False otherwise. + """ + return len(text) <= self.max_characters + def format(self, *args, **kwargs) -> tuple[str, TextImage | None]: image = None @@ -81,7 +94,7 @@ def format(self, *args, **kwargs) -> tuple[str, TextImage | None]: text = self.str_template.format(**kwargs) - self.is_valid = len(text) <= self.max_characters + self.is_valid = self._check_output_validity(text) return text, image @@ -116,8 +129,14 @@ def __len__(self) -> int: class BlueskyTemplate(BaseTemplate): max_characters: int = 300 - def __len__(self) -> int: - return self.count_fixed_characters() + def _check_output_validity(self, text: str) -> bool: + """This method overrides `Template._check_output_validity`. + + Strips links from the output text since they form part of the custom + markup language. + """ + cleaned_text = re.sub(r"(?<=])\(\S+\)", "", text) + return len(cleaned_text) <= self.max_characters def _available_space(self, *args, **kwargs) -> int: """This method overrides `Template._available_space`. 
From e7d6bb94a083c5bcf5fc8c7009d4d5da388a4ae1 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 5 Dec 2023 12:11:42 -0400 Subject: [PATCH 25/92] feat(templates): Update Bluesky templates to use embedded links --- bc/core/utils/status/templates.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/bc/core/utils/status/templates.py b/bc/core/utils/status/templates.py index 40ba5ee7..552a27b6 100644 --- a/bc/core/utils/status/templates.py +++ b/bc/core/utils/status/templates.py @@ -112,31 +112,29 @@ BLUESKY_FOLLOW_A_NEW_CASE = BlueskyTemplate( - link_placeholders=[], + link_placeholders=["docket_link"], str_template="""I'm now following {docket}: -{docket_link} +[Docket]({docket_link}) #CL{docket_id}""", ) BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE = BlueskyTemplate( - link_placeholders=["article_url"], + link_placeholders=["docket_link", "article_url"], str_template="""I'm now following {docket}: -Docket: {docket_link} - -Context: {article_url} +[Docket]({docket_link}) | [Article Link]({article_url}) #CL{docket_id}""", ) BLUESKY_POST_TEMPLATE = BlueskyTemplate( - link_placeholders=["pdf_link"], + link_placeholders=["pdf_link", "docket_link"], str_template="""New filing: "{docket}" Doc #{doc_num}: {description} -PDF: {pdf_link} +[Download PDF]({pdf_link}) | [View Docket]({docket_link}) #CL{docket_id}""", ) @@ -145,7 +143,7 @@ link_placeholders=["docket_link"], str_template="""New minute entry in {docket}: {description} -Docket: {docket_link} +[View Docket]({docket_link}) #CL{docket_id}""", ) From 0e3584083ad1d41ed4d3990895d78ff0fa108a35 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 5 Dec 2023 12:47:20 -0400 Subject: [PATCH 26/92] feat(channel): Add helper method to parse embedded links before posting --- .../utils/connectors/bluesky_api/client.py | 81 ++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/bc/channel/utils/connectors/bluesky_api/client.py 
b/bc/channel/utils/connectors/bluesky_api/client.py index 724d82cf..46629439 100644 --- a/bc/channel/utils/connectors/bluesky_api/client.py +++ b/bc/channel/utils/connectors/bluesky_api/client.py @@ -109,6 +109,68 @@ def _parse_urls(self, text: str) -> list[RegexMatch]: ) return spans + def _parse_embedded_links(self, text: str) -> list[RegexMatch]: + """ + Parses embedded links from text. + + This helper method attempts to identify and return all occurrences of + link markup in the provided text. If no occurrences are found, an empty + list is returned. + + Args: + text (str): The text to parse. + + Returns: + list[RegexMatch]: List of matches. + """ + spans = [] + # Matches anything that isn't a square closing bracket + name_regex = "[^]]+" + # Matches http:// or https:// followed by anything but a closing parenthesis + url_regex = "http[s]?://[^)]+" + # Combined regex expression with named groups. + markup_regex = ( + rf"(?P\[{name_regex}])(?P\(\s*{url_regex}\s*\))" + ).encode() + text_bytes = text.encode("UTF-8") + offset = 0 + for m in re.finditer(markup_regex, text_bytes): + # Remove parenthesis and whitespaces from the uri + cleaned_uri = re.sub( + r"\(\s*|\s*\)", "", m.group("uri").decode("UTF-8") + ) + # We need the offset variable to fine-tune the target word's + # position and ensure the link is created in the right spot + # because We run the regex on the full text with links, but + # only post the cleaned-up version without them + spans.append( + RegexMatch( + start=m.start("name") - offset, + end=m.end("name") - offset, + text=cleaned_uri, + ) + ) + offset += len(m.group("uri")) + return spans + + def _clean_text(sef, text: str) -> str: + """ + Removes all link markup notations from a given text, leaving only the + plain text content. + + This helper function specifically targets the markup used for embedding + hyperlinks within the provided text. It aims to strip away all URLs + associated with links, leaving behind the raw textual information. 
+ + Args: + text (str): the text to be cleansed of link markup. + + Returns: + str: string containing the original text with all link markup + notations removed. + """ + return re.sub(r"(?<=])\(\S+\)", "", text) + def _parse_text_facets(self, text) -> list[TextAnnotation]: """ Parses text and extracts text annotations (e.g., links and mentions) @@ -129,6 +191,22 @@ def _parse_text_facets(self, text) -> list[TextAnnotation]: """ facets = [] annotation: TextAnnotation + for u in self._parse_embedded_links(text): + annotation = { + "index": { + "byteStart": u.start, + "byteEnd": u.end, + }, + "features": [ + { + "$type": "app.bsky.richtext.facet#link", + "uri": u.text, + } + ], + } + facets.append(annotation) + + text = self._clean_text(text) for u in self._parse_urls(text): annotation = { "index": { @@ -156,11 +234,10 @@ def post_status(self, text: str, media: list[Thumbnail]) -> dict[str, str]: Returns: dict[str, str]: Response including the cid and the uri of the record. """ - # Fetch the current time now = self.get_current_time_iso() message_object: Record = { "$type": "app.bsky.feed.post", - "text": text, + "text": self._clean_text(text), "facets": self._parse_text_facets(text), "createdAt": now, } From bac832abc68197eed5e9f5d9e7cbd70a50218c0f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 6 Dec 2023 04:19:44 -0400 Subject: [PATCH 27/92] build(deps): Add beautifulsoup4 as a dependency --- poetry.lock | 35 ++++++++++++++++++++++++++++++++--- pyproject.toml | 1 + 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 958fe22e..9eab3558 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "ansicolors" @@ -115,6 +115,24 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "23.1.0" @@ -1832,6 +1850,17 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + [[package]] name = "sqlparse" version = "0.4.4" @@ -1916,7 +1945,7 @@ files = [ [package.dependencies] requests = "*" -requests_oauthlib = "*" +requests-oauthlib = "*" [[package]] name = "types-pyopenssl" @@ -2163,4 +2192,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "4fb1be1b7d05b27c779c6e8e7642ae6e4cd9e9e1ec42ea785f405f505b5e4485" +content-hash = "91f3bba3b43bc541453570d8c1295b1f6bd88278adf70b9acc3fddb286104581" diff --git a/pyproject.toml b/pyproject.toml index 2af57e60..c1c44a88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ factory-boy = "^3.3.0" faker = "^19.3.1" disposable-email-domains = "^0.0.90" django-hcaptcha = "^0.2.0" +beautifulsoup4 = "^4.12.2" [tool.poetry.group.dev.dependencies] black = "^23.1.0" From fc82291d14d28f36df515969273274d0b17d9d3c Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 6 Dec 2023 04:24:18 -0400 Subject: [PATCH 28/92] feat(channel): Add a helper method to create social cards for Bluesky --- .../utils/connectors/bluesky_api/client.py | 63 +++++++++++++++++++ .../utils/connectors/bluesky_api/types.py | 15 ++++- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/bc/channel/utils/connectors/bluesky_api/client.py b/bc/channel/utils/connectors/bluesky_api/client.py index 46629439..2c491f1d 100644 --- a/bc/channel/utils/connectors/bluesky_api/client.py +++ b/bc/channel/utils/connectors/bluesky_api/client.py @@ -1,13 +1,17 @@ import re from datetime import datetime, timezone +from urllib.parse import urljoin import requests +from bs4 import BeautifulSoup +from requests import HTTPError from .types import ( ImageBlob, Record, RegexMatch, Session, + 
SocialCard, TextAnnotation, Thumbnail, ) @@ -223,6 +227,56 @@ def _parse_text_facets(self, text) -> list[TextAnnotation]: facets.append(annotation) return facets + def fetch_embed_url_card(self, url: str) -> SocialCard | None: + """ + Fetches metadata from a given URL to add the rendered preview in a post + + This method uses the Open Graph protocol to extract metadata from the + provided url and build a rich social card for a Bluesky post. + + Args: + url (str): The URL to fetch the social card information from. + + Returns: + SocialCard (optional): A dictionary containing the keys for building + the Bluesky post social card + """ + try: + resp = requests.get(url) + resp.raise_for_status() + except HTTPError: + return None + + soup = BeautifulSoup(resp.text, "html.parser") + + # parse out the "og:title" and "og:description" HTML meta tags + title_tag = soup.find("meta", property="og:title") + description_tag = soup.find("meta", property="og:description") + + # if there is an "og:image" HTML meta tag, fetch and upload that image + image_tag = soup.find("meta", property="og:image") + if not image_tag: + return None + + mime_tag = soup.find("meta", property="og:image:type") + mime_type = mime_tag["content"] if mime_tag else "image/png" + img_url = urljoin(url, image_tag["content"]) + try: + resp = requests.get(img_url) + resp.raise_for_status() + except HTTPError: + return None + + thumbnail = self.post_media(resp.content, mime_type) + return { + "uri": url, + "title": title_tag["content"] if title_tag else "", + "description": description_tag["content"] + if description_tag + else "", + "thumb": thumbnail, + } + def post_status(self, text: str, media: list[Thumbnail]) -> dict[str, str]: """ Creates a new status post on Bluesky using the provided text and thumbnails. 
@@ -247,6 +301,15 @@ def post_status(self, text: str, media: list[Thumbnail]) -> dict[str, str]: "$type": "app.bsky.embed.images", "images": media, } + elif message_object["facets"]: + card = self.fetch_embed_url_card( + message_object["facets"][-1]["features"][0]["uri"] + ) + if card: + message_object["embed"] = { + "$type": "app.bsky.embed.external", + "external": card, + } response = requests.post( f"{_BASE_API_URL}/com.atproto.repo.createRecord", diff --git a/bc/channel/utils/connectors/bluesky_api/types.py b/bc/channel/utils/connectors/bluesky_api/types.py index f74395cd..a54a62b8 100644 --- a/bc/channel/utils/connectors/bluesky_api/types.py +++ b/bc/channel/utils/connectors/bluesky_api/types.py @@ -48,6 +48,19 @@ class Thumbnail(TypedDict): {"$type": Literal["app.bsky.embed.images"], "images": list[Thumbnail]}, ) + +class SocialCard(TypedDict): + uri: str + title: str + description: str + thumb: ImageBlob + + +ExternalEmbed = TypedDict( + "ExternalEmbed", + {"$type": Literal["app.bsky.embed.external"], "external": SocialCard}, +) + LinkFacet = TypedDict( "LinkFacet", {"$type": Literal["app.bsky.richtext.facet#link"], "uri": str} ) @@ -65,6 +78,6 @@ class TextAnnotation(TypedDict): "text": str, "facets": list[TextAnnotation], "createdAt": str, - "embed": NotRequired[ImageEmbed], + "embed": NotRequired[ImageEmbed | ExternalEmbed], }, ) From 6e07124d1471027a68dfaa400ba1195d5cf21ec5 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 6 Dec 2023 11:11:42 -0400 Subject: [PATCH 29/92] feat(core): Refine the _available_space method in the BaseTemplate class --- bc/core/utils/status/base.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/bc/core/utils/status/base.py b/bc/core/utils/status/base.py index a8437001..fd5c0ed9 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -1,5 +1,6 @@ import re from dataclasses import dataclass +from string import Formatter from bc.core.utils.string_utils 
import trunc @@ -54,9 +55,11 @@ def _available_space(self, *args, **kwargs) -> int: placeholder_characters = sum( [ - len(str(val)) - for key, val in kwargs.items() - if key not in excluded + len(str(kwargs.get(field_name))) + for text, field_name, *_ in Formatter().parse( + self.str_template + ) + if field_name and field_name not in excluded ] ) @@ -137,18 +140,3 @@ def _check_output_validity(self, text: str) -> bool: """ cleaned_text = re.sub(r"(?<=])\(\S+\)", "", text) return len(cleaned_text) <= self.max_characters - - def _available_space(self, *args, **kwargs) -> int: - """This method overrides `Template._available_space`. - - Bluesky doesn't use a fixed length for links like mastodon or Twitter/X - """ - placeholder_characters = sum( - [ - len(str(val)) - for key, val in kwargs.items() - if key != "description" - ] - ) - - return self.max_characters - len(self) - placeholder_characters From 14ddbd2cb01dcb2f4edac2fe62babcbaaae8b9da Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 6 Dec 2023 17:16:56 -0400 Subject: [PATCH 30/92] feat(channel): Add a timeout to the request that fetches HTML to create cards --- bc/channel/utils/connectors/bluesky_api/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bc/channel/utils/connectors/bluesky_api/client.py b/bc/channel/utils/connectors/bluesky_api/client.py index 2c491f1d..e850c7c5 100644 --- a/bc/channel/utils/connectors/bluesky_api/client.py +++ b/bc/channel/utils/connectors/bluesky_api/client.py @@ -242,7 +242,7 @@ def fetch_embed_url_card(self, url: str) -> SocialCard | None: the Bluesky post social card """ try: - resp = requests.get(url) + resp = requests.get(url, timeout=self._timeout) resp.raise_for_status() except HTTPError: return None @@ -262,7 +262,7 @@ def fetch_embed_url_card(self, url: str) -> SocialCard | None: mime_type = mime_tag["content"] if mime_tag else "image/png" img_url = urljoin(url, image_tag["content"]) try: - resp = requests.get(img_url) + resp = 
requests.get(img_url, timeout=self._timeout) resp.raise_for_status() except HTTPError: return None From 21696aebe45b0dcb91baeadc732560fb008c0ced Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 6 Dec 2023 17:19:22 -0400 Subject: [PATCH 31/92] feat(template): Update Bluesky templates --- bc/core/utils/status/templates.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bc/core/utils/status/templates.py b/bc/core/utils/status/templates.py index 552a27b6..62b9ad63 100644 --- a/bc/core/utils/status/templates.py +++ b/bc/core/utils/status/templates.py @@ -115,7 +115,7 @@ link_placeholders=["docket_link"], str_template="""I'm now following {docket}: -[Docket]({docket_link}) +[View Full Case]({docket_link}) #CL{docket_id}""", ) @@ -124,7 +124,7 @@ link_placeholders=["docket_link", "article_url"], str_template="""I'm now following {docket}: -[Docket]({docket_link}) | [Article Link]({article_url}) +[View Full Case]({docket_link}) | [Background Info]({article_url}) #CL{docket_id}""", ) @@ -134,7 +134,7 @@ str_template="""New filing: "{docket}" Doc #{doc_num}: {description} -[Download PDF]({pdf_link}) | [View Docket]({docket_link}) +[Download PDF]({pdf_link}) | [View Full Case]({docket_link}) #CL{docket_id}""", ) @@ -143,7 +143,7 @@ link_placeholders=["docket_link"], str_template="""New minute entry in {docket}: {description} -[View Docket]({docket_link}) +[View Full Case]({docket_link}) #CL{docket_id}""", ) From e2483e474a70fc71d7ce3f40269608f5f703db20 Mon Sep 17 00:00:00 2001 From: mlissner Date: Wed, 6 Dec 2023 15:32:36 -0800 Subject: [PATCH 32/92] fix(bluesky): Add a user agent to pierce CL's firewall --- bc/channel/utils/connectors/bluesky_api/client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bc/channel/utils/connectors/bluesky_api/client.py b/bc/channel/utils/connectors/bluesky_api/client.py index e850c7c5..fe1fe4a5 100644 --- a/bc/channel/utils/connectors/bluesky_api/client.py +++ 
b/bc/channel/utils/connectors/bluesky_api/client.py @@ -242,7 +242,11 @@ def fetch_embed_url_card(self, url: str) -> SocialCard | None: the Bluesky post social card """ try: - resp = requests.get(url, timeout=self._timeout) + resp = requests.get( + url, + headers={"User-Agent": "bots.law"}, + timeout=self._timeout, + ) resp.raise_for_status() except HTTPError: return None From 27a4c653ece3014e4e56c9db54ef4fd7b5895fc6 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 7 Dec 2023 02:06:28 -0400 Subject: [PATCH 33/92] feat(template): Tweak template to render Follow on Bluesky button --- bc/assets/templates/includes/follow-button.html | 4 +++- bc/assets/templates/includes/inlines/bluesky.svg | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 bc/assets/templates/includes/inlines/bluesky.svg diff --git a/bc/assets/templates/includes/follow-button.html b/bc/assets/templates/includes/follow-button.html index 75031219..368f22a1 100644 --- a/bc/assets/templates/includes/follow-button.html +++ b/bc/assets/templates/includes/follow-button.html @@ -3,8 +3,10 @@
{% if service_name == 'Twitter' %} {% include 'includes/inlines/twitter.svg' %} - {% else %} + {% elif service_name == 'Mastodon' %} {% include 'includes/inlines/mastodon.svg' %} + {% elif service_name == 'Bluesky' %} + {% include 'includes/inlines/bluesky.svg' %} {% endif %}
diff --git a/bc/assets/templates/includes/inlines/bluesky.svg b/bc/assets/templates/includes/inlines/bluesky.svg new file mode 100644 index 00000000..4de2b2d6 --- /dev/null +++ b/bc/assets/templates/includes/inlines/bluesky.svg @@ -0,0 +1 @@ + From 15321f420f864fc5835236bda16ab7b5c351fbd6 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 7 Dec 2023 02:07:19 -0400 Subject: [PATCH 34/92] feat(channel): Add logic to compute links to Bluesky profiles --- bc/channel/models.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/bc/channel/models.py b/bc/channel/models.py index e5e7ff9f..8053981c 100644 --- a/bc/channel/models.py +++ b/bc/channel/models.py @@ -126,17 +126,20 @@ def get_api_wrapper(self) -> BaseAPIConnector: ) def self_url(self): - if self.service == self.TWITTER: - return f"https://twitter.com/{self.account}" - elif self.service == self.MASTODON: - result = masto_regex.search(self.account) - assert len(result.groups()) == 2 - account_part, instance_part = result.groups() - return f"https://{instance_part}/@{account_part}" - else: - raise NotImplementedError( - f"Channel.self_url() not yet implemented for service {self.service}" - ) + match self.service: + case self.TWITTER: + return f"https://twitter.com/{self.account}" + case self.MASTODON: + result = masto_regex.search(self.account) + assert len(result.groups()) == 2 + account_part, instance_part = result.groups() + return f"https://{instance_part}/@{account_part}" + case self.BLUESKY: + return f"https://bsky.app/profile/{self.account_id}" + case _: + raise NotImplementedError( + f"Channel.self_url() not yet implemented for service {self.service}" + ) def __str__(self) -> str: if self.account: From dc289fc48284c70cf80061e2449bf15ef7ec5a4b Mon Sep 17 00:00:00 2001 From: ImgBotApp Date: Thu, 7 Dec 2023 07:10:26 +0000 Subject: [PATCH 35/92] [ImgBot] Optimize images /bc/assets/templates/includes/inlines/bluesky.svg -- 0.21kb -> 0.20kb (3.26%) Signed-off-by: 
ImgBotApp --- bc/assets/templates/includes/inlines/bluesky.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bc/assets/templates/includes/inlines/bluesky.svg b/bc/assets/templates/includes/inlines/bluesky.svg index 4de2b2d6..3eb8b599 100644 --- a/bc/assets/templates/includes/inlines/bluesky.svg +++ b/bc/assets/templates/includes/inlines/bluesky.svg @@ -1 +1 @@ - + \ No newline at end of file From 233b69e7d0a387a9fe2350df1b7af5a046475762 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 7 Dec 2023 11:09:40 -0400 Subject: [PATCH 36/92] feat(utils): Add logic to count emojis according to the Twitter spec --- bc/core/utils/string_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bc/core/utils/string_utils.py b/bc/core/utils/string_utils.py index be2ae165..941f68c2 100644 --- a/bc/core/utils/string_utils.py +++ b/bc/core/utils/string_utils.py @@ -8,7 +8,11 @@ def trunc(s: str, length: int, ellipsis: str | None = None) -> str: addition of the ellipsis without being longer than length. """ if ellipsis: - ellipsis_length = len(ellipsis) + # Emojis are encoded as multiple characters, so we need to count them + # individually. + ellipsis_length = sum( + [2 if len(char.encode("utf-8")) > 1 else 1 for char in ellipsis] + ) else: ellipsis_length = 0 From 727198d8044c271c8e200c0f65cdfbd052102745 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 13 Dec 2023 03:56:24 -0400 Subject: [PATCH 37/92] feat(channels): Refine media upload for Bluesky posts tweaks the post_media method to handle big thumbnails by logging the error and returning None instead of raising an exception. This new approach allow us to exclude those big thumbnails from the embed array and continue the post creation process. 
--- bc/channel/utils/connectors/bluesky.py | 15 +++++++++------ bc/channel/utils/connectors/bluesky_api/client.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/bc/channel/utils/connectors/bluesky.py b/bc/channel/utils/connectors/bluesky.py index f24b9518..172340b1 100644 --- a/bc/channel/utils/connectors/bluesky.py +++ b/bc/channel/utils/connectors/bluesky.py @@ -29,16 +29,19 @@ def add_status( media: list[Thumbnail] = [] if text_image: blob = self.upload_media(text_image.to_bytes(), None) - media.append( - { - "alt": text_image_alt_text(text_image.description), - "image": blob, - } - ) + if blob: + media.append( + { + "alt": text_image_alt_text(text_image.description), + "image": blob, + } + ) if thumbnails: for idx, thumbnail in enumerate(thumbnails): blob = self.upload_media(thumbnail, None) + if not blob: + continue media.append( { "alt": thumb_num_alt_text(idx), diff --git a/bc/channel/utils/connectors/bluesky_api/client.py b/bc/channel/utils/connectors/bluesky_api/client.py index fe1fe4a5..4310ade2 100644 --- a/bc/channel/utils/connectors/bluesky_api/client.py +++ b/bc/channel/utils/connectors/bluesky_api/client.py @@ -1,3 +1,4 @@ +import logging import re from datetime import datetime, timezone from urllib.parse import urljoin @@ -16,6 +17,8 @@ Thumbnail, ) +logger = logging.getLogger(__name__) + _BASE_API_URL = "https://bsky.social/xrpc" _DEFAULT_CONTENT_TYPE = "application/json" DEFAULT_LANGUAGE_CODE1 = "en" @@ -50,7 +53,7 @@ def _get_session(self) -> Session: ) return Session(**response.json()) - def post_media(self, media: bytes, mime_type: str) -> ImageBlob: + def post_media(self, media: bytes, mime_type: str) -> ImageBlob | None: """ Upload bytes data (a "blob") using the given content type. 
@@ -63,9 +66,10 @@ def post_media(self, media: bytes, mime_type: str) -> ImageBlob: """ # this size limit is specified in the app.bsky.embed.images lexicon if len(media) > 1000000: - raise Exception( + logger.error( f"image file size too large. 1000000 bytes maximum, got: {len(media)}" ) + return None resp = requests.post( f"{_BASE_API_URL}/com.atproto.repo.uploadBlob", @@ -272,6 +276,9 @@ def fetch_embed_url_card(self, url: str) -> SocialCard | None: return None thumbnail = self.post_media(resp.content, mime_type) + if not thumbnail: + return None + return { "uri": url, "title": title_tag["content"] if title_tag else "", From 4ee10b7ee9d983130481ce18970ca388a17c857e Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 14 Dec 2023 22:36:39 -0400 Subject: [PATCH 38/92] fix(template): Tweak method to check output validity in the base class --- bc/core/tests/test_mastodon_template.py | 151 +++++++++++++++++++++++- bc/core/utils/status/base.py | 9 +- 2 files changed, 158 insertions(+), 2 deletions(-) diff --git a/bc/core/tests/test_mastodon_template.py b/bc/core/tests/test_mastodon_template.py index 5181ffba..22f8e54c 100644 --- a/bc/core/tests/test_mastodon_template.py +++ b/bc/core/tests/test_mastodon_template.py @@ -2,7 +2,156 @@ from django.test import SimpleTestCase -from bc.core.utils.status.templates import MastodonTemplate +from bc.core.utils.status.templates import ( + BLUESKY_FOLLOW_A_NEW_CASE, + BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE, + MASTODON_FOLLOW_A_NEW_CASE, + MASTODON_FOLLOW_A_NEW_CASE_W_ARTICLE, + TWITTER_FOLLOW_A_NEW_CASE, + TWITTER_FOLLOW_A_NEW_CASE_W_ARTICLE, + MastodonTemplate, +) + + +class NewSubscriptionValidTemplateTest(SimpleTestCase): + def setUp(self) -> None: + self.docket_url = "https://www.courtlistener.com/docket/68073028/01208579363/united-states-v-donald-trump/?redirect_or_modal=True" + self.article_url = "https://www.theverge.com/2023/9/11/23868870/internet-archive-hachette-open-library-copyright-lawsuit-appeal" + self.docket_id = 
"68073028" + return super().setUp() + + def test_check_output_validity_mastodon_simple_template(self): + template = MASTODON_FOLLOW_A_NEW_CASE + valid_multipliers = [5, 10, 20, 40, 48] + for multiplier in valid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertTrue(template.is_valid) + + invalid_multipliers = [50, 100] + for multiplier in invalid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertFalse(template.is_valid) + + def test_check_output_validity_mastodon_template_w_article(self): + template = MASTODON_FOLLOW_A_NEW_CASE_W_ARTICLE + valid_multipliers = [5, 10, 20, 40] + for multiplier in valid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + + self.assertTrue(template.is_valid) + + invalid_multipliers = [41, 50, 100] + for multiplier in invalid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertFalse(template.is_valid) + + def test_check_output_validity_twitter_simple_template(self): + template = TWITTER_FOLLOW_A_NEW_CASE + valid_multipliers = [5, 10, 20, 40, 44] + for multiplier in valid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertTrue(template.is_valid) + + invalid_multipliers = [45, 50, 100] + for multiplier in invalid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertFalse(template.is_valid) + + def 
test_check_output_validity_twitter_template_w_article(self): + template = TWITTER_FOLLOW_A_NEW_CASE_W_ARTICLE + valid_multipliers = [5, 10, 20, 35] + for multiplier in valid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertTrue(template.is_valid) + + invalid_multipliers = [37, 50, 100] + for multiplier in invalid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertFalse(template.is_valid) + + def test_check_output_validity_bluesky_simple_template(self): + template = BLUESKY_FOLLOW_A_NEW_CASE + valid_multipliers = [5, 10, 20, 40, 50] + for multiplier in valid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertTrue(template.is_valid) + + invalid_multipliers = [51, 100] + for multiplier in invalid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertFalse(template.is_valid) + + def test_check_output_validity_bluesky_template_w_article(self): + template = BLUESKY_FOLLOW_A_NEW_CASE_W_ARTICLE + valid_multipliers = [5, 10, 20, 40, 46] + for multiplier in valid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertTrue(template.is_valid) + + invalid_multipliers = [47, 50, 100] + for multiplier in invalid_multipliers: + template.format( + docket=multiplier * "short", + docket_link=self.docket_url, + docket_id=self.docket_id, + article_url=self.article_url, + ) + self.assertFalse(template.is_valid) class MastodonTemplateTest(SimpleTestCase): diff --git a/bc/core/utils/status/base.py 
b/bc/core/utils/status/base.py index fd5c0ed9..a19542da 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -69,13 +69,20 @@ def _check_output_validity(self, text: str) -> bool: """ Checks whether the provided text exceeds the maximum allowed length. + Strips links from the output text since they use a fixed character + count. + Args: text (str): The text to be evaluated. Returns: bool: True if the text length is within the limit, False otherwise. """ - return len(text) <= self.max_characters + url_pattern = r"https?://\S+" + url_match = re.findall(url_pattern, text) + output = re.sub(url_pattern, "", text) + + return len(output) + 23 * len(url_match) <= self.max_characters def format(self, *args, **kwargs) -> tuple[str, TextImage | None]: image = None From 36263164cf2338cb0d20edd8a590cd605698fbbd Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 15 Dec 2023 02:10:09 -0400 Subject: [PATCH 39/92] feat(templates): Update the donate link to use new Neon form --- bc/assets/templates/includes/footer.html | 2 +- bc/assets/templates/includes/header.html | 4 ++-- bc/web/templates/homepage.html | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bc/assets/templates/includes/footer.html b/bc/assets/templates/includes/footer.html index 58584979..0d5e7fbc 100644 --- a/bc/assets/templates/includes/footer.html +++ b/bc/assets/templates/includes/footer.html @@ -50,7 +50,7 @@
Other work
About FLP
{% include "./footer-link.html" with href="https://free.law/about/" text="Our Mission" %} - {% include "./footer-link.html" with href="https://free.law/donate/" text="Donate Now" %} + {% include "./footer-link.html" with href="https://donate.free.law/forms/supportflp" text="Donate Now" %} {% include "./footer-link.html" with href="https://free.law/blog/" text="Follow the FLP Blog" %} {% include "./footer-link.html" with href="https://free.law/contact/" text="Contact Us" %}
diff --git a/bc/assets/templates/includes/header.html b/bc/assets/templates/includes/header.html index 7af144df..fcf9035c 100644 --- a/bc/assets/templates/includes/header.html +++ b/bc/assets/templates/includes/header.html @@ -41,7 +41,7 @@
From 00024c18a10680284dab4f58694379ad74267c54 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 15 Dec 2023 02:10:51 -0400 Subject: [PATCH 40/92] feat(readme): Update the donate link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6853de0b..5c3b065c 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ It costs a lot to build and maintain this system and we need your support. [slack]: https://github.com/freelawproject/bigcases2/issues/10 [teams]: https://github.com/freelawproject/bigcases2/issues/41 [flp]: https://free.law/ -[flpd]: https://free.law/donate/ +[flpd]: https://donate.free.law/forms/supportflp [bcb1]: https://github.com/bdheath/Big-Cases [c]: https://free.law/contact/ [litte]: https://github.com/freelawproject/bigcases2/issues/8 From 3a333b562c629872bed45801f9bd16c74bdd2eee Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Mon, 18 Dec 2023 21:19:55 -0400 Subject: [PATCH 41/92] refactor(core): Adds comment and renames variables in the BaseTemplate class --- bc/core/utils/status/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bc/core/utils/status/base.py b/bc/core/utils/status/base.py index a19542da..7ee486ba 100644 --- a/bc/core/utils/status/base.py +++ b/bc/core/utils/status/base.py @@ -79,10 +79,11 @@ def _check_output_validity(self, text: str) -> bool: bool: True if the text length is within the limit, False otherwise. 
""" url_pattern = r"https?://\S+" - url_match = re.findall(url_pattern, text) - output = re.sub(url_pattern, "", text) + url_count = len(re.findall(url_pattern, text)) + linkless_output = re.sub(url_pattern, "", text) - return len(output) + 23 * len(url_match) <= self.max_characters + # Twitter and Mastodon both count links as 23 chars at present + return len(linkless_output) + (23 * url_count) <= self.max_characters def format(self, *args, **kwargs) -> tuple[str, TextImage | None]: image = None From ec03ff2fdbabe7b4b058a368dc7449f7be4d6b12 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Mon, 18 Dec 2023 21:26:14 -0400 Subject: [PATCH 42/92] refactor(core): Rename the test_mastodon_template.py file Rename the test_mastodon_template.py file to test_template.py to better reflect its use for testing template functionality. --- bc/core/tests/{test_mastodon_template.py => test_templates.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename bc/core/tests/{test_mastodon_template.py => test_templates.py} (100%) diff --git a/bc/core/tests/test_mastodon_template.py b/bc/core/tests/test_templates.py similarity index 100% rename from bc/core/tests/test_mastodon_template.py rename to bc/core/tests/test_templates.py From 671a4301c8e2e8be0ff7f92409294440bb8efec7 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 28 Dec 2023 16:03:03 -0400 Subject: [PATCH 43/92] fix(subscription): Update the lookup_initial_complaint method This commit tweaks the lookup_initial_complaint method to use the /recap endpoint instead of the /docket-entries endpoint. 
--- bc/subscription/utils/courtlistener.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bc/subscription/utils/courtlistener.py b/bc/subscription/utils/courtlistener.py index d5c0bc44..d2b11e27 100644 --- a/bc/subscription/utils/courtlistener.py +++ b/bc/subscription/utils/courtlistener.py @@ -149,7 +149,7 @@ def lookup_document_by_doc_id(doc_id: int | None) -> DocumentDict: def lookup_initial_complaint(docket_id: int | None) -> DocumentDict | None: """ - Performs a GET query on /api/rest/v3/docket-entries/ + Performs a GET query on /api/rest/v3/recap/ using the docket_id to get the first entry of the case. Args: @@ -162,9 +162,16 @@ def lookup_initial_complaint(docket_id: int | None) -> DocumentDict | None: if not docket_id: return None + params: dict[str, str | int] = { + "docket_entry__docket__id": docket_id, + "docket_entry__entry_number": 1, + "order_by": "id", + "fields": "id,filepath_local,page_count,pacer_doc_id", + } + response = requests.get( - f"{CL_API['docket-entries']}", - params={"docket__id": docket_id, "entry_number": 1}, + f"{CL_API['recap-documents']}", + params=params, headers=auth_header(), timeout=5, ) @@ -174,7 +181,7 @@ def lookup_initial_complaint(docket_id: int | None) -> DocumentDict | None: if not data["count"]: return None - document = data["results"][0]["recap_documents"][0] + document = data["results"][0] return { "id": document["id"], "filepath_local": document["filepath_local"], From 505d0129438ba65f5e7a4fe3ade809e1921dfd0b Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 3 Jan 2024 17:05:30 -0400 Subject: [PATCH 44/92] feat(includes): Updates the Bluesky svg icon --- bc/assets/templates/includes/inlines/bluesky.svg | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bc/assets/templates/includes/inlines/bluesky.svg b/bc/assets/templates/includes/inlines/bluesky.svg index 3eb8b599..5cf6bd2f 100644 --- a/bc/assets/templates/includes/inlines/bluesky.svg +++ 
b/bc/assets/templates/includes/inlines/bluesky.svg @@ -1 +1,4 @@ - \ No newline at end of file + + + + From bc35acbe734efeadb79cd1773da86bbf1806ec62 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 3 Jan 2024 17:05:30 -0400 Subject: [PATCH 45/92] feat(templates): Adds the bluesky button to the header --- bc/assets/templates/includes/header.html | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/bc/assets/templates/includes/header.html b/bc/assets/templates/includes/header.html index fcf9035c..e7d65d9b 100644 --- a/bc/assets/templates/includes/header.html +++ b/bc/assets/templates/includes/header.html @@ -44,7 +44,13 @@  Donate - + +
+ {% include './inlines/bluesky.svg' %} +
+ Follow on Bluesky +
+
{% include './inlines/mastodon.svg' %}
@@ -74,7 +80,7 @@
-