From 63a685bc1a46b31d2f5c1bcc28135a1befbab755 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20Wersd=C3=B6rfer?= <jochen@wersdoerfer.de>
Date: Sun, 10 Nov 2024 14:44:56 +0100
Subject: [PATCH] #168 start implementing transcripts

---
 cast/models/transcript.py |  32 +++++
 cast/views/transcript.py  | 269 ++++++++++++++++++++++++++++++++++++++
 docs/transcript.rst       |  22 ++++
 3 files changed, 323 insertions(+)
 create mode 100644 cast/models/transcript.py
 create mode 100644 cast/views/transcript.py
 create mode 100644 docs/transcript.rst

diff --git a/cast/models/transcript.py b/cast/models/transcript.py
new file mode 100644
index 00000000..88fd5bed
--- /dev/null
+++ b/cast/models/transcript.py
@@ -0,0 +1,32 @@
+from django.db import models
+from wagtail.models import CollectionMember
+from wagtail.search import index
+
+from . import Audio
+
+
+class Transcript(CollectionMember, index.Indexed, models.Model):  # transcript files of an Audio object
+    audio = models.OneToOneField(Audio, on_delete=models.CASCADE, related_name="transcript")  # deleted with its audio
+    podlove = models.FileField(  # optional upload for the Podlove Web Player
+        upload_to="cast_transcript/",
+        null=True,
+        blank=True,
+        verbose_name="Podlove Transcript",
+        help_text="The transcript format for the Podlove Web Player",
+    )
+    vtt = models.FileField(  # optional upload in WebVTT format
+        upload_to="cast_transcript/",
+        null=True,
+        blank=True,
+        verbose_name="WebVTT Transcript",
+        help_text="The WebVTT format for feed / podcatchers",
+    )
+    dote = models.FileField(  # optional upload in DOTe JSON format
+        upload_to="cast_transcript/",
+        null=True,
+        blank=True,
+        verbose_name="DOTe Transcript",
+        help_text="The DOTe json format for feed / podcatchers",
+    )
+
+    admin_form_fields: tuple[str, ...] = ("audio", "podlove", "vtt", "dote")  # fields for admin forms - TODO confirm
diff --git a/cast/views/transcript.py b/cast/views/transcript.py
new file mode 100644
index 00000000..26defc02
--- /dev/null
+++ b/cast/views/transcript.py
@@ -0,0 +1,269 @@
+import json
+from typing import Any
+
+from django.http import HttpRequest, HttpResponse, JsonResponse
+from django.shortcuts import get_object_or_404, redirect, render
+from django.urls import reverse
+from django.utils.translation import gettext_lazy as _
+from django.views.decorators.vary import vary_on_headers
+from wagtail.admin import messages
+from wagtail.admin.modal_workflow import render_modal_workflow
+from wagtail.search.backends import get_search_backends
+
+from ..appsettings import CHOOSER_PAGINATION, MENU_ITEM_PAGINATION
+from ..forms import NonEmptySearchForm, TranscriptForm
+from ..models import Transcript
+from . import AuthenticatedHttpRequest
+from .wagtail_pagination import paginate, pagination_template
+
+
+@vary_on_headers("X-Requested-With")
+def index(request: HttpRequest) -> HttpResponse:
+    transcripts = Transcript.objects.all()
+
+    # Search
+    query_string = None
+    if "q" in request.GET:
+        form = NonEmptySearchForm(request.GET, placeholder=_("Search transcript files"))
+        if form.is_valid():
+            query_string = form.cleaned_data["q"]
+            transcripts = transcripts.search(query_string)
+    else:
+        form = NonEmptySearchForm(placeholder=_("Search transcripts"))
+
+    # Pagination
+    paginator, transcripts = paginate(request, transcripts, per_page=MENU_ITEM_PAGINATION)
+
+    # Create response
+    if request.headers.get("x-requested-with") == "XMLHttpRequest":
+        return render(
+            request,
+            "cast/transcript/results.html",
+            {
+                "transcripts": transcripts,
+                "query_string": query_string,
+                "is_searching": bool(query_string),
+            },
+        )
+    else:
+        return render(
+            request,
+            "cast/transcript/index.html",
+            {
+                "transcripts": transcripts,
+                "query_string": query_string,
+                "is_searching": bool(query_string),
+                "search_form": form,
+                "user_can_add": True,
+                "collections": None,
+                "current_collection": None,
+            },
+        )
+
+
+def add(request: AuthenticatedHttpRequest) -> HttpResponse:
+    if request.POST:
+        transcript = Transcript()
+        form = TranscriptForm(request.POST, request.FILES, instance=transcript)
+        if form.is_valid():
+            form.save()
+
+            # Reindex the media entry to make sure all tags are indexed
+            for backend in get_search_backends():
+                backend.add(transcript)
+
+            messages.success(
+                request,
+                _("Transcript file '{0}' added.").format(transcript.pk),
+                buttons=[messages.button(reverse("cast-transcript:edit", args=(transcript.id,)), _("Edit"))],
+            )
+            return redirect("cast-transcript:index")
+        else:
+            messages.error(request, _("The transcript file could not be saved due to errors."))
+    else:
+        transcript = Transcript()
+        form = TranscriptForm(instance=transcript)
+
+    return render(
+        request,
+        "cast/transcript/add.html",
+        {"form": form},
+    )
+
+
+def edit(request: HttpRequest, transcript_id: int) -> HttpResponse:
+    transcript = get_object_or_404(Transcript, id=transcript_id)
+
+    if request.method == "POST":
+        form = TranscriptForm(request.POST, request.FILES, instance=transcript)
+        if form.is_valid():
+            transcript = form.save()
+
+            # Reindex the media entry to make sure all tags are indexed
+            for backend in get_search_backends():
+                backend.add(transcript)
+
+            messages.success(
+                request,
+                _("Transcript file '{0}' updated").format(transcript.pk),
+                buttons=[messages.button(reverse("cast-transcript:edit", args=(transcript.id,)), _("Edit"))],
+            )
+            return redirect("cast-transcript:index")
+        else:
+            messages.error(request, _("The transcript could not be saved due to errors."))
+    else:
+        form = TranscriptForm(instance=transcript)
+
+    return render(
+        request,
+        "cast/transcript/edit.html",
+        {
+            "transcript": transcript,
+            "form": form,
+            "user_can_delete": True,
+        },
+    )
+
+
+def delete(request: HttpRequest, transcript_id: int) -> HttpResponse:
+    transcript = get_object_or_404(Transcript, id=transcript_id)
+
+    if request.POST:
+        transcript.delete()
+        messages.success(request, _("Transcript '{0}' deleted.").format(transcript.pk))
+        return redirect("cast-transcript:index")
+
+    return render(request, "cast/transcript/confirm_delete.html", {"transcript": transcript})
+
+
+def chooser(request: HttpRequest) -> HttpResponse:
+    transcripts = Transcript.objects.all()
+
+    upload_form = TranscriptForm(prefix="media-chooser-upload")
+
+    if "q" in request.GET or "p" in request.GET:
+        search_form = NonEmptySearchForm(request.GET)
+        if search_form.is_valid():
+            q = search_form.cleaned_data["q"]
+
+            transcripts = transcripts.search(q)
+            is_searching = True
+        else:
+            q = None
+            is_searching = False
+
+        paginator, transcripts = paginate(request, transcripts, per_page=CHOOSER_PAGINATION)
+        return render(
+            request,
+            "cast/transcript/chooser_results.html",
+            {
+                "transcripts": transcripts,
+                "query_string": q,
+                "is_searching": is_searching,
+                "pagination_template": pagination_template,
+            },
+        )
+    else:
+        search_form = NonEmptySearchForm()
+        paginator, transcripts = paginate(request, transcripts, per_page=CHOOSER_PAGINATION)
+
+    return render_modal_workflow(
+        request,
+        "cast/transcript/chooser_chooser.html",
+        None,
+        {
+            "transcripts": transcripts,
+            "uploadform": upload_form,
+            "searchform": search_form,
+            "is_searching": False,
+            "pagination_template": pagination_template,
+        },
+        json_data={
+            "step": "chooser",
+            "error_label": "Server Error",
+            "error_message": "Report this error to your webmaster with the following information:",
+            "tag_autocomplete_url": reverse("wagtailadmin_tag_autocomplete"),
+        },
+    )
+
+
+def get_transcript_data(transcript: Transcript) -> dict[str, Any]:
+    """
+    helper function: given a transcript, return the json to pass back to the
+    chooser panel - move to model FIXME
+    """
+    return {
+        "id": transcript.id,
+        "edit_link": reverse("cast-transcript:edit", args=(transcript.id,)),
+    }
+
+
+def chosen(request, transcript_id: int) -> HttpResponse:
+    transcript = get_object_or_404(Transcript, id=transcript_id)
+
+    return render_modal_workflow(
+        request,
+        None,
+        None,
+        None,
+        json_data={"step": "transcript_chosen", "result": get_transcript_data(transcript)},
+    )
+
+
+def chooser_upload(request: AuthenticatedHttpRequest) -> HttpResponse:
+    if request.method == "POST":
+        transcript = Transcript()
+        form = TranscriptForm(request.POST, request.FILES, instance=transcript, prefix="media-chooser-upload")
+
+        if form.is_valid():
+            form.save()
+
+            # Reindex the media entry to make sure all tags are indexed
+            for backend in get_search_backends():
+                backend.add(transcript)
+
+            return render_modal_workflow(
+                request,
+                None,
+                None,
+                None,
+                json_data={"step": "transcript_chosen", "result": get_transcript_data(transcript)},
+            )
+        else:
+            messages.error(request, _("The transcript could not be saved due to errors."))
+
+    transcripts = Transcript.objects.all()
+
+    search_form = NonEmptySearchForm()
+
+    paginator, transcripts = paginate(request, transcripts, per_page=CHOOSER_PAGINATION)
+
+    context = {
+        "transcripts": transcripts,
+        "searchform": search_form,
+        # "collections": collections,
+        "uploadform": TranscriptForm(),
+        "is_searching": False,
+        "pagination_template": "wagtailadmin/shared/ajax_pagination_nav.html",
+    }
+    return render_modal_workflow(
+        request,
+        "cast/transcript/chooser_chooser.html",
+        None,
+        context,
+        json_data={"step": "chooser"},
+    )
+
+
+def podlove_transcript_json(_request: HttpRequest, pk):
+    """Return the podlove transcript content as JSON because of CORS restrictions."""
+    transcript = get_object_or_404(Transcript, pk=pk)
+    if transcript.podlove:
+        # Open the file and load its contents as JSON
+        with transcript.podlove.open("r") as file:
+            try:
+                data = json.load(file)  # assumes the file content is JSON
+            except json.JSONDecodeError:
+                return HttpResponse("Invalid JSON format in podlove file", status=400)
+        return JsonResponse(data)
+    return HttpResponse("Podlove file not available", status=404)
diff --git a/docs/transcript.rst b/docs/transcript.rst
new file mode 100644
index 00000000..ba68ac51
--- /dev/null
+++ b/docs/transcript.rst
@@ -0,0 +1,22 @@
+.. _transcript_overview:
+
+***********
+Transcripts
+***********
+
+You can upload transcript files to the server and display them
+alongside the audio player. They will also be included in the feed
+and can be used by podcast clients to display the transcript while
+listening to the episode.
+
+Transcript Models
+=================
+
+Transcript files are represented by the `Transcript` model. Transcripts have an
+audio file they belong to, a `podlove` field that contains the transcript in
+the form that the `Podlove Web Player <https://podlove.org/podlove-web-player/>`_
+can use, and two other file formats that are referenced in the
+feed:
+
+* `vtt` - WebVTT, a subtitle format in plain text
+* `dote` - DOTe, a JSON transcript format