Skip to content

Commit

Permalink
let the user select speech to text for media objects
Browse files Browse the repository at this point in the history
  • Loading branch information
peetucket committed Oct 11, 2024
1 parent 39e7961 commit de05e0f
Show file tree
Hide file tree
Showing 22 changed files with 161 additions and 44 deletions.
1 change: 1 addition & 0 deletions app/controllers/batch_contexts_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def batch_contexts_params
.permit(:project_name, :content_structure, :staging_style_symlink,
:processing_configuration, :staging_location, :all_files_public,
:run_ocr, :manually_corrected_ocr, :ocr_available,
:run_stt, :manually_corrected_stt, :stt_available,
:using_file_manifest, job_runs_attributes: [:job_type], ocr_languages: [])
.merge(user: current_user)
end
Expand Down
27 changes: 24 additions & 3 deletions app/javascript/controllers/caption_controller.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { Controller } from '@hotwired/stimulus'

export default class extends Controller {
static targets = ['contentStructure', 'ocrSettings', 'ocrAvailable', 'sttSettings',
'manuallyCorrectedOcr', 'runOcr', 'ocrLanguages', 'ocrDropdown', 'runOcrDocumentNotes',
'runOcrImageNotes', 'selectedLanguages', 'languageWarning', 'dropdownContent', 'ocrLanguageWrapper']
static targets = ['contentStructure', 'ocrSettings', 'ocrAvailable', 'sttSettings', 'sttAvailable', 'runStt',
'manuallyCorrectedOcr', 'manuallyCorrectedStt', 'runOcr', 'ocrLanguages', 'ocrDropdown', 'runOcrDocumentNotes',
'runOcrImageNotes', 'runSttNotes', 'selectedLanguages', 'languageWarning', 'dropdownContent', 'ocrLanguageWrapper']

static values = { languages: Array }

Expand Down Expand Up @@ -54,6 +54,10 @@ export default class extends Controller {
return 'Do the PDF documents comply with accessibility standards? More info: <a target=_blank href="https://uit.stanford.edu/accessibility/guides/pdf">PDF Accessibility</a>.'
}

labelMediaManuallyCorrected () {
return 'Have the caption/transcript files been corrected to comply with accessibility standards? More info: <a target=_blank href="https://www.w3.org/WAI/media/av/captions/">Captions</a> and <a target=_blank href="https://www.w3.org/WAI/media/av/transcripts/">Transcripts</a>.'
}

labelRunOcr () {
return `Do you want to auto-generate OCR files for the ${this.ocrFileTypeLabel()}?`
}
Expand All @@ -65,6 +69,7 @@ export default class extends Controller {
contentStructureChanged () {
// Hide the OCR and speech to text settings by default; we will show them if the content structure allows them to
this.ocrSettingsTarget.hidden = true
this.sttSettingsTarget.hidden = true

if (this.ocrAvailable()) {
this.showOcrControls()
Expand Down Expand Up @@ -102,6 +107,22 @@ export default class extends Controller {
// Show the Speech to text settings and controls
showSttControls () {
this.sttSettingsTarget.hidden = false

this.manuallyCorrectedSttTarget.querySelector('legend').innerHTML = this.labelMediaManuallyCorrected()
}

// if the user indicates they have speech to text available, show/hide the manually corrected and run stt options (for media)
sttAvailableChanged () {
const sttAvailable = this.sttAvailableTarget.querySelector('input[type="radio"]:checked').value === 'true'
this.manuallyCorrectedSttTarget.hidden = !sttAvailable
this.runSttTarget.hidden = sttAvailable
this.runSttChanged()
}

// if the user indicates they want to run SDR speech to text, show any relevant notes/warnings
runSttChanged () {
const runstt = this.runSttTarget.querySelector('input[type="radio"]:checked').value === 'true'
this.runSttNotesTarget.hidden = !runstt
}

// if the user indicates they have ocr available, show/hide the manually corrected and run OCR option (for images/books)
Expand Down
1 change: 1 addition & 0 deletions app/lib/pre_assembly/batch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class Batch
:project_name,
:staging_style_symlink,
:ocr_available,
:stt_available,
to: :batch_context

def initialize(job_run, file_manifest: nil)
Expand Down
9 changes: 6 additions & 3 deletions app/lib/pre_assembly/digital_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class DigitalObject
:processing_configuration,
:content_structure,
:ocr_available,
:stt_available,
:project_name,
:file_manifest,
to: :batch
Expand Down Expand Up @@ -121,6 +122,7 @@ def build_structural
build_from_staging_location(objects: object_files.sort,
processing_configuration:,
ocr_available:,
stt_available:,
reading_order:)
end
end
Expand Down Expand Up @@ -161,13 +163,14 @@ def update_structural_metadata
object_client.update(params: updated_cocina)
end

def build_from_staging_location(objects:, processing_configuration:, reading_order:, ocr_available:)
filesets = FromStagingLocation::FileSetBuilder.build(processing_configuration:, ocr_available:, objects:, style: content_md_creation_style)
def build_from_staging_location(objects:, processing_configuration:, reading_order:, ocr_available:, stt_available:)
filesets = FromStagingLocation::FileSetBuilder.build(processing_configuration:, ocr_available:, stt_available:, objects:, style: content_md_creation_style)
FromStagingLocation::StructuralBuilder.build(cocina_dro: existing_cocina_object,
filesets:,
all_files_public: batch.batch_context.all_files_public?,
reading_order:,
manually_corrected_ocr: batch.batch_context.manually_corrected_ocr)
manually_corrected_ocr: batch.batch_context.manually_corrected_ocr,
manually_corrected_stt: batch.batch_context.manually_corrected_stt)
end

# The reading order for books is determined by what the user set when registering the object.
Expand Down
12 changes: 10 additions & 2 deletions app/lib/pre_assembly/from_staging_location/file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,17 @@ class File
'application/xml' => { preserve: 'yes', shelve: 'yes', publish: 'yes', role: 'transcription', corrected_for_accessibility: true }
}.freeze

# TODO: which files represent the speech to text files when provided, and thus need the transcription role?
# if the user tells us they are providing speech to text, we will set the transcription role and possibly also corrected_for_accessibility
ATTRIBUTES_FOR_TYPE_WITH_STT = {
'application/xml' => { preserve: 'yes', shelve: 'yes', publish: 'yes', role: 'transcription', corrected_for_accessibility: true }
}.freeze

# @param [Assembly::ObjectFile] file
def initialize(file:, ocr_available:)
def initialize(file:, ocr_available:, stt_available:)
@file = file
@ocr_available = ocr_available
@stt_available = stt_available
end

delegate :sha1, :md5, :provider_md5, :mimetype, :filesize, :relative_path, to: :file
Expand All @@ -51,10 +58,11 @@ def file_attributes

private

attr_reader :file, :ocr_available
attr_reader :file, :ocr_available, :stt_available

# Look up the file attributes for a mimetype.
# OCR-specific attributes are checked first, then speech to text specific attributes;
# otherwise the general attributes for the mimetype are used, falling back to the 'default' entry.
# @param [String] mimetype
# @return [Hash] the attributes for the file
def file_attributes_for_mimetype(mimetype)
  if ocr_available && ATTRIBUTES_FOR_TYPE_WITH_OCR.key?(mimetype)
    ATTRIBUTES_FOR_TYPE_WITH_OCR[mimetype]
  elsif stt_available && ATTRIBUTES_FOR_TYPE_WITH_STT.key?(mimetype)
    ATTRIBUTES_FOR_TYPE_WITH_STT[mimetype]
  else
    ATTRIBUTES_FOR_TYPE[mimetype] || ATTRIBUTES_FOR_TYPE['default']
  end
end
Expand Down
8 changes: 5 additions & 3 deletions app/lib/pre_assembly/from_staging_location/file_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ class FileSet
# @param [Array<Assembly::ObjectFile>] resource_files
# @param [Symbol] style one of: :simple_image, :file, :simple_book, :book_as_image, :book_with_pdf, :map, :geo, or :'3d'
# @param [Boolean] ocr_available
def initialize(resource_files:, style:, ocr_available:)
# @param [Boolean] stt_available
def initialize(resource_files:, style:, ocr_available:, stt_available:)
@resource_files = resource_files
@style = style
@ocr_available = ocr_available
@stt_available = stt_available
end

# otherwise look at the style to determine the resource_type_description
Expand All @@ -25,12 +27,12 @@ def label_from_file(default:)
end

def files
resource_files.map { |file| File.new(file:, ocr_available:) }
resource_files.map { |file| File.new(file:, ocr_available:, stt_available:) }
end

private

attr_reader :resource_files, :style, :ocr_available
attr_reader :resource_files, :style, :ocr_available, :stt_available

# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity
Expand Down
14 changes: 8 additions & 6 deletions app/lib/pre_assembly/from_staging_location/file_set_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@ class FileSetBuilder
# @param [Symbol] processing_configuration one of: :default or :filename
# @param [Array<Assembly::ObjectFile>] objects
# @param [Symbol] style one of: :simple_image, :file, :simple_book, :book_as_image, :book_with_pdf, :map, :geo, or :'3d'
def self.build(processing_configuration:, objects:, style:, ocr_available:)
new(processing_configuration:, objects:, style:, ocr_available:).build
def self.build(processing_configuration:, objects:, style:, ocr_available:, stt_available:)
new(processing_configuration:, objects:, style:, ocr_available:, stt_available:).build
end

def initialize(processing_configuration:, objects:, style:, ocr_available:)
def initialize(processing_configuration:, objects:, style:, ocr_available:, stt_available:)
@processing_configuration = processing_configuration.to_sym
@objects = objects
@style = style
@ocr_available = ocr_available
@stt_available = stt_available
end

# @return [Array<FileSet>] a list of filesets in the object
Expand All @@ -24,7 +25,7 @@ def build

case processing_configuration
when :default # one resource per object
objects.collect { |obj| FileSet.new(resource_files: [obj], style:, ocr_available:) }
objects.collect { |obj| FileSet.new(resource_files: [obj], style:, ocr_available:, stt_available:) }
when :filename, :filename_with_ocr # one resource per distinct filename (excluding extension)
build_for_filename
else
Expand All @@ -34,7 +35,7 @@ def build

private

attr_reader :processing_configuration, :objects, :style
attr_reader :processing_configuration, :objects, :style, :stt_available

# until the new OCR settings are available, we have to look in the processing configuration
def ocr_available
Expand All @@ -52,7 +53,8 @@ def build_for_filename
distinct_filenames.map do |distinct_filename|
FileSet.new(resource_files: objects.collect { |obj| obj if obj.filename_without_ext == distinct_filename }.compact,
style:,
ocr_available:)
ocr_available:,
stt_available:)
end
end
end
Expand Down
15 changes: 11 additions & 4 deletions app/lib/pre_assembly/from_staging_location/structural_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,32 @@ class StructuralBuilder
# @param [String] reading_order
# @param [Boolean] all_files_public
# @param [Boolean] manually_corrected_ocr set by user when creating the job
def self.build(filesets:, cocina_dro:, all_files_public:, reading_order: nil, manually_corrected_ocr: false, ocr_available: false)
# @param [Boolean] manually_corrected_stt set by user when creating the job
# @param [Boolean] ocr_available set by user when creating the job
# @param [Boolean] stt_available set by user when creating the job
def self.build(filesets:, cocina_dro:, all_files_public:, reading_order: nil, manually_corrected_ocr: false, ocr_available: false, manually_corrected_stt: false, stt_available: false)
new(filesets:,
cocina_dro:,
reading_order:,
all_files_public:,
manually_corrected_ocr:,
ocr_available:).build
ocr_available:,
stt_available:,
manually_corrected_stt:).build
end

def initialize(filesets:, cocina_dro:, all_files_public:, reading_order:, manually_corrected_ocr:, ocr_available:)
def initialize(filesets:, cocina_dro:, all_files_public:, reading_order:, manually_corrected_ocr:, ocr_available:, manually_corrected_stt:, stt_available:)
@filesets = filesets
@cocina_dro = cocina_dro
@reading_order = reading_order
@all_files_public = all_files_public
@manually_corrected_ocr = manually_corrected_ocr
@ocr_available = ocr_available
@manually_corrected_stt = manually_corrected_stt
@stt_available = stt_available
end
# rubocop:enable Metrics/ParameterLists
attr_reader :filesets, :cocina_dro, :reading_order, :all_files_public, :manually_corrected_ocr, :ocr_available
attr_reader :filesets, :cocina_dro, :reading_order, :all_files_public, :manually_corrected_ocr, :ocr_available, :manually_corrected_stt, :stt_available

# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/MethodLength
Expand Down
2 changes: 1 addition & 1 deletion app/models/batch_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def verify_output_dir_no_exists
def verify_file_manifest_selected_for_media
return unless content_structure == 'media' && !using_file_manifest

errors.add(:content_structure, 'requires a file manifest. Please select the checkbox and ensure a file manifest is present.')
errors.add(:content_structure, 'requires a file manifest. Please indicate you have a file manifest and ensure a file manifest is present.')
end

def verify_staging_location
Expand Down
8 changes: 6 additions & 2 deletions app/services/start_accession.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@
# Starts the accession workflow by calling dor-services-app
# See https://sul-dlss.github.io/dor-services-app/#operation/objects#accession
class StartAccession
# rubocop:disable Metrics/AbcSize
def self.run(druid:, batch_context:, workflow: nil)
object_client = Dor::Services::Client.object(druid)

workflow_context = {}
workflow_context[:runOCR] = batch_context.run_ocr
workflow_context[:manuallyCorrectedOCR] = batch_context.manually_corrected_ocr
workflow_context[:runOCR] = batch_context.run_ocr if batch_context.run_ocr
workflow_context[:manuallyCorrectedOCR] = batch_context.manually_corrected_ocr if batch_context.manually_corrected_ocr
workflow_context[:runSpeechToText] = batch_context.run_stt if batch_context.run_stt
workflow_context[:manuallyCorrectedStt] = batch_context.manually_corrected_stt if batch_context.manually_corrected_stt
workflow_context[:ocrLanguages] = batch_context.ocr_languages unless batch_context.ocr_languages.empty?

params = { description: 'pre-assembly re-accession', opening_user_name: batch_context.user.sunet_id, workflow: }
params[:context] = workflow_context if workflow_context.present?

object_client.accession.start(params)
end
# rubocop:enable Metrics/AbcSize
end
15 changes: 10 additions & 5 deletions app/views/batch_contexts/_new_bc_form.erb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
collection: [['Yes', true], ['No', false]] %>
</div>
<div data-caption-target="runOcr" hidden>
<%= form.input :run_ocr, input_html: { data: { action: "change->caption#runOcrChanged", "ocr-target": "runOcrOptions" } },
<%= form.input :run_ocr, input_html: { data: { action: "change->caption#runOcrChanged", "caption-target": "runOcrOptions" } },
as: :radio_buttons,
collection: [['Yes', true], ['No', false]] %>
<div data-caption-target="runOcrImageNotes" hidden>
Expand Down Expand Up @@ -82,19 +82,24 @@
</div>
</div>

<% unless Settings.ocr.enabled # if OCR is not enabled, keep the processing configuration menu %>
<%= form.input :processing_configuration, collection: processing_configuration %>
<% end %>
<div data-caption-target="sttSettings" hidden>
<div data-caption-target="sttAvailable" hidden>
<div data-caption-target="sttAvailable">
<%= form.input :stt_available, label: 'Do you have caption/transcript files for the media?', input_html: { data: { action: "change->caption#sttAvailableChanged" } },
as: :radio_buttons,
collection: [['Yes', true], ['No', false]] %>
</div>
<div data-caption-target="manuallyCorrectedStt" hidden>
<%= form.input :manually_corrected_stt, input_html: { data: { action: "change->caption#manuallyCorrectedSttChanged" } },
<%= form.input :manually_corrected_stt,
as: :radio_buttons,
collection: [['Yes', true], ['No', false]] %>
</div>
<div data-caption-target="runStt" hidden>
<%= form.input :run_stt, input_html: { data: { action: "change->caption#runSttChanged", "caption-target": "runSttOptions" } },
<div data-caption-target="runStt">
<%= form.input :run_stt, label: 'Would you like to auto-generate caption/transcript files for the media?',
input_html: { data: { action: "change->caption#runSttChanged" } },
as: :radio_buttons,
collection: [['Yes', true], ['No', false]] %>
<div data-caption-target="runSttNotes" hidden>
Expand Down
7 changes: 7 additions & 0 deletions db/migrate/20241011190309_stt.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Adds the speech to text boolean flags (each defaulting to false) to batch_contexts
class Stt < ActiveRecord::Migration[7.1]
  def change
    %i[run_stt manually_corrected_stt stt_available].each do |column_name|
      add_column :batch_contexts, column_name, :boolean, default: false
    end
  end
end
5 changes: 4 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.1].define(version: 2024_05_29_215747) do
ActiveRecord::Schema[7.1].define(version: 2024_10_11_190309) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"

Expand Down Expand Up @@ -40,6 +40,9 @@
t.boolean "manually_corrected_ocr", default: false
t.jsonb "ocr_languages", default: []
t.boolean "ocr_available", default: false
t.boolean "run_stt", default: false
t.boolean "manually_corrected_stt", default: false
t.boolean "stt_available", default: false
t.index ["user_id", "project_name"], name: "index_batch_contexts_on_user_id_and_project_name", unique: true
t.index ["user_id"], name: "index_batch_contexts_on_user_id"
end
Expand Down
3 changes: 2 additions & 1 deletion spec/features/preassembly_run/dark_file_object_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
filesets: Array,
all_files_public: false,
reading_order: nil,
manually_corrected_ocr: false)
manually_corrected_ocr: false,
manually_corrected_stt: false)
expect(dsc_object).to have_received(:update).with(params: item)
expect(StartAccession).to have_received(:run).with(druid: "druid:#{bare_druid}", batch_context: BatchContext.last, workflow: 'assemblyWF')
end
Expand Down
3 changes: 2 additions & 1 deletion spec/features/preassembly_run/document_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@
filesets: Array,
all_files_public: false,
reading_order: nil,
manually_corrected_ocr: false)
manually_corrected_ocr: false,
manually_corrected_stt: false)
expect(dsc_object).to have_received(:update).with(params: item)
expect(StartAccession).to have_received(:run).with(druid: "druid:#{bare_druid}", batch_context: BatchContext.last, workflow: 'assemblyWF')
end
Expand Down
3 changes: 2 additions & 1 deletion spec/features/preassembly_run/geo_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
filesets: [], # a blank array for any geo object
all_files_public: false,
reading_order: nil,
manually_corrected_ocr: false)
manually_corrected_ocr: false,
manually_corrected_stt: false)
expect(dsc_object).to have_received(:update).with(params: item)
expect(StartAccession).to have_received(:run).with(druid: "druid:#{bare_druid}", batch_context: BatchContext.last, workflow: 'gisAssemblyWF')
end
Expand Down
3 changes: 2 additions & 1 deletion spec/features/preassembly_run/hierarchical_files_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@
filesets: Array,
all_files_public: false,
reading_order: nil,
manually_corrected_ocr: false)
manually_corrected_ocr: false,
manually_corrected_stt: false)
expect(dsc_object).to have_received(:update).with(params: item)
expect(StartAccession).to have_received(:run).with(druid: "druid:#{bare_druid}", batch_context: BatchContext.last, workflow: 'assemblyWF')
end
Expand Down
Loading

0 comments on commit de05e0f

Please sign in to comment.