diff --git a/CHANGELOG.md b/CHANGELOG.md index cb546ebbbf..d767a0340f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ useful summary for people upgrading their application, not a replication of the commit log. +## Unreleased + +* Iterate FAQ schema to split content around h2 headings. [PR #1127](https://github.com/alphagov/govuk_publishing_components/pull/1127) + ## 20.5.2 * Update to [govuk-frontend to 3.2.0](https://github.com/alphagov/govuk-frontend/releases/tag/v3.2.0) ([PR #1113](https://github.com/alphagov/govuk_publishing_components/pull/1113)) diff --git a/app/views/govuk_publishing_components/components/_machine_readable_metadata.html.erb b/app/views/govuk_publishing_components/components/_machine_readable_metadata.html.erb index d7eb0515dc..b55c29891f 100644 --- a/app/views/govuk_publishing_components/components/_machine_readable_metadata.html.erb +++ b/app/views/govuk_publishing_components/components/_machine_readable_metadata.html.erb @@ -4,6 +4,7 @@ image_url("govuk_publishing_components/govuk-schema-placeholder-4x3.png"), image_url("govuk_publishing_components/govuk-schema-placeholder-16x9.png"), ] %> +<% local_assigns[:request_path] = request.path %> <% page = GovukPublishingComponents::Presenters::Page.new(local_assigns) %> <% structured_data = GovukPublishingComponents::Presenters::SchemaOrg.new(page).structured_data %> diff --git a/app/views/govuk_publishing_components/components/docs/machine_readable_metadata.yml b/app/views/govuk_publishing_components/components/docs/machine_readable_metadata.yml index 52a99b49f1..56321912c7 100644 --- a/app/views/govuk_publishing_components/components/docs/machine_readable_metadata.yml +++ b/app/views/govuk_publishing_components/components/docs/machine_readable_metadata.yml @@ -54,6 +54,12 @@ examples: - slug: treating-injuries body: Get to know a good blacksmith title: Treating injuries + body: > +
Training scaly beasts can be tricky
+Prepare by meditating and writing a will
+Your next of kin may need a strong stomach
schema: :faq canonical_url: https://www.gov.uk/how-to-train-your-dragon person_schema: diff --git a/lib/govuk_publishing_components/presenters/machine_readable/faq_page_schema.rb b/lib/govuk_publishing_components/presenters/machine_readable/faq_page_schema.rb index 9cfee9674d..15cb7b3e8b 100644 --- a/lib/govuk_publishing_components/presenters/machine_readable/faq_page_schema.rb +++ b/lib/govuk_publishing_components/presenters/machine_readable/faq_page_schema.rb @@ -19,36 +19,72 @@ def structured_data def main_entity { - "mainEntity" => questions_and_answers + "mainEntity" => questions_and_answers_markup } end - def questions_and_answers - page.parts.each_with_index.map do |part, index| - part_url = part_url(part, index) + def questions_and_answers_markup + question_and_answers(page.body).map do |question, value| + q_and_a_url = section_url(value[:anchor]) { "@type" => "Question", - "name" => part['title'], - "url" => part_url, + "name" => question, + "url" => q_and_a_url, "acceptedAnswer" => { "@type" => "Answer", - "url" => part_url, - "text" => part['body'] + "url" => q_and_a_url, + "text" => value[:answer] } } end end - def part_url(part, index) - if index.zero? - guide_url - else - guide_url + "/" + part["slug"] + + # Generates a hash of questions and associated information: + # - question: the text in the h2 tag preceding other markup. Questions are + # used to key the hash. "Summary" is set as the default, as + # there is often a preamble in guides before any h2 is set. + # + # - :answer: the markup that is not an h2 tag. It is associated with the + # preceding h2 header. + # + # - :anchor: the id of the h2 (autogenerated by the markdown converter). + # This is used to build links directly to the section in question + def question_and_answers(html) + doc = Nokogiri::HTML(html) + + question = "Summary" + + doc.xpath("html/body").children.each_with_object({}) do |element, q_and_as| + if question_element?(element) + question = element.text + q_and_as[question] = { anchor: element["id"] } + else + if q_and_as[question].nil? + q_and_as[question] = { answer: element.to_s } + elsif q_and_as[question][:answer].nil? + q_and_as[question][:answer] = element.to_s + else + q_and_as[question][:answer] << element.to_s + end + end end end - def guide_url + # we use H2 tags as the "question" and the html between them as the "answer" + QUESTION_TAG = "h2".freeze + + def question_element?(element) + element.name == QUESTION_TAG + end + + def section_url(anchor) + return page_url + "#" + anchor if anchor.present? + page_url + end + + def page_url Plek.new.website_root + page.base_path end end diff --git a/lib/govuk_publishing_components/presenters/machine_readable/page.rb b/lib/govuk_publishing_components/presenters/machine_readable/page.rb index 943d2065a5..31635142b6 100644 --- a/lib/govuk_publishing_components/presenters/machine_readable/page.rb +++ b/lib/govuk_publishing_components/presenters/machine_readable/page.rb @@ -59,8 +59,8 @@ def logo_url local_assigns[:logo_url] end - def parts - content_item.dig("details", "parts") || [] + def requested_path + local_assigns[:request_path] end end end diff --git a/spec/lib/govuk_publishing_components/presenters/schema_org_spec.rb b/spec/lib/govuk_publishing_components/presenters/schema_org_spec.rb index 34cc506335..5350f9385a 100644 --- a/spec/lib/govuk_publishing_components/presenters/schema_org_spec.rb +++ b/spec/lib/govuk_publishing_components/presenters/schema_org_spec.rb @@ -62,44 +62,96 @@ expect(structured_data['@type']).to eql("NewsArticle") end - it "generates schema.org FAQPages" do - content_item = GovukSchemas::RandomExample.for_schema( - frontend_schema: "guide", - canonical_url: "http://www.dev.gov.uk/how-to-train-your-dragon/insurance" - ) do |random_item| - random_item.merge( - "base_path" => "/how-to-train-your-dragon", - "details" => { - "parts" => [ - { - "title" => "Overview", - "slug" => "overview", - "body" => "First catch your dragon
" - }, - { - "title" => "Insurance", - "slug" => "insurance", - "body" => "Contact the Berk insurance bureau for more details.
" - } - ] - } - ) + context "schema.org FAQPages" do + it "parses html into questions and answers" do + part_body = "First catch your dragon
+Give it a treat
+Give it a pat (wear gloves)
" + + content_item = guide_with(part_body) + + structured_data = generate_structured_data( + content_item: content_item, + schema: :faq, + body: part_body + ).structured_data + + expect(structured_data['@type']).to eql("FAQPage") + + q_and_a = structured_data['mainEntity'] + expect(q_and_a.count).to eq(3) + + expect(q_and_a.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon#step-one") + expect(q_and_a.second["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon#step-two") + + expect(q_and_a.first["name"]).to eq("Step one") + expect(q_and_a.first["acceptedAnswer"]["text"].strip).to eq("First catch your dragon
") + + expect(q_and_a.second["name"]).to eq("Step two") + expect(q_and_a.second["acceptedAnswer"]["text"].strip).to eq("Give it a treat
") end - structured_data = generate_structured_data( - content_item: content_item, - schema: :faq, - ).structured_data + it "handles missing h2s at the start of the body" do + part_body = "First catch your dragon
+Give it a treat
+Give it a pat (wear gloves)
" + + content_item = guide_with(part_body) + + + q_and_a = generate_structured_data( + content_item: content_item, + schema: :faq, + body: part_body + ).structured_data['mainEntity'] - expect(structured_data['@type']).to eql("FAQPage") + expect(q_and_a.first["name"]).to eq("Summary") + expect(q_and_a.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon") + expect(q_and_a.first["acceptedAnswer"]["text"].strip).to eq("First catch your dragon
") + end + + it "handles an empty body to ensure that preview works OK" do + empty_part_body = "" + content_item = guide_with(empty_part_body) - q_and_a_pairs = structured_data['mainEntity'] - expect(q_and_a_pairs.count).to eq(2) - expect(q_and_a_pairs.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon") - expect(q_and_a_pairs.second["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon/insurance") + q_and_a = generate_structured_data( + content_item: content_item, + schema: :faq, + body: empty_part_body + ).structured_data['mainEntity'] + + expect(q_and_a).to eq([]) + end - expect(q_and_a_pairs.first["name"]).to eq("Overview") - expect(q_and_a_pairs.first["acceptedAnswer"]["text"]).to eq("First catch your dragon
") + def guide_with(body) + content_item = GovukSchemas::RandomExample.for_schema( + frontend_schema: "guide", + canonical_url: "http://www.dev.gov.uk/how-to-train-your-dragon/insurance" + ) do |random_item| + random_item.merge( + "base_path" => "/how-to-train-your-dragon", + "details" => { + "parts" => [ + { + "title" => "Overview", + "slug" => "overview", + "body" => body + }, + { + "title" => "Insurance", + "slug" => "insurance", + "body" => "Contact the Berk insurance bureau for more details.
" + } + ] + } + ) + end + end end it "generates schema.org Person" do