Skip to content

Commit

Permalink
Merge pull request #1127 from alphagov/faq-iteration
Browse files Browse the repository at this point in the history
Generate FAQ schema from page body
  • Loading branch information
sihugh authored Sep 19, 2019
2 parents 03ec9ce + a3882d8 commit c7e6dd0
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 49 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
useful summary for people upgrading their application, not a replication
of the commit log.

## Unreleased

* Iterate FAQ schema to split content around h2 headings. [PR #1127](https://github.com/alphagov/govuk_publishing_components/pull/1127)

## 20.5.2

* Update to [govuk-frontend to 3.2.0](https://github.com/alphagov/govuk-frontend/releases/tag/v3.2.0) ([PR #1113](https://github.com/alphagov/govuk_publishing_components/pull/1113))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
image_url("govuk_publishing_components/govuk-schema-placeholder-4x3.png"),
image_url("govuk_publishing_components/govuk-schema-placeholder-16x9.png"),
] %>
<% local_assigns[:request_path] = request.path %>
<% page = GovukPublishingComponents::Presenters::Page.new(local_assigns) %>
<% structured_data = GovukPublishingComponents::Presenters::SchemaOrg.new(page).structured_data %>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ examples:
- slug: treating-injuries
body: Get to know a good blacksmith
title: Treating injuries
body: >
<p>Training scaly beasts can be tricky</p>
<h2 id='preparation'>Preparation</h2>
<p>Prepare by meditating and writing a will</p>
<h2 id='afterwards'>Afterwards</h2>
<p>Your next of kin may need a strong stomach</p>
schema: :faq
canonical_url: https://www.gov.uk/how-to-train-your-dragon
person_schema:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,82 @@ def structured_data

def main_entity
{
"mainEntity" => questions_and_answers
"mainEntity" => questions_and_answers_markup
}
end

def questions_and_answers
page.parts.each_with_index.map do |part, index|
part_url = part_url(part, index)
def questions_and_answers_markup
question_and_answers(page.body).map do |question, value|
q_and_a_url = section_url(value[:anchor])

{
"@type" => "Question",
"name" => part['title'],
"url" => part_url,
"name" => question,
"url" => q_and_a_url,
"acceptedAnswer" => {
"@type" => "Answer",
"url" => part_url,
"text" => part['body']
"url" => q_and_a_url,
"text" => value[:answer]
}
}
end
end

def part_url(part, index)
if index.zero?
guide_url
else
guide_url + "/" + part["slug"]
# Splits body HTML into FAQ sections keyed by question text.
#
# The top-level nodes of the parsed body are visited in document order:
# - a question node (see question_element?) opens a new section keyed by its
#   text and stores its "id" attribute under :anchor (used to link straight
#   to that section);
# - any other node is serialised and appended to the :answer of the section
#   that is currently open.
#
# Markup found before the first question is collected under the default
# "Summary" key, which carries no :anchor. A question that is immediately
# followed by another question ends up with no :answer key.
#
# Returns a Hash of question text => { anchor: ..., answer: ... }.
def question_and_answers(html)
  body_nodes = Nokogiri::HTML(html).xpath("html/body").children
  current_question = "Summary"
  sections = {}

  body_nodes.each do |node|
    if question_element?(node)
      current_question = node.text
      sections[current_question] = { anchor: node["id"] }
    elsif question_hash_is_unset?(sections, current_question)
      # Preamble before any question: start the default section.
      sections[current_question] = { answer: node.to_s }
    elsif answer_is_unset?(sections, current_question)
      # First piece of markup after a question heading.
      sections[current_question][:answer] = node.to_s
    else
      sections[current_question][:answer] << node.to_s
    end
  end

  sections
end

# True when nothing has been recorded yet under +question+ in +q_and_as+.
def question_hash_is_unset?(q_and_as, question)
  existing_entry = q_and_as[question]
  existing_entry.nil?
end

# True when the entry stored under +question+ has no :answer key yet.
# An entry must already exist for +question+.
def answer_is_unset?(q_and_as, question)
  entry = q_and_as[question]
  !entry.key?(:answer)
end

# Tag name that marks a "question"; the HTML between two such tags is
# treated as the "answer".
QUESTION_TAG = "h2".freeze

# True when +element+ is a question heading, i.e. its tag name is QUESTION_TAG.
def question_element?(element)
  QUESTION_TAG == element.name
end

# URL for one section of the page: the page URL followed by "#<anchor>"
# when an anchor is present, otherwise the bare page URL.
def section_url(anchor)
  if anchor.present?
    page_url + "#" + anchor
  else
    page_url
  end
end

def guide_url
# Absolute URL of the page: the website root reported by Plek followed by
# the page's base path.
def page_url
  website_root = Plek.new.website_root
  website_root + page.base_path
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ def logo_url
local_assigns[:logo_url]
end

def parts
content_item.dig("details", "parts") || []
# Returns the value stored under :request_path in local_assigns.
# NOTE(review): presumably the path of the current request, supplied by the
# rendering template — confirm against the caller.
def requested_path
local_assigns[:request_path]
end
end
end
Expand Down
104 changes: 71 additions & 33 deletions spec/lib/govuk_publishing_components/presenters/schema_org_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,44 +62,82 @@
expect(structured_data['@type']).to eql("NewsArticle")
end

it "generates schema.org FAQPages" do
content_item = GovukSchemas::RandomExample.for_schema(
frontend_schema: "guide",
canonical_url: "http://www.dev.gov.uk/how-to-train-your-dragon/insurance"
) do |random_item|
random_item.merge(
"base_path" => "/how-to-train-your-dragon",
"details" => {
"parts" => [
{
"title" => "Overview",
"slug" => "overview",
"body" => "<p>First catch your dragon</p>"
},
{
"title" => "Insurance",
"slug" => "insurance",
"body" => "<p>Contact the Berk insurance bureau for more details.</p>"
}
]
}
)
context "schema.org FAQPages" do
it "parses html into questions and answers" do
part_body = "<h2 id='step-one'>Step one</h2>
<p>First catch your dragon</p>
<h2 id='step-two'>Step two</h2>
<p>Give it a treat</p>
<h2 id='step-three'>Step three</h2>
<p>Give it a pat (wear gloves)</p>"

content_item = dragon_guide

structured_data = generate_structured_data(
content_item: content_item,
schema: :faq,
body: part_body
).structured_data

expect(structured_data['@type']).to eql("FAQPage")

q_and_a = structured_data['mainEntity']
expect(q_and_a.count).to eq(3)

expect(q_and_a.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon#step-one")
expect(q_and_a.second["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon#step-two")

expect(q_and_a.first["name"]).to eq("Step one")
expect(q_and_a.first["acceptedAnswer"]["text"].strip).to eq("<p>First catch your dragon</p>")

expect(q_and_a.second["name"]).to eq("Step two")
expect(q_and_a.second["acceptedAnswer"]["text"].strip).to eq("<p>Give it a treat</p>")
end

structured_data = generate_structured_data(
content_item: content_item,
schema: :faq,
).structured_data
it "handles missing h2s at the start of the body" do
part_body = "<p>First catch your dragon</p>
<h2 id='step-two'>Step two</h2>
<p>Give it a treat</p>
<h2 id='step-three'>Step three</h2>
<p>Give it a pat (wear gloves)</p>"

content_item = dragon_guide

expect(structured_data['@type']).to eql("FAQPage")

q_and_a_pairs = structured_data['mainEntity']
expect(q_and_a_pairs.count).to eq(2)
expect(q_and_a_pairs.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon")
expect(q_and_a_pairs.second["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon/insurance")
q_and_a = generate_structured_data(
content_item: content_item,
schema: :faq,
body: part_body
).structured_data['mainEntity']

expect(q_and_a_pairs.first["name"]).to eq("Overview")
expect(q_and_a_pairs.first["acceptedAnswer"]["text"]).to eq("<p>First catch your dragon</p>")
expect(q_and_a.first["name"]).to eq("Summary")
expect(q_and_a.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon")
expect(q_and_a.first["acceptedAnswer"]["text"].strip).to eq("<p>First catch your dragon</p>")
end

it "handles an empty body to ensure that preview works OK" do
empty_part_body = ""
content_item = dragon_guide

q_and_a = generate_structured_data(
content_item: content_item,
schema: :faq,
body: empty_part_body
).structured_data['mainEntity']

expect(q_and_a).to eq([])
end

# Builds a random "guide" content item whose base_path is pinned to
# /how-to-train-your-dragon, so URL expectations in the examples are stable.
def dragon_guide
  schema_options = {
    frontend_schema: "guide",
    canonical_url: "http://www.dev.gov.uk/how-to-train-your-dragon/insurance",
  }

  GovukSchemas::RandomExample.for_schema(**schema_options) do |generated_item|
    generated_item.merge("base_path" => "/how-to-train-your-dragon")
  end
end
end

it "generates schema.org Person" do
Expand Down

0 comments on commit c7e6dd0

Please sign in to comment.