Skip to content

Commit

Permalink
Generate FAQ schema from page body
Browse files Browse the repository at this point in the history
At present, we're presenting a whole part of a guide as an FAQ question/answer.
This limits the impact of the content design we apply, because it doesn't
break up the content in rich results in the same way as we do on GOV.UK.

The code now parses the govspeak generated HTML and uses the text in h2
headings as the "question", the id of the h2 to generate URLs with anchors,
and the other markup as the "answer".

This enables us to display structured markup on more pages (such as mainstream
answers), and also on every part in a guide.

We will still avoid using this markup on travel advice pages because of the
latency in results being updated.

It now requires the body to be passed in directly to the machine readable
component rather than the component having to know too much about parts.
  • Loading branch information
sihugh committed Sep 19, 2019
1 parent 03ec9ce commit a8d7a65
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 49 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
useful summary for people upgrading their application, not a replication
of the commit log.

## Unreleased

* Iterate FAQ schema to split content around h2 headings. [PR #1127](https://github.com/alphagov/govuk_publishing_components/pull/1127)

## 20.5.2

* Update [govuk-frontend to 3.2.0](https://github.com/alphagov/govuk-frontend/releases/tag/v3.2.0) ([PR #1113](https://github.com/alphagov/govuk_publishing_components/pull/1113))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
image_url("govuk_publishing_components/govuk-schema-placeholder-4x3.png"),
image_url("govuk_publishing_components/govuk-schema-placeholder-16x9.png"),
] %>
<% local_assigns[:request_path] = request.path %>
<% page = GovukPublishingComponents::Presenters::Page.new(local_assigns) %>
<% structured_data = GovukPublishingComponents::Presenters::SchemaOrg.new(page).structured_data %>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ examples:
- slug: treating-injuries
body: Get to know a good blacksmith
title: Treating injuries
body: >
<p>Training scaly beasts can be tricky</p>
<h2 id='preparation'>Preparation</h2>
<p>Prepare by meditating and writing a will</p>
<h2 id='afterwards'>Afterwards</h2>
<p>Your next of kin may need a strong stomach</p>
schema: :faq
canonical_url: https://www.gov.uk/how-to-train-your-dragon
person_schema:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,72 @@ def structured_data

def main_entity
{
"mainEntity" => questions_and_answers
"mainEntity" => questions_and_answers_markup
}
end

def questions_and_answers
page.parts.each_with_index.map do |part, index|
part_url = part_url(part, index)
def questions_and_answers_markup
question_and_answers(page.body).map do |question, value|
q_and_a_url = section_url(value[:anchor])

{
"@type" => "Question",
"name" => part['title'],
"url" => part_url,
"name" => question,
"url" => q_and_a_url,
"acceptedAnswer" => {
"@type" => "Answer",
"url" => part_url,
"text" => part['body']
"url" => q_and_a_url,
"text" => value[:answer]
}
}
end
end

def part_url(part, index)
if index.zero?
guide_url
else
guide_url + "/" + part["slug"]

# Splits rendered HTML into FAQ sections and returns them as a Hash keyed by
# question text.
#
# The children of the parsed document's body are walked in order:
# - a question heading (as decided by question_element?) opens a new section
#   keyed by the heading's text, recording the heading's id as :anchor;
# - every other node is serialised and appended to the :answer of the section
#   that is currently open.
#
# Markup that appears before the first heading is collected under "Summary",
# which therefore has an :answer but no :anchor.
#
# Things to be aware of:
# - a heading whose text repeats an earlier one replaces the earlier entry;
# - a heading that is not followed by any other node has no :answer key.
def question_and_answers(html)
  body_nodes = Nokogiri::HTML(html).xpath("html/body").children
  current_question = "Summary"
  sections = {}

  body_nodes.each do |node|
    if question_element?(node)
      current_question = node.text
      sections[current_question] = { anchor: node["id"] }
      next
    end

    # Covers both the preamble (no entry yet) and a section opened by a heading.
    section = (sections[current_question] ||= {})
    if section[:answer]
      section[:answer] << node.to_s
    else
      section[:answer] = node.to_s
    end
  end

  sections
end

def guide_url
# Headings with this tag name act as the FAQ "questions"; the markup that sits
# between two of them is treated as the "answer".
QUESTION_TAG = "h2".freeze

# True when the given node's tag name is the question heading tag.
def question_element?(element)
  QUESTION_TAG == element.name
end

# URL for one section of the page: the page URL followed by "#<anchor>" when
# an anchor is present, otherwise just the page URL.
def section_url(anchor)
  if anchor.present?
    page_url + "#" + anchor
  else
    page_url
  end
end

# Absolute URL of the page: the website root reported by Plek joined with the
# page's base path.
def page_url
  website_root = Plek.new.website_root
  website_root + page.base_path
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ def logo_url
local_assigns[:logo_url]
end

def parts
content_item.dig("details", "parts") || []
# Returns whatever was supplied under the :request_path key of local_assigns
# (nil-or-default behaviour depends on the local_assigns object itself).
# NOTE(review): the machine readable metadata template appears to set this
# from request.path before building the Page — confirm against the view.
def requested_path
local_assigns[:request_path]
end
end
end
Expand Down
118 changes: 85 additions & 33 deletions spec/lib/govuk_publishing_components/presenters/schema_org_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,44 +62,96 @@
expect(structured_data['@type']).to eql("NewsArticle")
end

it "generates schema.org FAQPages" do
content_item = GovukSchemas::RandomExample.for_schema(
frontend_schema: "guide",
canonical_url: "http://www.dev.gov.uk/how-to-train-your-dragon/insurance"
) do |random_item|
random_item.merge(
"base_path" => "/how-to-train-your-dragon",
"details" => {
"parts" => [
{
"title" => "Overview",
"slug" => "overview",
"body" => "<p>First catch your dragon</p>"
},
{
"title" => "Insurance",
"slug" => "insurance",
"body" => "<p>Contact the Berk insurance bureau for more details.</p>"
}
]
}
)
context "schema.org FAQPages" do
it "parses html into questions and answers" do
part_body = "<h2 id='step-one'>Step one</h2>
<p>First catch your dragon</p>
<h2 id='step-two'>Step two</h2>
<p>Give it a treat</p>
<h2 id='step-three'>Step three</h2>
<p>Give it a pat (wear gloves)</p>"

content_item = guide_with(part_body)

structured_data = generate_structured_data(
content_item: content_item,
schema: :faq,
body: part_body
).structured_data

expect(structured_data['@type']).to eql("FAQPage")

q_and_a = structured_data['mainEntity']
expect(q_and_a.count).to eq(3)

expect(q_and_a.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon#step-one")
expect(q_and_a.second["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon#step-two")

expect(q_and_a.first["name"]).to eq("Step one")
expect(q_and_a.first["acceptedAnswer"]["text"].strip).to eq("<p>First catch your dragon</p>")

expect(q_and_a.second["name"]).to eq("Step two")
expect(q_and_a.second["acceptedAnswer"]["text"].strip).to eq("<p>Give it a treat</p>")
end

structured_data = generate_structured_data(
content_item: content_item,
schema: :faq,
).structured_data
it "handles missing h2s at the start of the body" do
part_body = "<p>First catch your dragon</p>
<h2 id='step-two'>Step two</h2>
<p>Give it a treat</p>
<h2 id='step-three'>Step three</h2>
<p>Give it a pat (wear gloves)</p>"

content_item = guide_with(part_body)


q_and_a = generate_structured_data(
content_item: content_item,
schema: :faq,
body: part_body
).structured_data['mainEntity']

expect(structured_data['@type']).to eql("FAQPage")
expect(q_and_a.first["name"]).to eq("Summary")
expect(q_and_a.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon")
expect(q_and_a.first["acceptedAnswer"]["text"].strip).to eq("<p>First catch your dragon</p>")
end

it "handles an empty body to ensure that preview works OK" do
empty_part_body = ""
content_item = guide_with(empty_part_body)

q_and_a_pairs = structured_data['mainEntity']
expect(q_and_a_pairs.count).to eq(2)
expect(q_and_a_pairs.first["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon")
expect(q_and_a_pairs.second["url"]).to eq("http://www.dev.gov.uk/how-to-train-your-dragon/insurance")
q_and_a = generate_structured_data(
content_item: content_item,
schema: :faq,
body: empty_part_body
).structured_data['mainEntity']

expect(q_and_a).to eq([])
end

expect(q_and_a_pairs.first["name"]).to eq("Overview")
expect(q_and_a_pairs.first["acceptedAnswer"]["text"]).to eq("<p>First catch your dragon</p>")
def guide_with(body)
content_item = GovukSchemas::RandomExample.for_schema(
frontend_schema: "guide",
canonical_url: "http://www.dev.gov.uk/how-to-train-your-dragon/insurance"
) do |random_item|
random_item.merge(
"base_path" => "/how-to-train-your-dragon",
"details" => {
"parts" => [
{
"title" => "Overview",
"slug" => "overview",
"body" => body
},
{
"title" => "Insurance",
"slug" => "insurance",
"body" => "<p>Contact the Berk insurance bureau for more details.</p>"
}
]
}
)
end
end
end

it "generates schema.org Person" do
Expand Down

0 comments on commit a8d7a65

Please sign in to comment.