-
-
Notifications
You must be signed in to change notification settings - Fork 114
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add HTML & XML Inspectors API using Nokogiri (#546)
* feat: add HTML Inspectors API using Nokogiri * test: fix lingering hooks * Update bridgetown-website/src/_docs/plugins/html-inspectors.md fix typo Co-authored-by: Andrew Mason <[email protected]> * Update bridgetown-website/src/_docs/plugins/html-inspectors.md Co-authored-by: Andrew Mason <[email protected]> * Refactor inspectors so XML is supported as well as HTML * Finishing updating documentation for Inspectors Co-authored-by: Andrew Mason <[email protected]>
- Loading branch information
1 parent
03ab947
commit d8cc141
Showing
13 changed files
with
392 additions
and
32 deletions.
There are no files selected for viewing
137 changes: 137 additions & 0 deletions
137
bridgetown-builder/lib/bridgetown-builder/dsl/inspectors.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# frozen_string_literal: true | ||
|
||
module Bridgetown | ||
module Builders | ||
module DSL | ||
module Inspectors | ||
# Adds a couple of familiar DOM API features. The host object must respond
# to `css(selector)`.
module QuerySelection
  # Look up the first node matching a CSS selector.
  #
  # @param selector [String] CSS selector handed to `css`
  # @return [Object, nil] the first match, or nil when `css` yields nothing
  def query_selector(selector)
    matches = css(selector)
    matches.first
  end

  # Look up every node matching a CSS selector.
  #
  # @param selector [String] CSS selector handed to `css`
  # @return [Object] whatever the host's `css` returns for the selector
  def query_selector_all(selector)
    css(selector)
  end
end
|
||
# HTML inspector type
module HTML
  # Are there inspectors available? Is it an .htm* file? Has the resource
  # not opted out via `bypass_inspectors`?
  #
  # Uses core `String#start_with?` rather than the ActiveSupport
  # `starts_with?` alias, so the check does not depend on that core
  # extension being loaded.
  #
  # @param resource [Object] must respond to `destination` (may be nil) and `data`
  # @param inspectors [Array<Proc>, nil] registered HTML inspector blocks
  # @return [Boolean, nil] truthy only when the inspectors should run
  def self.can_run?(resource, inspectors)
    inspectors &&
      resource.destination&.output_ext&.start_with?(".htm") &&
      !resource.data.bypass_inspectors
  end

  # Process the resource with the available inspectors and return the output HTML
  #
  # @param resource [Object] its `output` is parsed with `Nokogiri.HTML5`
  # @param inspectors [Array<Proc>] each is called with `(document, resource)`
  # @return [String] transformed HTML
  def self.call(resource, inspectors)
    doc = Nokogiri.HTML5(resource.output)

    # Inspectors run in registration order and mutate the same document.
    inspectors.each { |inspector| inspector.call(doc, resource) }

    doc.to_html
  end
end
|
||
# XML inspector type
module XML
  # Resource output extension without its leading dot. `.rss` => `rss`
  #
  # @param resource [Object] must respond to `destination` (may be nil)
  # @return [String, nil] nil when there is no destination or extension
  def self.resource_ext(resource)
    ext = resource.destination&.output_ext
    ext&.delete_prefix(".")
  end

  # Is there at least one inspector list registered for the resource's
  # extension, and has the resource not opted out via `bypass_inspectors`?
  #
  # @param resource [Object] must respond to `destination` and `data`
  # @param inspectors [Hash{String => Array<Proc>}, nil] blocks keyed by extension
  # @return [Boolean, nil] truthy only when the inspectors should run
  def self.can_run?(resource, inspectors)
    registered = inspectors && inspectors[resource_ext(resource)]
    registered && !resource.data.bypass_inspectors
  end

  # Run the inspectors registered for the resource's extension and return
  # the resulting XML
  #
  # @param resource [Object] its `output` is parsed with `Nokogiri::XML`
  # @param inspectors [Hash{String => Array<Proc>}] blocks keyed by extension
  # @return [String] transformed XML
  def self.call(resource, inspectors)
    doc = Nokogiri::XML(resource.output)
    matching = inspectors[resource_ext(resource)]

    matching.each { |inspector| inspector.call(doc, resource) }

    doc.to_xml
  end
end
|
||
class << self
  # Make sure Nokogiri is available, then mix `QuerySelection` into
  # `Nokogiri::XML::Node`. The gem is only required when the `Nokogiri`
  # constant is not defined yet, and the mixin is skipped when the node
  # class already includes it.
  def setup_nokogiri
    Bridgetown::Utils::RequireGems.require_with_graceful_fail "nokogiri" unless defined?(Nokogiri)

    node_class = Nokogiri::XML::Node
    node_class.include QuerySelection unless node_class <= QuerySelection
  end

  # Forwards all arguments to `HTML.call`
  def process_html(...)
    HTML.call(...)
  end

  # Forwards all arguments to `XML.call`
  def process_xml(...)
    XML.call(...)
  end
end
|
||
# Register an inspector to review or manipulate HTML resources.
#
# The first registration also sets up Nokogiri and installs a single
# `:resources, :post_render` hook; later registrations only append to the
# list that hook reads.
#
# @yield the block to be called after the resource has been rendered
# @yieldparam [Nokogiri::HTML5::Document] the Nokogiri document
# @yieldparam resource the resource being processed
# @return [Array<Proc>] every HTML inspector registered so far
def inspect_html(&block)
  return @_html_inspectors << block if @_html_inspectors

  @_html_inspectors = []
  Inspectors.setup_nokogiri

  hook(:resources, :post_render) do |resource|
    if HTML.can_run?(resource, @_html_inspectors)
      resource.output = Inspectors.process_html(resource, @_html_inspectors)
    end
  end

  @_html_inspectors << block
end
|
||
# Set up an inspector to review or manipulate XML resources
#
# The first registration also sets up Nokogiri and installs a single
# `:resources, :post_render` hook; later registrations only add to the
# registry that hook reads.
#
# @param extension [String, Symbol] defaults to `xml`; a leading dot is
#   ignored, so `"rss"` and `".rss"` register under the same key
# @yield the block to be called after the resource has been rendered
# @yieldparam [Nokogiri::XML::Document] the Nokogiri document
# @yieldparam resource the resource being processed
# @return [Hash{String => Array<Proc>}] all XML inspectors keyed by extension
def inspect_xml(extension = "xml", &block)
  unless @_xml_inspectors
    @_xml_inspectors = {}

    Inspectors.setup_nokogiri

    hook :resources, :post_render do |resource|
      next unless Inspectors::XML.can_run?(resource, @_xml_inspectors)

      resource.output = Inspectors.process_xml(resource, @_xml_inspectors)
    end
  end

  # `XML.resource_ext` strips the leading dot before looking up inspectors,
  # so a key stored with a dot would never match. Normalize it here.
  ext_key = extension.to_s.delete_prefix(".")
  (@_xml_inspectors[ext_key] ||= []) << block

  @_xml_inspectors
end
end | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# frozen_string_literal: true | ||
|
||
require "helper" | ||
|
||
Bridgetown::Builder # trigger autoload | ||
|
||
# Exercises the HTML and XML inspector DSL against resources added in-test.
class TestInspectors < BridgetownUnitTest
  include Bridgetown::Builders::DSL::Hooks
  include Bridgetown::Builders::DSL::Inspectors
  include Bridgetown::Builders::DSL::Resources

  def functions # stub to get hooks working
    @_test_functions
  end

  context "a resource after being transformed" do
    setup do
      Bridgetown.sites.clear
      @site = Site.new(site_configuration)
      @_test_functions = []

      # Rewrites every <h1>: swaps "World" for "Universe" and tags it with a class.
      inspect_html do |document|
        document.query_selector_all("h1").each do |heading|
          heading.content = heading.content.sub("World", "Universe")
          heading.add_class "universal"
        end
      end

      # Upcases the first entry title in `.atom` resources.
      inspect_xml "atom" do |document, resource|
        title = document.query_selector("entry > title")
        title.content = title.content.upcase

        assert_equal ".atom", resource.extname
      end
    end

    teardown do
      # Drop the registries so the next setup registers fresh inspectors.
      @_html_inspectors = nil
      @_xml_inspectors = nil
    end

    should "allow manipulation via Nokogiri" do
      add_resource :posts, "html-inspectors.md" do
        title "I'm a Markdown post!"
        content <<~MARKDOWN
          # Hello World!
        MARKDOWN
      end

      resource = @site.collections.posts.resources.first
      assert_equal 1, @site.collections.posts.resources.length
      assert_equal "# Hello World!", resource.content.strip
      resource.transform!
      assert_equal %(<html><head></head><body><h1 id="hello-world" class="universal">Hello Universe!</h1>\n</body></html>),
                   resource.output.strip
    end

    should "bypass inspectors with special front matter variable" do
      add_resource :posts, "html-inspectors-bypass.md" do
        title "I'm a Markdown post!"
        bypass_inspectors true
        content <<~MARKDOWN
          # Hello World!
        MARKDOWN
      end

      resource = @site.collections.posts.resources.first
      assert_equal 1, @site.collections.posts.resources.length
      assert_equal "# Hello World!", resource.content.strip
      resource.transform!

      # Comparing against one exact HTML string would pass for almost any
      # output. Check instead that the original heading text survived and
      # that nothing the HTML inspector adds is present.
      assert_includes resource.output, "Hello World!"
      refute_includes resource.output, "Universe"
      refute_includes resource.output, "universal"
    end

    should "not mess up non-HTML resources" do
      add_resource :posts, "no-html-inspectors.json" do
        content <<~JSON
          { a: 1, b: "2" }
        JSON
      end

      resource = @site.collections.posts.resources.first
      assert_equal 1, @site.collections.posts.resources.length
      assert_equal %({ a: 1, b: "2" }), resource.content.strip
      resource.transform!
      assert_equal %({ a: 1, b: "2" }),
                   resource.output.strip
    end

    should "work with XML resources too" do
      add_resource :pages, "sample-feed.atom" do
        content <<~XML
          <?xml version="1.0" encoding="utf-8"?>
          <feed xmlns="http://www.w3.org/2005/Atom">
          <title>Example Feed</title>
          <link href="http://example.org/"/>
          <updated>2003-12-13T18:30:02Z</updated>
          <author>
          <name>John Doe</name>
          </author>
          <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
          <entry>
          <title>Atom-Powered Robots Run Amok</title>
          <link href="http://example.org/2003/12/13/atom03"/>
          <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
          <updated>2003-12-13T18:30:02Z</updated>
          <summary>Some text.</summary>
          </entry>
          </feed>
        XML
      end

      resource = @site.collections.pages.resources.first
      assert_equal 1, @site.collections.pages.resources.length
      assert_includes resource.content, "<title>Atom-Powered Robots Run Amok</title>"
      resource.transform!
      assert_includes resource.output, "<title>ATOM-POWERED ROBOTS RUN AMOK</title>"
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Site builder that appends a "#" anchor link to article headings.
class Builders::Inspectors < SiteBuilder
  # Registers an HTML inspector that, for every `h2`/`h3` with an `id`
  # inside an `article`, appends a space followed by a link to that id.
  def build
    inspect_html do |document|
      headings = document.query_selector_all("article h2[id], article h3[id]")

      headings.each do |heading|
        anchor = document.create_element(
          "a", "#",
          href: "##{heading[:id]}",
          class: "heading-anchor"
        )

        heading << document.create_text_node(" ")
        heading << anchor
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.