Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AO3-6464 Allow ruby, rt, and rp HTML tags where HTML is allowed #4443

Merged
merged 4 commits into from
Feb 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions app/helpers/application_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,11 @@ def link_to_rss(link_to_feed)
link_to content_tag(:span, ts("RSS Feed")), link_to_feed, title: ts("RSS Feed"), class: "rss"
end

#1: default shows just the link to help
#2: show_text = true: shows "plain text with limited html" and link to help
#3 show_list = true: plain text and limited html, link to help, list of allowed html
def allowed_html_instructions(show_list = false, show_text=true)
(show_text ? h(ts("Plain text with limited HTML")) : ''.html_safe) +
link_to_help("html-help") + (show_list ?
"<code>a, abbr, acronym, address, [alt], [axis], b, big, blockquote, br, caption, center, cite, [class], code,
col, colgroup, dd, del, dfn, [dir], div, dl, dt, em, h1, h2, h3, h4, h5, h6, [height], hr, [href], i, img,
ins, kbd, li, [name], ol, p, pre, q, s, samp, small, span, [src], strike, strong, sub, sup, table, tbody, td,
tfoot, th, thead, [title], tr, tt, u, ul, var, [width]</code>" : "").html_safe
# Returns the "allowed HTML" hint: an optional intro text followed by the html-help link.
# show_text = true (the default): "Plain text with limited HTML" and the link to help
# show_text = false: just the link to help
def allowed_html_instructions(show_text = true)
  intro = show_text ? h(ts("Plain text with limited HTML")) : "".html_safe
  intro + link_to_help("html-help")
end

# Byline helpers
Expand Down
2 changes: 1 addition & 1 deletion app/views/bookmarks/_bookmark_form.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
<% if bookmarkable.class != ExternalWork %>
<%= ts("The creator's summary is added automatically.") %>
<% end %>
<%= allowed_html_instructions(show_list = false) %>
<%= allowed_html_instructions %>
</p>
<%= f.text_area :bookmarker_notes, rows: 4, id: notes_id, class: "observe_textlength",
"aria-describedby" => "notes-field-description" %>
Expand Down
2 changes: 1 addition & 1 deletion app/views/comments/_comment_form.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
<span><%= ts("to") %> <%= get_commenter_pseud_or_name(commentable) %> <%= ts("on") %> <%= commentable_description_link(commentable) %></span>
<% end %>
<% end %>
(<%= allowed_html_instructions(show_list=false) %>)
(<%= allowed_html_instructions %>)
</h4>
<% else %>
<h4 class="heading"><%= ts("Comment as") %> <span class="byline"><%= current_user.default_pseud.name %></span>
Expand Down
41 changes: 20 additions & 21 deletions config/initializers/gem-plugin_config/sanitizer_config.rb
Original file line number Diff line number Diff line change
@@ -1,40 +1,39 @@
# Sanitize: http://github.com/rgrove/sanitize.git
class Sanitize

# This defines the configuration we use for HTML tags and attributes allowed in the archive.
module Config
ARCHIVE = freeze_config(
elements: %w[
a abbr acronym address b big blockquote br caption center cite code col
colgroup details figcaption figure dd del dfn div dl dt em h1 h2 h3 h4 h5 h6 hr
i img ins kbd li ol p pre q s samp small span strike strong
i img ins kbd li ol p pre q rp rt ruby s samp small span strike strong
sub summary sup table tbody td tfoot th thead tr tt u ul var
],
attributes: {
all: ['align', 'title', 'dir'],
'a' => ['href', 'name'],
'blockquote' => ['cite'],
'col' => ['span', 'width'],
'colgroup' => ['span', 'width'],
'hr' => ['align', 'width'],
'img' => ['align', 'alt', 'border', 'height', 'src', 'width'],
'ol' => ['start', 'type'],
'q' => ['cite'],
'table' => ['border', 'summary', 'width'],
'td' => ['abbr', 'axis', 'colspan', 'height', 'rowspan', 'width'],
'th' => ['abbr', 'axis', 'colspan', 'height', 'rowspan', 'scope', 'width'],
'ul' => ['type'],
all: %w[align title dir],
"a" => %w[href name],
"blockquote" => %w[cite],
"col" => %w[span width],
"colgroup" => %w[span width],
"hr" => %w[align width],
"img" => %w[align alt border height src width],
"ol" => %w[start type],
"q" => %w[cite],
"table" => %w[border summary width],
"td" => %w[abbr axis colspan height rowspan width],
"th" => %w[abbr axis colspan height rowspan scope width],
"ul" => %w[type]
},

add_attributes: {
'a' => {'rel' => 'nofollow'}
"a" => { "rel" => "nofollow" }
},

protocols: {
'a' => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
'blockquote' => {'cite' => ['http', 'https', :relative]},
'img' => {'src' => ['http', 'https', :relative]},
'q' => {'cite' => ['http', 'https', :relative]}
"a" => { "href" => ["ftp", "http", "https", "mailto", :relative] },
"blockquote" => { "cite" => ["http", "https", :relative] },
"img" => { "src" => ["http", "https", :relative] },
"q" => { "cite" => ["http", "https", :relative] }
},

# TODO: This can be removed once we upgrade sanitizer gem, AO3-5801
Expand All @@ -45,7 +44,7 @@ module Config
CLASS_ATTRIBUTE = freeze_config(
# see in the Transformers section for what classes we strip
attributes: {
all: ARCHIVE[:attributes][:all] + ['class']
all: ARCHIVE[:attributes][:all] + ["class"]
}
)

Expand Down
4 changes: 2 additions & 2 deletions lib/paragraph_maker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ module ParagraphMaker

# Tags that need to go inside p tags
TAG_NAMES_TO_WRAP = (%w[
a abbr acronym b big br cite code del dfn em i img ins kbd q s samp small
span strike strong sub sup tt u var
a abbr acronym b big br cite code del dfn em i img ins kbd q rp rt ruby
s samp small span strike strong sub sup tt u var
] + INLINE_INVALID_TAGS).freeze

# Tags that can't be inside p tags
Expand Down
2 changes: 1 addition & 1 deletion public/help/html-help.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ <h3>Allowed HTML</h3>
<p>
<code>a, abbr, acronym, address, [align], [alt], [axis], b, big, blockquote, br, caption, center, cite, [class], code,
col, colgroup, dd, del, details, dfn, div, dl, dt, em, figcaption, figure, h1, h2, h3, h4, h5, h6, [height], hr, [href], i, img,
ins, kbd, li, [name], ol, p, pre, q, s, samp, small, span, [src], strike, strong, sub, summary, sup, table, tbody, td,
ins, kbd, li, [name], ol, p, pre, q, rp, rt, ruby, s, samp, small, span, [src], strike, strong, sub, summary, sup, table, tbody, td,
tfoot, th, thead, [title], tr, tt, u, ul, var, [width]
</code>
</p>
Expand Down
32 changes: 31 additions & 1 deletion spec/lib/html_cleaner_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,18 @@
end
end
end

# Ruby annotation markup (ruby, rt, rp) must come back from sanitize_value
# unchanged for every field listed in ArchiveConfig.FIELDS_ALLOWING_HTML.
ArchiveConfig.FIELDS_ALLOWING_HTML.each do |field|
  it "preserves ruby-annotated HTML in #{field}" do
    markup = "<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>"
    expect(sanitize_value(field, markup)).to include(markup)
  end

  it "preserves ruby-annotated HTML without rp in #{field}" do
    markup = "<ruby>BigText<rt>small_text</rt></ruby>"
    expect(sanitize_value(field, markup)).to include(markup)
  end
end
end

describe "fix_bad_characters" do
Expand Down Expand Up @@ -903,6 +915,12 @@
expect(doc.xpath("./p[3]").children.to_s.strip).to eq("yadda")
end

it "wraps ruby-annotated text in p tags" do
  # The whole input, ruby markup included, should be the content of the first paragraph.
  html = "text with <ruby>ルビ<rp> (</rp><rt>RUBY</rt><rp>)</rp></ruby>"
  first_paragraph = Nokogiri::HTML.fragment(add_paragraphs_to_text(html)).xpath("./p[1]")
  expect(first_paragraph.children.to_s.strip).to eq(html)
end

it "should keep attributes of block elements" do
result = add_paragraphs_to_text("<div class='foo'>some\n\ntext</div>")
doc = Nokogiri::HTML.fragment(result)
Expand All @@ -924,12 +942,24 @@
expect(doc.xpath("./p[contains(@class, 'bar')]").children.to_s.strip).to eq("foobar")
end

it "should close unclosed tag within other tag" do
it "closes unclosed tag within other tag" do
html = "<strong><em>unclosed</strong>"
doc = Nokogiri::HTML.fragment(add_paragraphs_to_text(html))
expect(doc.xpath("./p/strong/em").children.to_s.strip).to eq("unclosed")
end

it "closes unclosed rt tags" do
  # The input never closes its rt element; the output is expected to contain the closed form.
  repaired = "<ruby>big text<rt>small text</rt></ruby>"
  expect(add_paragraphs_to_text("<ruby>big text<rt>small text</ruby>")).to include(repaired)
end

it "closes unclosed rp tag" do
  # The trailing rp element is left open in the input; the output is expected to close it.
  repaired = "<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</rp></ruby>"
  unclosed = "<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</ruby>"
  expect(add_paragraphs_to_text(unclosed)).to include(repaired)
end

it "should re-nest mis-nested tags" do
html = "some <em><strong>text</em></strong>"
doc = Nokogiri::HTML.fragment(add_paragraphs_to_text(html))
Expand Down