otwcode · redsummernight · Feb 20, 2023 · Jan 25, 2023 · Feb 3, 2023 · Feb 4, 2023
diff --git a/config/initializers/gem-plugin_config/sanitizer_config.rb b/config/initializers/gem-plugin_config/sanitizer_config.rb
@@ -1,40 +1,39 @@
 # Sanitize: http://github.com/rgrove/sanitize.git
 class Sanitize
-
   # This defines the configuration we use for HTML tags and attributes allowed in the archive.
   module Config
     ARCHIVE = freeze_config(
       elements: %w[
         a abbr acronym address b big blockquote br caption center cite code col
         colgroup figcaption figure dd del dfn div dl dt em h1 h2 h3 h4 h5 h6 hr
-        i img ins kbd li ol p pre q s samp small span strike strong
+        i img ins kbd li ol p pre q rp rt ruby s samp small span strike strong
         sub sup table tbody td tfoot th thead tr tt u ul var
       ],
       attributes: {
-        all: ['align', 'title', 'dir'],
-        'a' => ['href', 'name'],
-        'blockquote' => ['cite'],
-        'col' => ['span', 'width'],
-        'colgroup' => ['span', 'width'],
-        'hr' => ['align', 'width'],
-        'img' => ['align', 'alt', 'border', 'height', 'src', 'width'],
-        'ol' => ['start', 'type'],
-        'q' => ['cite'],
-        'table' => ['border', 'summary', 'width'],
-        'td' => ['abbr', 'axis', 'colspan', 'height', 'rowspan', 'width'],
-        'th' => ['abbr', 'axis', 'colspan', 'height', 'rowspan', 'scope', 'width'],
-        'ul' => ['type'],
+        all: %w[align title dir],
+        "a" => %w[href name],
+        "blockquote" => %w[cite],
+        "col" => %w[span width],
+        "colgroup" => %w[span width],
+        "hr" => %w[align width],
+        "img" => %w[align alt border height src width],
+        "ol" => %w[start type],
+        "q" => %w[cite],
+        "table" => %w[border summary width],
+        "td" => %w[abbr axis colspan height rowspan width],
+        "th" => %w[abbr axis colspan height rowspan scope width],
+        "ul" => %w[type]
       },
 
       add_attributes: {
-        'a' => {'rel' => 'nofollow'}
+        "a" => { "rel" => "nofollow" }
       },
 
       protocols: {
-        'a' => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
-        'blockquote' => {'cite' => ['http', 'https', :relative]},
-        'img' => {'src' => ['http', 'https', :relative]},
-        'q' => {'cite' => ['http', 'https', :relative]}
+        "a" => { "href" => ["ftp", "http", "https", "mailto", :relative] },
+        "blockquote" => { "cite" => ["http", "https", :relative] },
+        "img" => { "src" => ["http", "https", :relative] },
+        "q" => { "cite" => ["http", "https", :relative] }
       },
 
       # TODO: This can be removed once we upgrade sanitizer gem, AO3-5801
@@ -45,7 +44,7 @@ module Config
     CLASS_ATTRIBUTE = freeze_config(
       # see in the Transformers section for what classes we strip
       attributes: {
-        all: ARCHIVE[:attributes][:all] + ['class']
+        all: ARCHIVE[:attributes][:all] + ["class"]
       }
     )
 

diff --git a/lib/paragraph_maker.rb b/lib/paragraph_maker.rb
@@ -24,8 +24,8 @@ module ParagraphMaker
 
   # Tags that need to go inside p tags
   TAG_NAMES_TO_WRAP = (%w[
-    a abbr acronym b big br cite code del dfn em i img ins kbd q s samp small
-    span strike strong sub sup tt u var
+    a abbr acronym b big br cite code del dfn em i img ins kbd q rp rt ruby
+    s samp small span strike strong sub sup tt u var
   ] + INLINE_INVALID_TAGS).freeze
 
   # Tags that can't be inside p tags

diff --git a/public/help/html-help.html b/public/help/html-help.html
@@ -4,7 +4,7 @@ <h3>Allowed HTML</h3>
 <p>
   <code>a, abbr, acronym, address, [align], [alt], [axis], b, big, blockquote, br, caption, center, cite, [class], code, 
     col, colgroup, dd, del, dfn, div, dl, dt, em, figcaption, figure, h1, h2, h3, h4, h5, h6, [height], hr, [href], i, img, 
-    ins, kbd, li, [name], ol, p, pre, q, s, samp, small, span, [src], strike, strong, sub, sup, table, tbody, td, 
+    ins, kbd, li, [name], ol, p, pre, q, rp, rt, ruby, s, samp, small, span, [src], strike, strong, sub, sup, table, tbody, td, 
   # Builds the hint shown for inputs that accept HTML.
   #
   # The result is made of up to three pieces, joined in this order:
   #   1. the "Plain text with limited HTML" label, passed through ts() and
   #      h() (only when show_text is true);
   #   2. the link produced by link_to_help("html-help") (always);
   #   3. a <code> list of allowed tags and [attributes] (only when
   #      show_list is true).
   #
   # show_list - when true, append the inline list of allowed HTML.
   # show_text - when true (the default), prepend the label.
   #
   # The inline list is kept in step with the elements and attributes of
   # Sanitize::Config::ARCHIVE and with public/help/html-help.html, so it
   # includes rp, rt, ruby, figcaption, figure and [align].
   def allowed_html_instructions(show_list = false, show_text = true)
     label = show_text ? h(ts("Plain text with limited HTML")) : "".html_safe
     # The literal is fixed markup written here, never user input, which is
     # why it is marked html_safe below.
     allowed_list = if show_list
                      "<code>a, abbr, acronym, address, [align], [alt], [axis], b, big, blockquote, br, caption, center, cite, [class], code,
       col, colgroup, dd, del, dfn, [dir], div, dl, dt, em, figcaption, figure, h1, h2, h3, h4, h5, h6, [height], hr, [href], i, img,
       ins, kbd, li, [name], ol, p, pre, q, rp, rt, ruby, s, samp, small, span, [src], strike, strong, sub, sup, table, tbody, td,
       tfoot, th, thead, [title], tr, tt, u, ul, var, [width]</code>"
                    else
                      ""
                    end
     label + link_to_help("html-help") + allowed_list.html_safe
   end
     tfoot, th, thead, [title], tr, tt, u, ul, var, [width]
   </code>
 </p>

diff --git a/spec/lib/html_cleaner_spec.rb b/spec/lib/html_cleaner_spec.rb
@@ -486,6 +486,18 @@
         end
       end
     end
+
+    ArchiveConfig.FIELDS_ALLOWING_HTML.each do |field|
+      it "preserves ruby-annotated HTML in #{field}" do
+        result = sanitize_value(field, "<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>")
+        expect(result).to include("<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>")
+      end
+
+      it "preserves ruby-annotated HTML without rp in #{field}" do
+        result = sanitize_value(field, "<ruby>BigText<rt>small_text</rt></ruby>")
+        expect(result).to include("<ruby>BigText<rt>small_text</rt></ruby>")
+      end
+    end
   end
 
   describe "fix_bad_characters" do
@@ -843,6 +855,12 @@
       expect(doc.xpath("./p[3]").children.to_s.strip).to eq("yadda")
     end
 
+    it "wraps ruby-annotated text in p tags" do
+      result = add_paragraphs_to_text("text with <ruby>ルビ<rp> (</rp><rt>RUBY</rt><rp>)</rp></ruby>")
+      doc = Nokogiri::HTML.fragment(result)
+      expect(doc.xpath("./p[1]").children.to_s.strip).to eq("text with <ruby>ルビ<rp> (</rp><rt>RUBY</rt><rp>)</rp></ruby>")
+    end
+
     it "should keep attributes of block elements" do
       result = add_paragraphs_to_text("<div class='foo'>some\n\ntext</div>")
       doc = Nokogiri::HTML.fragment(result)
@@ -864,12 +882,24 @@
       expect(doc.xpath("./p[contains(@class, 'bar')]").children.to_s.strip).to eq("foobar")
     end
 
-    it "should close unclosed tag within other tag" do
+    it "closes unclosed tag within other tag" do
       html = "<strong><em>unclosed</strong>"
       doc = Nokogiri::HTML.fragment(add_paragraphs_to_text(html))
       expect(doc.xpath("./p/strong/em").children.to_s.strip).to eq("unclosed")
     end
 
+    it "closes unclosed rt tags" do
+      html = "<ruby>big text<rt>small text</ruby>"
+      result = add_paragraphs_to_text(html)
+      expect(result).to include("<ruby>big text<rt>small text</rt></ruby>")
+    end
+
+    it "closes unclosed rp tag" do
+      html = "<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</ruby>"
+      result = add_paragraphs_to_text(html)
+      expect(result).to include("<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</rp></ruby>")
+    end
+
     it "should re-nest mis-nested tags" do
       html = "some <em><strong>text</em></strong>"
       doc = Nokogiri::HTML.fragment(add_paragraphs_to_text(html))

diff --git a/spec/lib/word_counter_spec.rb b/spec/lib/word_counter_spec.rb
@@ -85,4 +85,9 @@
     word_counter.text = "\“嘿Bob,\” Alice说，‘啊？！？’"
     expect(word_counter.count).to eq(5)
   end
+
+  it "doesn't count parentheses in rp" do
+    word_counter.text = "<ruby>日本語<rp>（</rp><rt>にほんご</rt><rp>）</rp></ruby>"
+    expect(word_counter.count).to eq(7)
+  end
 end