AO3-6625 Don't include whitespace in download filenames

otwcode · neuroalien · Oct 29, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 24, 2023
commit afdf8e1e4c3a7d247631afae83b6ec4520c3e402
diff --git a/app/models/download.rb b/app/models/download.rb
@@ -51,10 +51,15 @@ def file_type_from_mime(mime)
     end
   end
 
-  # The base name of the file (eg, "War and Peace")
+  # The base name of the file (e.g., "War_and_Peace")
   def file_name
     name = clean(work.title)
-    name += " Work #{work.id}" if name.length < 3
+    extender = "Work_#{work.id}"
+    if name.length == 0
+      name = extender
+    elsif name.length < 3
+      name += "_#{extender}"
+    end
     name.strip
   end
 
@@ -125,6 +130,7 @@ def chapters
   # squash spaces
   # strip all non-alphanumeric
   # truncate to 24 chars at a word boundary
+  # replace whitespace with underscore for bug with epub table of contents on Kindle (AO3-6625)
   def clean(string)
     # get rid of any HTML entities to avoid things like "amp" showing up in titles
     string = string.gsub(/\&(\w+)\;/, '')
@@ -133,6 +139,7 @@ def clean(string)
     string = string.gsub(/ +/, " ")
     string = string.strip
     string = string.truncate(24, separator: ' ', omission: '')
+    string = string.gsub(/\s/, "_")
     string
   end
 end
diff --git a/spec/mailers/user_mailer_spec.rb b/spec/mailers/user_mailer_spec.rb
@@ -1110,10 +1110,11 @@
     end
 
     it "has the correct attachments" do
+      filename = work.title.gsub(/\s/, "_")
       expect(email.attachments.length).to eq(2)
       expect(email.attachments).to contain_exactly(
-        an_object_having_attributes(filename: "#{work.title}.html"),
-        an_object_having_attributes(filename: "#{work.title}.txt")
+        an_object_having_attributes(filename: "#{filename}.html"),
+        an_object_having_attributes(filename: "#{filename}.txt")
       )
     end
 
@@ -1162,10 +1163,11 @@
     end
 
     it "has the correct attachments" do
+      filename = work.title.gsub(/\s/, "_")
       expect(email.attachments.length).to eq(2)
       expect(email.attachments).to contain_exactly(
-        an_object_having_attributes(filename: "#{work.title}.html"),
-        an_object_having_attributes(filename: "#{work.title}.txt")
+        an_object_having_attributes(filename: "#{filename}.html"),
+        an_object_having_attributes(filename: "#{filename}.txt")
       )
     end
 

diff --git a/spec/models/download_spec.rb b/spec/models/download_spec.rb
@@ -13,36 +13,36 @@
 
       # Arabic
       work.title = "هذا عمل جديد"
-      expect(Download.new(work).file_name).to eq("hdh ml jdyd")
+      expect(Download.new(work).file_name).to eq("hdh_ml_jdyd")
 
       # Chinese
       work.title = "我哥好像被奇怪的人盯上了怎么破"
-      expect(Download.new(work).file_name).to eq("Wo Ge Hao Xiang Bei Qi")
+      expect(Download.new(work).file_name).to eq("Wo_Ge_Hao_Xiang_Bei_Qi")
 
       # Japanese
       work.title = "二重スパイは接点を持つ"
-      expect(Download.new(work).file_name).to eq("Er Zhong supaihaJie Dian")
+      expect(Download.new(work).file_name).to eq("Er_Zhong_supaihaJie_Dian")
 
       # Hebrew
       work.title = "לחזור הביתה"
-      expect(Download.new(work).file_name).to eq("lkhzvr hbyth")
+      expect(Download.new(work).file_name).to eq("lkhzvr_hbyth")
     end
 
     it "removes HTML entities and emojis" do
       work.title = "Two of Hearts <3 &amp; >.< &"
-      expect(Download.new(work).file_name).to eq("Two of Hearts 3")
+      expect(Download.new(work).file_name).to eq("Two_of_Hearts_3")
 
       work.title = "Emjoi 🤩 Yay 🥳"
-      expect(Download.new(work).file_name).to eq("Emjoi Yay")
+      expect(Download.new(work).file_name).to eq("Emjoi_Yay")
     end
 
     it "appends work ID if too short" do
       work.id = 999_999
       work.title = "Uh"
-      expect(Download.new(work).file_name).to eq("Uh Work 999999")
+      expect(Download.new(work).file_name).to eq("Uh_Work_999999")
 
       work.title = ""
-      expect(Download.new(work).file_name).to eq("Work 999999")
+      expect(Download.new(work).file_name).to eq("Work_999999")
 
       work.title = "wat"
       expect(Download.new(work).file_name).to eq("wat")
@@ -53,7 +53,7 @@
       expect(Download.new(work).file_name).to eq("123456789-123456789-1234")
 
       work.title = "123456789 123456789 123456789"
-      expect(Download.new(work).file_name).to eq("123456789 123456789")
+      expect(Download.new(work).file_name).to eq("123456789_123456789")
     end
   end
 

diff --git a/spec/support/shared_examples/mailer_shared_examples.rb b/spec/support/shared_examples/mailer_shared_examples.rb
@@ -41,16 +41,18 @@
 
 shared_examples "an email with a deleted work with draft chapters attached" do
   it "has html and txt attachments" do
+    filename = work.title.gsub(/\s/, "_")
     expect(email.attachments.length).to eq(2)
     expect(email.attachments).to contain_exactly(
-      an_object_having_attributes(filename: "#{work.title}.html"),
-      an_object_having_attributes(filename: "#{work.title}.txt")
+      an_object_having_attributes(filename: "#{filename}.html"),
+      an_object_having_attributes(filename: "#{filename}.txt")
     )
   end
 
   it "includes draft chapters in attachments" do
-    html_attachment = email.attachments["#{work.title}.html"].body.raw_source
-    txt_attachment = email.attachments["#{work.title}.txt"].body.raw_source
+    filename = work.title.gsub(/\s/, "_")
+    html_attachment = email.attachments["#{filename}.html"].body.raw_source
+    txt_attachment = email.attachments["#{filename}.txt"].body.raw_source
     decoded_html_content = Base64.decode64(html_attachment)
     decoded_txt_content = Base64.decode64(txt_attachment)