Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AO3-6625 Replace whitespace in download filenames with underscores #4645

Merged
merged 3 commits into from
Oct 29, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
AO3-6625 Don't include whitespace in download filenames
sarken committed Oct 23, 2023
commit afdf8e1e4c3a7d247631afae83b6ec4520c3e402
11 changes: 9 additions & 2 deletions app/models/download.rb
Original file line number Diff line number Diff line change
@@ -51,10 +51,15 @@ def file_type_from_mime(mime)
end
end

# The base name of the file (eg, "War and Peace")
# The base name of the file (e.g., "War_and_Peace")
def file_name
name = clean(work.title)
name += " Work #{work.id}" if name.length < 3
extender = "Work_#{work.id}"
if name.length == 0
name = extender
elsif name.length < 3
name += "_#{extender}"
end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My JS-addled immediately thought of "simplifying" that using join:

name = [name, "Work_#{work.id}"].compact_blank.join("_") if name.length < 3

But not exactly sure this is indeed better.

name.strip
end

@@ -125,6 +130,7 @@ def chapters
# squash spaces
# strip all non-alphanumeric
# truncate to 24 chars at a word boundary
# replace whitespace with underscore for bug with epub table of contents on Kindle (AO3-6625)
def clean(string)
# get rid of any HTML entities to avoid things like "amp" showing up in titles
string = string.gsub(/\&(\w+)\;/, '')
@@ -133,6 +139,7 @@ def clean(string)
string = string.gsub(/ +/, " ")
string = string.strip
string = string.truncate(24, separator: ' ', omission: '')
string = string.gsub(/\s/, "_")
string
end
end
10 changes: 6 additions & 4 deletions spec/mailers/user_mailer_spec.rb
Original file line number Diff line number Diff line change
@@ -1110,10 +1110,11 @@
end

it "has the correct attachments" do
filename = work.title.gsub(/\s/, "_")
expect(email.attachments.length).to eq(2)
expect(email.attachments).to contain_exactly(
an_object_having_attributes(filename: "#{work.title}.html"),
an_object_having_attributes(filename: "#{work.title}.txt")
an_object_having_attributes(filename: "#{filename}.html"),
an_object_having_attributes(filename: "#{filename}.txt")
)
end

@@ -1162,10 +1163,11 @@
end

it "has the correct attachments" do
filename = work.title.gsub(/\s/, "_")
expect(email.attachments.length).to eq(2)
expect(email.attachments).to contain_exactly(
an_object_having_attributes(filename: "#{work.title}.html"),
an_object_having_attributes(filename: "#{work.title}.txt")
an_object_having_attributes(filename: "#{filename}.html"),
an_object_having_attributes(filename: "#{filename}.txt")
)
end

18 changes: 9 additions & 9 deletions spec/models/download_spec.rb
Original file line number Diff line number Diff line change
@@ -13,36 +13,36 @@

# Arabic
work.title = "هذا عمل جديد"
expect(Download.new(work).file_name).to eq("hdh ml jdyd")
expect(Download.new(work).file_name).to eq("hdh_ml_jdyd")

# Chinese
work.title = "我哥好像被奇怪的人盯上了怎么破"
expect(Download.new(work).file_name).to eq("Wo Ge Hao Xiang Bei Qi")
expect(Download.new(work).file_name).to eq("Wo_Ge_Hao_Xiang_Bei_Qi")

# Japanese
work.title = "二重スパイは接点を持つ"
expect(Download.new(work).file_name).to eq("Er Zhong supaihaJie Dian")
expect(Download.new(work).file_name).to eq("Er_Zhong_supaihaJie_Dian")

# Hebrew
work.title = "לחזור הביתה"
expect(Download.new(work).file_name).to eq("lkhzvr hbyth")
expect(Download.new(work).file_name).to eq("lkhzvr_hbyth")
end

it "removes HTML entities and emojis" do
work.title = "Two of Hearts <3 &amp; >.< &"
expect(Download.new(work).file_name).to eq("Two of Hearts 3")
expect(Download.new(work).file_name).to eq("Two_of_Hearts_3")

work.title = "Emjoi 🤩 Yay 🥳"
expect(Download.new(work).file_name).to eq("Emjoi Yay")
expect(Download.new(work).file_name).to eq("Emjoi_Yay")
end

it "appends work ID if too short" do
work.id = 999_999
work.title = "Uh"
expect(Download.new(work).file_name).to eq("Uh Work 999999")
expect(Download.new(work).file_name).to eq("Uh_Work_999999")

work.title = ""
expect(Download.new(work).file_name).to eq("Work 999999")
expect(Download.new(work).file_name).to eq("Work_999999")

work.title = "wat"
expect(Download.new(work).file_name).to eq("wat")
@@ -53,7 +53,7 @@
expect(Download.new(work).file_name).to eq("123456789-123456789-1234")

work.title = "123456789 123456789 123456789"
expect(Download.new(work).file_name).to eq("123456789 123456789")
expect(Download.new(work).file_name).to eq("123456789_123456789")
end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe an additional test for the edge case of a work title having several consecutive spaces like A mystery?

end

10 changes: 6 additions & 4 deletions spec/support/shared_examples/mailer_shared_examples.rb
Original file line number Diff line number Diff line change
@@ -41,16 +41,18 @@

shared_examples "an email with a deleted work with draft chapters attached" do
it "has html and txt attachments" do
filename = work.title.gsub(/\s/, "_")
expect(email.attachments.length).to eq(2)
expect(email.attachments).to contain_exactly(
an_object_having_attributes(filename: "#{work.title}.html"),
an_object_having_attributes(filename: "#{work.title}.txt")
an_object_having_attributes(filename: "#{filename}.html"),
an_object_having_attributes(filename: "#{filename}.txt")
)
end

it "includes draft chapters in attachments" do
html_attachment = email.attachments["#{work.title}.html"].body.raw_source
txt_attachment = email.attachments["#{work.title}.txt"].body.raw_source
filename = work.title.gsub(/\s/, "_")
html_attachment = email.attachments["#{filename}.html"].body.raw_source
txt_attachment = email.attachments["#{filename}.txt"].body.raw_source
decoded_html_content = Base64.decode64(html_attachment)
decoded_txt_content = Base64.decode64(txt_attachment)