Skip to content

Commit

Permalink
Fix encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
md-hamed committed Sep 24, 2021
1 parent 6c34dc1 commit 5f49fc2
Showing 1 changed file with 12 additions and 31 deletions.
43 changes: 12 additions & 31 deletions lib/griddler/email.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,6 @@ def extract_subject
clean_invalid_utf8_bytes(params[:subject])
end


##to delete
# Builds the body of the email: the reply portion extracted from
# text_or_sanitized_html, additionally passed through clean_raw_text
# when the :text param is present.
def extract_body
  reply = EmailParser.extract_reply_body(text_or_sanitized_html)
  # Only run the text cleaner when a :text param was actually supplied.
  return reply unless params.fetch(:text, '').presence

  clean_raw_text(reply)
end

def extract_headers
if params[:headers].is_a?(Hash)
deep_clean_invalid_utf8_bytes(params[:headers])
Expand All @@ -71,28 +63,9 @@ def text_or_sanitized_html
text.presence || clean_raw_html(params.fetch(:html, '')).presence
end


##to delete
# Cleans a plain-text body in three steps: encoding clean-up via
# clean_invalid_utf8_bytes, sanitizing with Rails::Html::FullSanitizer,
# then decoding HTML entities with HTMLEntities.
def clean_raw_text(text)
  utf8_text = clean_invalid_utf8_bytes(text)
  sanitized = Rails::Html::FullSanitizer.new.sanitize(utf8_text)
  HTMLEntities.new.decode(sanitized)
end

##to delete
# Cleans an HTML body: encoding clean-up via clean_invalid_utf8_bytes,
# a pass through `sanitize`, then HTML-entity decoding with HTMLEntities.
def clean_raw_html(html)
  # Adds the 'cid' and 'data' schemes to Loofah's acceptable protocols on
  # every call. NOTE(review): presumably so inline-image URLs survive
  # sanitizing — confirm; this mutates a library-level constant.
  %w[cid data].each do |protocol|
    Loofah::HTML5::WhiteList::ACCEPTABLE_PROTOCOLS.add(protocol)
  end

  utf8_html = clean_invalid_utf8_bytes(html)
  HTMLEntities.new.decode(sanitize(utf8_html))
end

# Returns +text+ passed through clean_invalid_utf8_bytes, falling back to ''
# when that call yields nil or false.
#
# NOTE(review): this listing is a rendered diff without +/- markers. Going by
# the hunk line counts, the one-argument call directly below appears to be the
# removed (pre-change) line and the two statements after it the added ones —
# confirm against the raw diff before treating this body as runnable code.
def clean_raw_utf8(text)
clean_invalid_utf8_bytes(text) || ''
# NOTE(review): the source encoding is hard-coded to EUC-KR for every input —
# confirm that all callers really pass EUC-KR bytes.
source_encoding = 'EUC-KR'
clean_invalid_utf8_bytes(text, source_encoding: source_encoding) || ''
end

def deep_clean_invalid_utf8_bytes(object)
Expand All @@ -111,8 +84,16 @@ def deep_clean_invalid_utf8_bytes(object)
end
end

def clean_invalid_utf8_bytes(text)
text.encode!('UTF-8', 'EUC-KR')
# Re-encodes +text+ as UTF-8, mutating the string in place.
#
# With +source_encoding+, the bytes are relabelled as that encoding and then
# transcoded to UTF-8. Without it, only strings whose current encoding is
# invalid are touched: their bytes are read as ISO-8859-1 and transcoded.
#
# @param text [String, nil] string to clean; nil/false is returned untouched
# @param source_encoding [String, nil] name of the encoding the bytes are in
# @return [String, nil] the same string object (now UTF-8), or +text+ itself
#   when it was nil/false or already valid
# @raise [EncodingError] when the bytes cannot be transcoded from
#   +source_encoding+
def clean_invalid_utf8_bytes(text, source_encoding: nil)
  # Guard a missing text for both branches. Previously only the fallback
  # branch did, so nil combined with source_encoding raised NoMethodError.
  return text unless text

  if source_encoding
    text.force_encoding(source_encoding).encode!('UTF-8', source_encoding)
  elsif text.valid_encoding?
    text
  else
    # Every byte sequence is valid ISO-8859-1, so this transcode cannot fail.
    text.force_encoding('ISO-8859-1').encode!('UTF-8')
  end
end
end
end

0 comments on commit 5f49fc2

Please sign in to comment.