diff --git a/Gemfile b/Gemfile index 93d8f930..a03f21cf 100644 --- a/Gemfile +++ b/Gemfile @@ -25,7 +25,7 @@ gem "rails-html-sanitizer", "~> 1.6" gem 'email_reply_trimmer' gem 'griddler-mailgun', '~> 1.1', '>= 1.1.1' gem 'griddler', '~> 1.5.2' -gem "charlock_holmes", "~> 0.7.7" # text encoding detection for email parsing +gem "charlock_holmes", "~> 0.7.9" # text encoding detection for email parsing gem 'mailgun_rails' gem "ruby-openai" diff --git a/Gemfile.lock b/Gemfile.lock index 6d9c7362..882b5b96 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -102,7 +102,7 @@ GEM image_processing (~> 1.1) marcel (~> 1.0.0) ssrf_filter (~> 1.0) - charlock_holmes (0.7.7) + charlock_holmes (0.7.9) chartkick (5.0.2) chronic (0.10.2) cloudflare-rails (3.0.0) @@ -494,7 +494,7 @@ DEPENDENCIES byebug capybara carrierwave (~> 3) - charlock_holmes (~> 0.7.7) + charlock_holmes (~> 0.7.9) chartkick (~> 5) cloudflare-rails combined_time_select diff --git a/app/lib/email_processor.rb b/app/lib/email_processor.rb index 18524e9e..ab0a9988 100644 --- a/app/lib/email_processor.rb +++ b/app/lib/email_processor.rb @@ -326,7 +326,11 @@ def to_utf8(content) begin detection = CharlockHolmes::EncodingDetector.detect(content) - CharlockHolmes::Converter.convert content, detection[:encoding].gsub("IBM424_ltr", "UTF-8"), "UTF-8" + if detection[:confidence] > 95 + CharlockHolmes::Converter.convert content, detection[:encoding].gsub("IBM424_ltr", "UTF-8"), "UTF-8" + else + content + end rescue content end diff --git a/app/models/entry.rb b/app/models/entry.rb index d0793703..0d79f991 100644 --- a/app/models/entry.rb +++ b/app/models/entry.rb @@ -79,7 +79,9 @@ def formatted_body formatted_body = body begin detection = CharlockHolmes::EncodingDetector.detect(body) - formatted_body = CharlockHolmes::Converter.convert formatted_body, detection[:encoding].gsub("IBM424_ltr", "UTF-8"), "UTF-8" + if detection[:confidence] > 95 + formatted_body = CharlockHolmes::Converter.convert formatted_body, 
detection[:encoding].gsub("IBM424_ltr", "UTF-8"), "UTF-8" + end rescue => e end fix_encoding(formatted_body) @@ -107,7 +109,9 @@ def sanitized_body begin detection = CharlockHolmes::EncodingDetector.detect(body_sanitized) - body_sanitized = CharlockHolmes::Converter.convert body_sanitized, detection[:encoding].gsub("IBM424_ltr", "UTF-8"), "UTF-8" + if detection[:confidence] > 95 + body_sanitized = CharlockHolmes::Converter.convert body_sanitized, detection[:encoding].gsub("IBM424_ltr", "UTF-8"), "UTF-8" + end rescue => e end fix_encoding(body_sanitized)