diff --git a/Gemfile b/Gemfile index 2745929e8b..37dfc524ab 100644 --- a/Gemfile +++ b/Gemfile @@ -13,8 +13,8 @@ gem "govuk_app_config" gem "govuk_personalisation" gem "govuk_publishing_components" gem "htmlentities" -gem "invalid_utf8_rejector" gem "plek" +gem "rack-utf8_sanitizer" gem "rails-i18n" gem "rails_translation_manager" gem "slimmer" diff --git a/Gemfile.lock b/Gemfile.lock index 44b8d5cd8b..d6434e0e96 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -203,7 +203,6 @@ GEM rails-i18n rainbow (>= 2.2.2, < 4.0) terminal-table (>= 1.5.1) - invalid_utf8_rejector (0.0.4) io-console (0.7.2) irb (1.14.1) rdoc (>= 4.0.0) @@ -516,6 +515,8 @@ GEM rack (>= 3.0.0) rack-test (2.1.0) rack (>= 1.3) + rack-utf8_sanitizer (1.9.1) + rack (>= 1.0, < 4.0) rackup (2.1.0) rack (>= 3) webrick (~> 1.8) @@ -724,12 +725,12 @@ DEPENDENCIES govuk_test htmlentities i18n-coverage - invalid_utf8_rejector listen pact pact_broker-client plek pry-byebug + rack-utf8_sanitizer rails (= 7.2.1.1) rails-controller-testing rails-i18n diff --git a/config/application.rb b/config/application.rb index 703d9b7690..466ad566d0 100644 --- a/config/application.rb +++ b/config/application.rb @@ -14,6 +14,7 @@ require "action_view/railtie" # require "action_cable/engine" require "rails/test_unit/railtie" +require_relative "../lib/sanitiser/strategy" # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. @@ -29,7 +30,7 @@ class Application < Rails::Application # Please, add to the `ignore` list any other `lib` subdirectories that do # not contain `.rb` files, or that should not be reloaded or eager loaded. # Common ones are `templates`, `generators`, or `middleware`, for example. - config.autoload_lib(ignore: %w[assets tasks]) + config.autoload_lib(ignore: %w[assets tasks sanitiser]) # Custom directories with classes and modules you want to be autoloadable. config.eager_load_paths += %W[#{config.root}/app/presenters] @@ -76,5 +77,15 @@ class Application < Rails::Application # to use CSS that has same function names as SCSS such as max. # https://github.com/alphagov/govuk-frontend/issues/1350 config.assets.css_compressor = nil + + # Protect from "invalid byte sequence in UTF-8" errors, + # when a query or a cookie is a string with incorrect UTF-8 encoding. + config.middleware.insert_before( + Rack::Head, + Rack::UTF8Sanitizer, + sanitizable_content_types: [], + only: %w[QUERY_STRING], + strategy: Sanitiser::Strategy, + ) end end diff --git a/lib/sanitiser/strategy.rb b/lib/sanitiser/strategy.rb new file mode 100644 index 0000000000..fc2414343e --- /dev/null +++ b/lib/sanitiser/strategy.rb @@ -0,0 +1,18 @@ +module Sanitiser + class Strategy + class SanitisingError < StandardError; end + + class << self + def call(input, sanitize_null_bytes: false) + input + .force_encoding(Encoding::ASCII_8BIT) + .encode!(Encoding::UTF_8) + if sanitize_null_bytes && Rack::UTF8Sanitizer::NULL_BYTE_REGEX.match?(input) + raise NullByteInString + end + rescue StandardError + raise SanitisingError, "Non-UTF-8 (or null) character in the query or in the cookie" + end + end + end +end diff --git a/spec/requests/sanitiser_spec.rb b/spec/requests/sanitiser_spec.rb new file mode 100644 index 0000000000..f190abf296 --- /dev/null +++ b/spec/requests/sanitiser_spec.rb @@ -0,0 +1,84 @@ +RSpec.describe "Sanitiser" do + context "with query being correct percent-encoded UTF-8 string does not raise exception" do + it "does not raise error" do + get "/development?%41" + expect(response).to have_http_status(:ok) + end + end + + context "with query being incorrect percent-encoded UTF-8 string raises exception" do + it "raises Sanitiser::Strategy::SanitisingError" do + expect { get "/development?%AD" }.to raise_error(Sanitiser::Strategy::SanitisingError) + end + end + + context "with cookie key being correct UTF-8" do + it "does not raise error" do + get "/development", headers: { Cookie: "\x41=value" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie key being incorrect UTF-8" do + it "raises ArgumentError" do + expect { get "/development", headers: { Cookie: "\xAD=value" } } + .to raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + end + + context "with cookie value being correct UTF-8" do + it "does not raise error" do + get "/development", headers: { Cookie: "key=\x41" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie value being incorrect UTF-8" do + it "raises ArgumentError" do + expect { get "/development", headers: { Cookie: "key=\xAD" } } + .to raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + end + + context "with cookie path being correct UTF-8" do + it "does not raise error" do + get "/development", headers: { Cookie: "key=value; Path=/\x41" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie path being incorrect UTF-8" do + it "raises ArgumentError" do + expect { get "/development", headers: { Cookie: "key=value; Path=/\xAD" } } + .to raise_error(ArgumentError, "invalid byte sequence in UTF-8") + end + end + + context "with cookie path being correct percent-encoded UTF-8" do + it "does not raise error" do + get "/development", headers: { Cookie: "key=value; Path=/%41" } + expect(response).to have_http_status(:ok) + end + end + + context "with cookie path being incorrect percent-encoded UTF-8" do + it "raises Sanitiser::Strategy::SanitisingError" do + expect { get "/development", headers: { Cookie: "key=value; Path=/%AD" } } + .to raise_error(Sanitiser::Strategy::SanitisingError) + end + end + + context "with referrer header being correct percent-encoded UTF-8" do + it "does not raise error" do + get "/development", headers: { Referer: "http://example.com/?%41" } + expect(response).to have_http_status(:ok) + end + end + + context "with referrer header being incorrect percent-encoded UTF-8" do + it "does not raise error" do + get "/development", headers: { Referer: "http://example.com/?%AD" } + expect(response).to have_http_status(:ok) + end + end +end