From 89903486cfa1228bff38b1109ac0df599bd545c3 Mon Sep 17 00:00:00 2001 From: Nick Muerdter Date: Thu, 1 Dec 2016 20:22:04 -0700 Subject: [PATCH] Improve resiliency of proxy during MongoDB replicaset changes. If connecting to MongoDB that's part of a replicaset, these changes help improve the failover handling during replicaset changes (either planned or unplanned). The default read preference is now "primaryPreferred," which allows connections to secondaries during replicaset changes when a primary has not been elected yet. Mora was previously lacking this read preference functionality, so a pull request to Mora has been submitted to provide support. We're also handling more potential edge-cases that might crop up during replicaset changes, and retrying the queries in those cases. This better ensures no connections are dropped, even in the event of unexpected replicaset changes. The updates to the testing suite helped uncover these failover edge-cases, so these improvements should also resolve the possibility of sporadic test failures. --- Gemfile | 3 + Gemfile.lock | 3 +- build/cmake/mora.cmake | 8 ++- build/cmake/versions.cmake | 4 +- build/mora/glide.lock | 18 ++---- build/mora/glide.yaml | 10 +-- build/scripts/outdated | 4 +- config/default.yml | 9 +-- src/api-umbrella/utils/mongo.lua | 37 ++++++++--- .../web-app/config/application.rb | 22 +++++++ src/api-umbrella/web-app/config/mongoid.yml | 19 ------ templates/etc/mora.properties.mustache | 1 + test/proxy/test_mongodb_replica_set.rb | 64 ++++++++++++++----- .../api_umbrella_test_helpers/setup.rb | 6 +- 14 files changed, 126 insertions(+), 82 deletions(-) delete mode 100644 src/api-umbrella/web-app/config/mongoid.yml diff --git a/Gemfile b/Gemfile index a25a44cb2..2caa8a97c 100644 --- a/Gemfile +++ b/Gemfile @@ -64,5 +64,8 @@ gem "lazyhash", "~> 0.1.1" # Generating fake strings and data. gem "faker", "~> 1.6.6" +# Concurrency helpers. +gem "concurrent-ruby", "~> 1.0.2" + # Debug printing gem "awesome_print", "~> 1.7.0" diff --git a/Gemfile.lock b/Gemfile.lock index 0c483a37f..a058ac52a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -146,6 +146,7 @@ DEPENDENCIES capybara (~> 2.10.1) capybara-screenshot (~> 1.0.14) childprocess (~> 0.5.9) + concurrent-ruby (~> 1.0.2) database_cleaner (~> 1.5.3) elasticsearch (~> 2.0.0) elasticsearch-persistence (~> 0.1.9) @@ -168,4 +169,4 @@ DEPENDENCIES typhoeus (~> 1.1.0) BUNDLED WITH - 1.13.1 + 1.13.6 diff --git a/build/cmake/mora.cmake b/build/cmake/mora.cmake index a8d9ffaa7..0e50e90b9 100644 --- a/build/cmake/mora.cmake +++ b/build/cmake/mora.cmake @@ -26,15 +26,17 @@ set(GLIDE_SOURCE_DIR ${SOURCE_DIR}) ExternalProject_Add( mora DEPENDS glide - URL https://github.com/emicklei/mora/archive/${MORA_VERSION}.tar.gz + # Use fork for read preference support: + # https://github.com/emicklei/mora/pull/44 + URL https://github.com/GUI/mora/archive/${MORA_VERSION}.tar.gz URL_HASH MD5=${MORA_HASH} SOURCE_DIR ${WORK_DIR}/gocode/src/github.com/emicklei/mora BUILD_IN_SOURCE 1 CONFIGURE_COMMAND "" BUILD_COMMAND cp ${CMAKE_SOURCE_DIR}/build/mora/glide.yaml /glide.yaml COMMAND cp ${CMAKE_SOURCE_DIR}/build/mora/glide.lock /glide.lock - COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} GO15VENDOREXPERIMENT=1 glide install - COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} GO15VENDOREXPERIMENT=1 go install + COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} glide install + COMMAND env PATH=${GOLANG_SOURCE_DIR}/bin:${GLIDE_SOURCE_DIR}:${WORK_DIR}/gocode/bin:$ENV{PATH} GOPATH=${WORK_DIR}/gocode GOROOT=${GOLANG_SOURCE_DIR} go install INSTALL_COMMAND install -D -m 755 ${WORK_DIR}/gocode/bin/mora ${STAGE_EMBEDDED_DIR}/bin/mora ) ExternalProject_Add_Step( diff --git a/build/cmake/versions.cmake b/build/cmake/versions.cmake index 75d5b790b..0c0c5eb34 100644 --- a/build/cmake/versions.cmake +++ b/build/cmake/versions.cmake @@ -68,8 +68,8 @@ set(MAILHOG_HASH 6602fd7f69276e7efba310362e958133) set(MONGO_ORCHESTRATION_VERSION 0.6.7) set(MONGODB_VERSION 3.2.11) set(MONGODB_HASH 9916a076bd2e2fa8e8fbad94bb083fae) -set(MORA_VERSION 4cae0b86a440356cc3b669fb76343ac514c99655) -set(MORA_HASH 6764886ca9b8c5302e93597c4500bfd3) +set(MORA_VERSION 02c69fb82839e4fc2c8415c763f1934b7cf7dd4f) +set(MORA_HASH 8030e2869ac1e9b6ef90c770cd4e946f) set(NGX_DYUPS_VERSION d4b3e053dee10e2879882eb4c346ac7d534e2d14) set(NGX_DYUPS_HASH bdf4408599602afa38365a426e126d21) set(NGX_TXID_VERSION f1c197cb9c42e364a87fbb28d5508e486592ca42) diff --git a/build/mora/glide.lock b/build/mora/glide.lock index 9c154b2b4..bd89a6df9 100644 --- a/build/mora/glide.lock +++ b/build/mora/glide.lock @@ -1,5 +1,5 @@ -hash: 1ff0351176d51da4f684e0e7621f3ee5fce0c74c44d7460be4a8107a4a549ee8 -updated: 2016-03-26T17:43:18.223474817-06:00 +hash: f341223f9b59f72bf4242c21178a6e272612d03abaf460892a1bf246c9967c3f +updated: 2016-12-01T10:25:55.897154602-07:00 imports: - name: github.com/compose/mejson version: afcf51c7c640b1e09c17065d4e5b8de3259d6261 @@ -7,17 +7,13 @@ imports: version: 89af920d613f1e3f771f6460b2629632e7a36ae9 subpackages: - swagger -- name: github.com/emicklei/mora - version: fea22d544a961ef5cd66dde405cf452b1ae33d2c - subpackages: - - api/documents - - api/response - - api/statistics - - session - name: github.com/magiconair/properties version: c265cfa48dda6474e208715ca93e987829f572f8 - name: gopkg.in/mgo.v2 - version: d90005c5262a3463800497ea5a89aed5fe22c886 + version: b6121c6199b7beecde08e6853f256a6475fe8031 subpackages: - bson -devImports: [] + - internal/json + - internal/sasl + - internal/scram +testImports: [] diff --git a/build/mora/glide.yaml b/build/mora/glide.yaml index 7d26e861b..8fdc0087e 100644 --- a/build/mora/glide.yaml +++ b/build/mora/glide.yaml @@ -1,4 +1,4 @@ -package: . +package: github.com/emicklei/mora import: - package: github.com/compose/mejson - package: github.com/emicklei/go-restful @@ -7,15 +7,9 @@ import: version: ~1.1.3 subpackages: - swagger -- package: github.com/emicklei/mora - subpackages: - - api/documents - - api/response - - api/statistics - - session - package: github.com/magiconair/properties version: ~1.7.0 - package: gopkg.in/mgo.v2 - version: r2016.02.04 + version: r2016.08.01 subpackages: - bson diff --git a/build/scripts/outdated b/build/scripts/outdated index ccc0e5615..26b218b16 100755 --- a/build/scripts/outdated +++ b/build/scripts/outdated @@ -118,7 +118,9 @@ repos = { :constraint => "~> 3.2.9", }, "mora" => { - :git => "https://github.com/emicklei/mora.git", + # Use fork for read preference support: + # https://github.com/emicklei/mora/pull/44 + :git => "https://github.com/GUI/mora.git", :git_ref => "master", }, "ngx_dyups" => { diff --git a/config/default.yml b/config/default.yml index 7f412bed0..a2b0b2614 100644 --- a/config/default.yml +++ b/config/default.yml @@ -128,14 +128,7 @@ dns_resolver: retries: 3 mongodb: url: "mongodb://127.0.0.1:14001/api_umbrella" - options: - server: - auto_reconnect: true - socketOptions: - keepAlive: 500 - replset: - socketOptions: - keepAlive: 500 + read_preference: primaryPreferred embedded_server_config: processManagement: fork: false diff --git a/src/api-umbrella/utils/mongo.lua b/src/api-umbrella/utils/mongo.lua index 04f225507..179ee4081 100644 --- a/src/api-umbrella/utils/mongo.lua +++ b/src/api-umbrella/utils/mongo.lua @@ -1,8 +1,10 @@ local cjson = require "cjson" local http = require "resty.http" +local stringx = require "pl.stringx" local types = require "pl.types" local is_empty = types.is_empty +local startswith = stringx.startswith local _M = {} @@ -68,22 +70,39 @@ local function perform_query(path, query_options, http_options) local response, err = try_query(path, http_options) - -- If we get an "EOF" error from Mora, this means our query occurred during - -- the middle of a server or replicaset change. In this case, retry the - -- request a couple more times. + -- If we certain types of errors from Mora, this means our query occurred + -- during the middle of a server or replicaset change. In this case, retry + -- the request a few more times. -- -- This should be less likely in mora since -- https://github.com/emicklei/mora/pull/29, but it's still possible for this -- to crop up if the socket gets closed sometime between the request starting - -- and the query actually executing. After more research, this seems to be + -- and the query actually executing. This can also happen in case of + -- unexpected mongod shutdowns. After more research, this seems to be -- expected mgo behavior, and it's up to the app to handle these type of -- errors. I'm not entirely sure whether we should try to address the issue -- in mora itself, but in the meantime, we'll retry here. - if err and err == "mongodb error: EOF" then - response, err = try_query(path, http_options) - if err and err == "mongodb error: EOF" then - ngx.sleep(0.5) - response, err = try_query(path, http_options) + if err then + -- Loop to retry a few times until no errors occurs or we give up, since we + -- don't want to wait forever. + local retries = 0 + while err and retries < 5 do + if err == "mongodb error: EOF" + or err == "mongodb error: node is recovering" + or err == "mongodb error: interrupted at shutdown" + or err == "mongodb error: Closed explicitly" + or startswith(err, "mongodb error: read tcp") + then + -- Retry immediately, then sleep between further retries. + retries = retries + 1 + if retries > 1 then + ngx.sleep(0.5) + end + + response, err = try_query(path, http_options) + else + break + end end end diff --git a/src/api-umbrella/web-app/config/application.rb b/src/api-umbrella/web-app/config/application.rb index bdc28c877..dec403d6d 100644 --- a/src/api-umbrella/web-app/config/application.rb +++ b/src/api-umbrella/web-app/config/application.rb @@ -87,6 +87,28 @@ class Application < Rails::Application require "js_locale_helper" end + # Instead of loading from a mongoid.yml file, load the Mongoid config in + # code, where it's easier to merge settings from our API Umbrella + # configuration. + initializer "mongoid-config", :after => "mongoid.load-config" do + config = { + :clients => { + :default => { + :uri => ApiUmbrellaConfig[:mongodb][:url], + :options => { + :read => { + :mode => ApiUmbrellaConfig[:mongodb][:read_preference].underscore.to_sym, + }, + }, + }, + }, + } + + Mongoid::Clients.disconnect + Mongoid::Clients.clear + Mongoid.load_configuration(config) + end + # Settings in config/environments/* take precedence over those specified here. # Application configuration should go into files in config/initializers # -- all .rb files in that directory are automatically loaded. diff --git a/src/api-umbrella/web-app/config/mongoid.yml b/src/api-umbrella/web-app/config/mongoid.yml deleted file mode 100644 index 62774c088..000000000 --- a/src/api-umbrella/web-app/config/mongoid.yml +++ /dev/null @@ -1,19 +0,0 @@ -defaults: &defaults - clients: - default: - uri: <%= ApiUmbrellaConfig[:mongodb][:url] %> - -development: - <<: *defaults - -test: - clients: - default: - uri: <%= ApiUmbrellaConfig[:mongodb][:url] %> - options: - read: - mode: :primary - max_pool_size: 1 - -production: - <<: *defaults diff --git a/templates/etc/mora.properties.mustache b/templates/etc/mora.properties.mustache index f092f802f..7cf43d238 100644 --- a/templates/etc/mora.properties.mustache +++ b/templates/etc/mora.properties.mustache @@ -1,4 +1,5 @@ http.server.host={{mora.host}} http.server.port={{mora.port}} mongod.api_umbrella.uri={{mongodb.url}} +mongod.api_umbrella.mode={{mongodb.read_preference}} mongod.api_umbrella.timeout={{mora.timeout}} diff --git a/test/proxy/test_mongodb_replica_set.rb b/test/proxy/test_mongodb_replica_set.rb index b03a43667..3bfa43f2a 100644 --- a/test/proxy/test_mongodb_replica_set.rb +++ b/test/proxy/test_mongodb_replica_set.rb @@ -35,9 +35,9 @@ def setup Mongoid::Clients.disconnect Mongoid::Clients.clear Mongoid.load_configuration({ - "clients" => { - "default" => { - "uri" => mongodb_url, + :clients => { + :default => { + :uri => mongodb_url, }, }, }) @@ -71,9 +71,9 @@ def after_all Mongoid::Clients.disconnect Mongoid::Clients.clear Mongoid.load_configuration({ - "clients" => { - "default" => { - "uri" => $config["mongodb"]["url"], + :clients => { + :default => { + :uri => $config["mongodb"]["url"], }, }, }) @@ -101,15 +101,29 @@ def test_no_dropped_connections_during_replica_set_elections })) assert_response_code(403, response) + # Pre-create an array of unique API keys to be used during tests. + # + # If more than 100 unique API keys are needed during the test, the number + # we create may need to be increased, but currently we're using far less + # than this on average. + # + # We do this before the tests start, so we're not dealing with the + # edge-case of inserts being attempted right as a primary server changes or + # shuts down. We're more interested with testing the read-only + # functionality of the proxy when the primary changes (eg, that the proxy + # can continue querying for valid API keys). + @users = Concurrent::Array.new(FactoryGirl.create_list(:api_user, 100, { + :settings => { + :rate_limit_mode => "unlimited", + }, + })) + # Perform parallel requests constantly in the background of this tests. # This ensures that no connections are dropped during any point of the # replica set changes we'll make later on. request_thread = Thread.new do - user = FactoryGirl.create(:api_user, { - :settings => { - :rate_limit_mode => "unlimited", - }, - }) + # Pop a new unique API key off to use for this set of tests. + user = @users.shift loop do hydra = Typhoeus::Hydra.new(:max_concurrency => 5) @@ -165,6 +179,7 @@ def test_no_dropped_connections_during_replica_set_elections mongo_orchestration(:patch, "/v1/replica_sets/test-cluster/members/#{@initial_primary_replica_id}", { :rsParams => { :priority => 99 }, }) + wait_for_num_tests(100) request_thread.exit end @@ -200,11 +215,8 @@ def wait_for_primary_change # just slow down during replica set changes (in which case the background # requests might not end up performing many requests). def wait_for_num_tests(count) - user = FactoryGirl.create(:api_user, { - :settings => { - :rate_limit_mode => "unlimited", - }, - }) + # Pop a new unique API key off to use for this set of tests. + user = @users.shift hydra = Typhoeus::Hydra.new(:max_concurrency => 5) count.times do @@ -219,7 +231,12 @@ def wait_for_num_tests(count) hydra.run end + class MongoOrchestrationError < StandardError + end + def mongo_orchestration(http_method, path, data = {}) + retries ||= 0 + http_opts = http_options.merge(:method => http_method) if(data.present?) http_opts.deep_merge!({ @@ -229,10 +246,23 @@ def mongo_orchestration(http_method, path, data = {}) end response = Typhoeus::Request.new("http://127.0.0.1:13089#{path}", http_opts).run - assert_response_code(200, response) + if(response.code != 200) + raise MongoOrchestrationError + end assert_equal("application/json", response.headers["content-type"]) MultiJson.load(response.body) + rescue MongoOrchestrationError + # If the request to mongo-orchestration failed, retry a few times. This can + # happen in certain cases when mongo-orchestration's Python client also + # gets confused by the ongoing replicaset changes. + retries += 1 + if(retries <= 4) + sleep 0.5 + retry + else + assert_response_code(200, response) + end end def setup_mongo_orchestration diff --git a/test/support/api_umbrella_test_helpers/setup.rb b/test/support/api_umbrella_test_helpers/setup.rb index 60a494813..65e749341 100644 --- a/test/support/api_umbrella_test_helpers/setup.rb +++ b/test/support/api_umbrella_test_helpers/setup.rb @@ -52,9 +52,9 @@ def setup_server self.setup_mutex.synchronize do unless self.setup_complete Mongoid.load_configuration({ - "clients" => { - "default" => { - "uri" => $config["mongodb"]["url"], + :clients => { + :default => { + :uri => $config["mongodb"]["url"], }, }, })