Skip to content

Commit

Permalink
refactor to use only the unversioned cache entry
Browse files Browse the repository at this point in the history
* uses single cache key
* stores the headers in the cache to make the etag available
* validates against the etag for strict cache hit
* soft etag matches based on TTL for a stale-while-revalidate
  • Loading branch information
colinbendell committed Mar 27, 2023
1 parent 3e57809 commit b317df3
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 210 deletions.
2 changes: 1 addition & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ inherit_from:
- https://shopify.github.io/ruby-style-guide/rubocop.yml

AllCops:
TargetRubyVersion: 2.4
TargetRubyVersion: 2.7
Exclude:
- vendor/bundle/**/*
4 changes: 2 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ source "https://rubygems.org"
gemspec

gem 'rails', '~> 7.0.4'
gem 'rubocop', '1.48.0', require: false
gem 'rubocop', require: false, group: :test
gem 'mocha', require: false, group: :test
gem 'simplecov', require: false, group: :test

13 changes: 5 additions & 8 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
response_bank (1.1.0)
response_bank (1.2.0)
msgpack
useragent

Expand Down Expand Up @@ -151,7 +151,7 @@ GEM
rake (13.0.6)
regexp_parser (2.7.0)
rexml (3.2.5)
rubocop (1.48.0)
rubocop (1.48.1)
json (~> 2.3)
parallel (~> 1.10)
parser (>= 3.2.0.0)
Expand All @@ -175,8 +175,6 @@ GEM
timeout (0.3.2)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
tzinfo-data (1.2023.2)
tzinfo (>= 1.0.0)
unicode-display_width (2.4.2)
useragent (0.16.10)
websocket-driver (0.7.5)
Expand All @@ -188,14 +186,13 @@ PLATFORMS
ruby

DEPENDENCIES
minitest (>= 5.13.0)
mocha (>= 1.10.0)
minitest (>= 5.18.0)
mocha
rails (~> 7.0.4)
rake
response_bank!
rubocop (= 1.48.0)
rubocop
simplecov
tzinfo-data (>= 1.2019.3)

BUNDLED WITH
2.3.11
15 changes: 7 additions & 8 deletions lib/response_bank/middleware.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

module ResponseBank
class Middleware
# Limit the cached headers
# TODO: Make this lowercase/case-insensitive as per rfc2616 §4.2
CACHEABLE_HEADERS = ["Location", "Content-Type", "ETag", "Content-Encoding", "Last-Modified", "Cache-Control", "Expires", "Surrogate-Keys", "Cache-Tags"].freeze

REQUESTED_WITH = "HTTP_X_REQUESTED_WITH"
ACCEPT = "HTTP_ACCEPT"
USER_AGENT = "HTTP_USER_AGENT"
Expand All @@ -20,7 +24,6 @@ def call(env)
if env['cacheable.cache']
if [200, 404, 301, 304].include?(status)
headers['ETag'] = env['cacheable.key']
headers['X-Alternate-Cache-Key'] = env['cacheable.unversioned-key']

if ie_ajax_request?(env)
headers["Expires"] = "-1"
Expand All @@ -38,22 +41,18 @@ def call(env)

body_gz = ResponseBank.compress(body_string)

cached_headers = headers.slice(*CACHEABLE_HEADERS)
# Store result
cache_data = [status, headers['Content-Type'], body_gz, timestamp]
cache_data << headers['Location'] if status == 301
cache_data = [status, cached_headers, body_gz, timestamp]

ResponseBank.write_to_cache(env['cacheable.key']) do
payload = MessagePack.dump(cache_data)
ResponseBank.write_to_backing_cache_store(
env,
env['cacheable.key'],
env['cacheable.unversioned-key'],
payload,
expires_in: env['cacheable.versioned-cache-expiry'],
)

if env['cacheable.unversioned-key']
ResponseBank.write_to_backing_cache_store(env, env['cacheable.unversioned-key'], payload)
end
end

# since we had to generate the gz version above already we may
Expand Down
158 changes: 81 additions & 77 deletions lib/response_bank/response_cache_handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def initialize(

def run!
@env['cacheable.cache'] = true
@env['cacheable.key'] = versioned_key_hash
@env['cacheable.unversioned-key'] = unversioned_key_hash
@env['cacheable.key'] = entity_tag_hash
@env['cacheable.unversioned-key'] = cache_key_hash

ResponseBank.log(cacheable_info_dump)

Expand All @@ -41,32 +41,32 @@ def run!
end
end

def versioned_key_hash
@versioned_key_hash ||= key_hash(versioned_key)
def entity_tag_hash
@entity_tag_hash ||= hash(entity_tag)
end

def unversioned_key_hash
@unversioned_key_hash ||= key_hash(unversioned_key)
def cache_key_hash
@cache_key_hash ||= hash(cache_key)
end

private

def key_hash(key)
def hash(key)
"cacheable:#{Digest::MD5.hexdigest(key)}"
end

def versioned_key
@versioned_key ||= ResponseBank.cache_key_for(key: @key_data, version: @version_data)
def entity_tag
@entity_tag ||= ResponseBank.cache_key_for(key: @key_data, version: @version_data)
end

def unversioned_key
@unversioned_key ||= ResponseBank.cache_key_for(key: @key_data)
def cache_key
@cache_key ||= ResponseBank.cache_key_for(key: @key_data)
end

def cacheable_info_dump
log_info = [
"Raw cacheable.key: #{versioned_key}",
"cacheable.key: #{versioned_key_hash}",
"Raw cacheable.key: #{entity_tag}",
"cacheable.key: #{entity_tag_hash}",
]

if @env['HTTP_IF_NONE_MATCH']
Expand All @@ -78,68 +78,32 @@ def cacheable_info_dump

def try_to_serve_from_cache
# Etag
response = serve_from_browser_cache(versioned_key_hash)

response = serve_from_browser_cache(entity_tag_hash, @env['HTTP_IF_NONE_MATCH'])
return response if response

# Memcached
response = if @serve_unversioned
serve_from_cache(unversioned_key_hash, "Cache hit: server (unversioned)")
else
serve_from_cache(versioned_key_hash, "Cache hit: server")
end

response = serve_from_cache(cache_key_hash, entity_tag_hash, @cache_age_tolerance)
return response if response

@env['cacheable.locked'] ||= false

if @env['cacheable.locked'] || ResponseBank.acquire_lock(versioned_key_hash)
# execute if we can get the lock
@env['cacheable.locked'] = true
elsif serving_from_noncurrent_but_recent_version_acceptable?
# serve a stale version
response = serve_from_cache(unversioned_key_hash, "Cache hit: server (recent)", @cache_age_tolerance)

return response if response
end

# No cache hit; this request cannot be handled from cache.
# Yield to the controller and mark for writing into cache.
refill_cache
end

def serving_from_noncurrent_but_recent_version_acceptable?
@cache_age_tolerance > 0
end

def serve_from_browser_cache(cache_key_hash)
# Support for Etag variations including:
# If-None-Match: abc
# If-None-Match: "abc"
# If-None-Match: W/"abc"
# If-None-Match: "abc", "def"
if (if_none_match = @env["HTTP_IF_NONE_MATCH"])
etags = if_none_match.split(",")
etags.each do |tag|
tag.sub!(/\"?\s*\z/, "")
tag.sub!(/\A\s*(W\/)?\"?/, "")
end

if etags.include?(cache_key_hash)
@env['cacheable.miss'] = false
@env['cacheable.store'] = 'client'
def serve_from_browser_cache(entity_tag, if_none_match)
if etag_matches?(entity_tag, if_none_match)
@env['cacheable.miss'] = false
@env['cacheable.store'] = 'client'

@headers.delete('Content-Type')
@headers.delete('Content-Length')
@headers.delete('Content-Type')
@headers.delete('Content-Length')

ResponseBank.log("Cache hit: client")
ResponseBank.log("Cache hit: client")

[304, @headers, []]
end
[304, @headers, []]
end
end

def serve_from_cache(cache_key_hash, message, cache_age_tolerance = nil)
def serve_from_cache(cache_key_hash, match_entity_tag = "*", cache_age_tolerance = nil)
raw = ResponseBank.read_from_backing_cache_store(@env, cache_key_hash, backing_cache_store: @cache_store)

if raw
Expand All @@ -148,37 +112,77 @@ def serve_from_cache(cache_key_hash, message, cache_age_tolerance = nil)
@env['cacheable.miss'] = false
@env['cacheable.store'] = 'server'

status, content_type, body, timestamp, location = hit
status, headers, body, timestamp, location = hit

if cache_age_tolerance && page_too_old?(timestamp, cache_age_tolerance)
ResponseBank.log("Found an unversioned cache entry, but it was too old (#{timestamp})")

nil
else
@headers['Content-Type'] = content_type
# polyfill headers for legacy versions
headers = { 'Content-Type' => headers.to_s } if headers.is_a? String
headers['Location'] = location if location

@headers['Location'] = location if location
@env['cacheable.locked'] ||= false

if @env["gzip"]
@headers['Content-Encoding'] = "gzip"
# to preserve the unversioned/versioned logging messages from past releases we split the match_entity_tag test
if match_entity_tag == "*"
ResponseBank.log("Cache hit: server (unversioned)")
# page tolerance only applies for versioned + etag mismatch
elsif etag_matches?(headers['ETag'], match_entity_tag)
ResponseBank.log("Cache hit: server")
else
# cache miss; check to see if any parallel requests already are regenerating the cache
if ResponseBank.acquire_lock(match_entity_tag)
# execute if we can get the lock
@env['cacheable.locked'] = true
return nil
elsif stale_while_revalidate?(timestamp, cache_age_tolerance)
# cache is being regenerated, can we avoid piling on and use a stale version in the interim?
ResponseBank.log("Cache hit: server (recent)")
else
# we have to uncompress because the client doesn't support gzip
ResponseBank.log("uncompressing for client without gzip")
body = ResponseBank.decompress(body)
ResponseBank.log("Found an unversioned cache entry, but it was too old (#{timestamp})")
return nil
end
end

ResponseBank.log(message)
# version check
# unversioned but tolerance threshold
# regen
@headers = @headers.merge(headers)

[status, @headers, [body]]
if @env["gzip"]
@headers['Content-Encoding'] = "gzip"
else
# we have to uncompress because the client doesn't support gzip
ResponseBank.log("uncompressing for client without gzip")
body = ResponseBank.decompress(body)
end
[status, @headers, [body]]
end
end

def page_too_old?(timestamp, cache_age_tolerance)
!timestamp || timestamp < (Time.now.to_i - cache_age_tolerance)
# Reports whether +entity_tag+ is satisfied by an If-None-Match style value.
#
# Supported header variations:
#   If-None-Match: abc
#   If-None-Match: "abc"
#   If-None-Match: W/"abc"
#   If-None-Match: "abc", "def"
#   If-None-Match: abc, def
#   If-None-Match: *
#
# @param entity_tag [String, nil] the entity tag to look for
# @param if_none_match [String, nil] the raw header value (or a single tag)
# @return [Boolean] true when the tag (or the wildcard) is present
def etag_matches?(entity_tag, if_none_match)
  return false unless entity_tag
  return false unless if_none_match

  # Strictly speaking an unquoted etag is not valid, yet it is common.
  # To avoid unintended greedy (substring) matches, first compare the naked
  # value as a whole, then look for the entity tag wrapped in quotes.
  return true if if_none_match == "*" || if_none_match == entity_tag
  return true if if_none_match.include?(%{"#{entity_tag}"})

  # Otherwise compare whole list members, so that padded values and unquoted
  # tags inside a comma separated list still match, but partial tags never do.
  if_none_match.split(",").any? do |member|
    candidate = member.strip
    next true if candidate == "*"

    # drop the optional weak validator prefix and the surrounding quotes
    candidate.sub(%r{\A(W/)?"?}, "").sub(/"\z/, "") == entity_tag
  end
end

# Decides whether a cached entry is still recent enough to be served while
# it is being regenerated.
#
# @param timestamp [Integer, nil] epoch seconds stored with the cache entry
# @param cache_age_tolerance [Integer, nil] maximum accepted age in seconds
# @return [Boolean] false when either argument is missing, otherwise whether
#   the timestamp is no older than the tolerance window
def stale_while_revalidate?(timestamp, cache_age_tolerance)
  return false unless cache_age_tolerance && timestamp

  oldest_acceptable = Time.now.to_i - cache_age_tolerance
  timestamp >= oldest_acceptable
end

def refill_cache
# non cache hits do not yet have the lock
ResponseBank.acquire_lock(entity_tag_hash) unless @env['cacheable.locked']
@env['cacheable.locked'] = true
@env['cacheable.miss'] = true

ResponseBank.log("Refilling cache")
Expand Down
2 changes: 1 addition & 1 deletion lib/response_bank/version.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: true
module ResponseBank
VERSION = "1.1.0"
VERSION = "1.2.0"
end
9 changes: 4 additions & 5 deletions response_bank.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,15 @@ Gem::Specification.new do |s|
s.files = Dir["lib/**/*.rb", "README.md", "LICENSE.txt"]
s.require_paths = ["lib"]

s.required_ruby_version = ">= 2.4.0"
s.required_ruby_version = ">= 2.7.0"

s.metadata["allowed_push_host"] = "https://rubygems.org"

s.add_runtime_dependency("useragent")
s.add_runtime_dependency("msgpack")

s.add_development_dependency("minitest", ">= 5.13.0")
s.add_development_dependency("mocha", ">= 1.10.0")
s.add_development_dependency("minitest", ">= 5.18.0")
s.add_development_dependency("mocha", ">= 2.0.0")
s.add_development_dependency("rake")
s.add_development_dependency("rails", ">= 5.0")
s.add_development_dependency("tzinfo-data", ">= 1.2019.3")
s.add_development_dependency("rails", ">= 6.1")
end
5 changes: 3 additions & 2 deletions test/controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,15 @@ def test_cache_control_no_store_set_for_uncacheable_requests
# Verifies that a serialized page returned by the cache store is rendered by
# the controller as a plain-text body with status 200.
def test_server_cache_hit
# the request does not accept gzip, so the cached body has to be decompressed
controller.request.env['gzip'] = false
# the backing cache store hands back the MessagePack payload from page_serialized
@cache_store.expects(:read).returns(page_serialized)
# stub the entity tag hash to '*' so the cached entry is served without comparing it to a stored ETag
ResponseBank::ResponseCacheHandler.any_instance.expects(:entity_tag_hash).returns('*').at_least_once
controller.expects(:render).with(plain: '<body>hi.</body>', status: 200)

# the block is empty: it only matters if the handler yields to regenerate the page
controller.send(:response_cache) {}
end

def test_client_cache_hit
controller.request.env['HTTP_IF_NONE_MATCH'] = 'deadbeef'
ResponseBank::ResponseCacheHandler.any_instance.expects(:versioned_key_hash).returns('deadbeef').at_least_once
ResponseBank::ResponseCacheHandler.any_instance.expects(:entity_tag_hash).returns('deadbeef').at_least_once
controller.expects(:head).with(:not_modified)

controller.send(:response_cache) {}
Expand All @@ -77,6 +78,6 @@ def controller
end

def page_serialized
MessagePack.dump([200, "text/html", ResponseBank.compress("<body>hi.</body>"), 1331765506])
MessagePack.dump([200, {"Content-Type" => "text/html"}, ResponseBank.compress("<body>hi.</body>"), 1331765506])
end
end
Loading

0 comments on commit b317df3

Please sign in to comment.