Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache byte range requests #215

Merged
merged 6 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile.buildkit.plus
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ENV XSLT_VERSION 30-1

# Default proxy cache settings, written in the key=value form of ENV
# (the whitespace-separated "ENV key value" form is legacy syntax).
ENV PROXY_CACHE_MAX_SIZE="10g"
ENV PROXY_CACHE_INACTIVE="60m"
# Slice size used when caching byte-range requests
ENV PROXY_CACHE_SLICE_SIZE="1m"
ENV PROXY_CACHE_VALID_OK="1h"
ENV PROXY_CACHE_VALID_NOTFOUND="1m"
ENV PROXY_CACHE_VALID_FORBIDDEN="30s"
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.oss
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ ENV NJS_VERSION "0.8.2"

# Default proxy cache settings, written in the key=value form of ENV
# (the whitespace-separated "ENV key value" form is legacy syntax).
ENV PROXY_CACHE_MAX_SIZE="10g"
ENV PROXY_CACHE_INACTIVE="60m"
# Slice size used when caching byte-range requests
ENV PROXY_CACHE_SLICE_SIZE="1m"
ENV PROXY_CACHE_VALID_OK="1h"
ENV PROXY_CACHE_VALID_NOTFOUND="1m"
ENV PROXY_CACHE_VALID_FORBIDDEN="30s"
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.plus
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ENV XSLT_VERSION 30-1

# Default proxy cache settings, written in the key=value form of ENV
# (the whitespace-separated "ENV key value" form is legacy syntax).
ENV PROXY_CACHE_MAX_SIZE="10g"
ENV PROXY_CACHE_INACTIVE="60m"
# Slice size used when caching byte-range requests
ENV PROXY_CACHE_SLICE_SIZE="1m"
ENV PROXY_CACHE_VALID_OK="1h"
ENV PROXY_CACHE_VALID_NOTFOUND="1m"
ENV PROXY_CACHE_VALID_FORBIDDEN="30s"
Expand Down
7 changes: 6 additions & 1 deletion common/etc/nginx/include/s3gateway.js
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,12 @@ function redirectToS3(r) {
} else if (!ALLOW_LISTING && !PROVIDE_INDEX_PAGE && uriPath === "/") {
r.internalRedirect("@error404");
} else {
r.internalRedirect("@s3");
if (r.headersIn["Range"]) {
r.internalRedirect("@s3_sliced");
} else {
r.internalRedirect("@s3");
}

}
}

Expand Down
2 changes: 2 additions & 0 deletions common/etc/nginx/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ env APPEND_SLASH_FOR_POSSIBLE_DIRECTORY;
env DIRECTORY_LISTING_PATH_PREFIX;
# Proxy cache settings; each variable is declared exactly once.
env PROXY_CACHE_MAX_SIZE;
env PROXY_CACHE_INACTIVE;
env PROXY_CACHE_SLICE_SIZE;
env PROXY_CACHE_VALID_OK;
env PROXY_CACHE_VALID_NOTFOUND;
env PROXY_CACHE_VALID_FORBIDDEN;
env HEADER_PREFIXES_TO_STRIP;
Expand Down
8 changes: 8 additions & 0 deletions common/etc/nginx/templates/cache.conf.template
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,11 @@ keys_zone=s3_cache:10m
max_size=$PROXY_CACHE_MAX_SIZE
inactive=$PROXY_CACHE_INACTIVE
use_temp_path=off;


# Separate on-disk cache zone (s3_cache_slices) for the slices stored when
# byte-range requests are cached.
# NOTE(review): this zone reuses the same PROXY_CACHE_MAX_SIZE and
# PROXY_CACHE_INACTIVE limits as the s3_cache zone above, so the two caches
# together may occupy up to twice PROXY_CACHE_MAX_SIZE on disk - confirm this
# is intended.
proxy_cache_path /var/cache/nginx/s3_proxy_slices
levels=1:2
keys_zone=s3_cache_slices:10m
max_size=$PROXY_CACHE_MAX_SIZE
inactive=$PROXY_CACHE_INACTIVE
use_temp_path=off;
57 changes: 14 additions & 43 deletions common/etc/nginx/templates/default.conf.template
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ server {
# CORS is implemented by returning the appropriate headers as part of
# the response to an OPTIONS request. If you want to customize the
# CORS response, the cors.conf.template file can be overwritten and
# extended to meet one's needs.
# extended to meet your needs.
include /etc/nginx/conf.d/gateway/cors.conf;

auth_request /aws/credentials/retrieve;
Expand All @@ -101,51 +101,22 @@ server {
include /etc/nginx/conf.d/gateway/js_fetch_trusted_certificate.conf;
}

# This is the primary location that proxies the request to s3
# See the included s3_location_common.conf file for all logic
location @s3 {
# We include only the headers needed for the authentication signatures that
# we plan to use.
include /etc/nginx/conf.d/gateway/v${AWS_SIGS_VERSION}_headers.conf;

# The CORS configuration needs to be imported in several places in order for
# it to be applied within different contexts.
include /etc/nginx/conf.d/gateway/cors.conf;

# Don't allow any headers from the client - we don't want them messing
# with S3 at all.
proxy_pass_request_headers off;

# Enable passing of the server name through TLS Server Name Indication extension.
proxy_ssl_server_name on;
proxy_ssl_name ${S3_SERVER};

# Set the Authorization header to the AWS Signatures credentials
proxy_set_header Authorization $s3auth;
proxy_set_header X-Amz-Security-Token $awsSessionToken;

# We set the host as the bucket name to inform the S3 API of the bucket
proxy_set_header Host $s3_host_hdr;

# Use keep alive connections in order to improve performance
proxy_http_version 1.1;
proxy_set_header Connection '';

# We strip off all of the AWS specific headers from the server so that
# there is nothing identifying the object as having originated in an
# object store.
js_header_filter s3gateway.editHeaders;

# Catch all errors from S3 and sanitize them so that the user can't
# gain intelligence about the S3 bucket being proxied.
proxy_intercept_errors on;

# Comment out this line to receive the error messages returned by S3
error_page 400 401 402 403 405 406 407 408 409 410 411 412 413 414 415 416 417 418 420 422 423 424 426 428 429 431 444 449 450 451 500 501 502 503 504 505 506 507 508 509 510 511 =404 @error404;

error_page 404 @trailslashControl;
include /etc/nginx/conf.d/gateway/s3_location_common.conf;
}

proxy_pass ${S3_SERVER_PROTO}://storage_urls$s3uri;
# Same as the primary location above but handling and caching
# byte range requests efficiently
location @s3_sliced {
proxy_cache s3_cache_slices;
proxy_cache_valid 200 302 206 ${PROXY_CACHE_VALID_OK};
proxy_cache_key "$request_method$host$uri$slice_range";

include /etc/nginx/conf.d/gateway/s3_location.conf;
slice ${PROXY_CACHE_SLICE_SIZE};
proxy_set_header Range $slice_range;
include /etc/nginx/conf.d/gateway/s3_location_common.conf;
}

location @s3PreListing {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# We include only the headers needed for the authentication signatures that
# we plan to use.
include /etc/nginx/conf.d/gateway/v${AWS_SIGS_VERSION}_headers.conf;

# The CORS configuration needs to be imported in several places in order for
# it to be applied within different contexts.
include /etc/nginx/conf.d/gateway/cors.conf;

# Don't allow any headers from the client - we don't want them messing
# with S3 at all.
proxy_pass_request_headers off;

# Enable passing of the server name through TLS Server Name Indication extension.
proxy_ssl_server_name on;
proxy_ssl_name ${S3_SERVER};

# Set the Authorization header to the AWS Signatures credentials
proxy_set_header Authorization $s3auth;
proxy_set_header X-Amz-Security-Token $awsSessionToken;

# We set the host as the bucket name to inform the S3 API of the bucket
proxy_set_header Host $s3_host_hdr;

# Use keep alive connections in order to improve performance
proxy_http_version 1.1;
proxy_set_header Connection '';

# We strip off all of the AWS specific headers from the server so that
# there is nothing identifying the object as having originated in an
# object store.
js_header_filter s3gateway.editHeaders;

# Catch all errors from S3 and sanitize them so that the user can't
# gain intelligence about the S3 bucket being proxied.
proxy_intercept_errors on;

# Comment out this line to receive the error messages returned by S3
error_page 400 401 402 403 405 406 407 408 409 410 411 412 413 414 415 416 417 418 420 422 423 424 426 428 429 431 444 449 450 451 500 501 502 503 504 505 506 507 508 509 510 511 =404 @error404;

error_page 404 @trailslashControl;

proxy_pass ${S3_SERVER_PROTO}://storage_urls$s3uri;

include /etc/nginx/conf.d/gateway/s3_location.conf;
14 changes: 13 additions & 1 deletion docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ running as a Container or as a Systemd service.
| `DIRECTORY_LISTING_PATH_PREFIX` | No | | | In `ALLOW_DIRECTORY_LIST=true` mode [adds defined prefix to links](#configuring-directory-listing) |
| `DNS_RESOLVERS` | No | | | DNS resolvers (separated by single spaces) to configure NGINX with |
| `PROXY_CACHE_MAX_SIZE` | No | | `10g` | Limits cache size |
| `PROXY_CACHE_INACTIVE` | No | | `60m` | Cached data that are not accessed during the time specified by the parameter get removed from the cache regardless of their freshness |
| `PROXY_CACHE_INACTIVE` | No | | `60m` | Cached data that are not accessed during the time specified by the parameter get removed from the cache regardless of their freshness |
| `PROXY_CACHE_SLICE_SIZE` | No | | `1m` | For requests with a `Range` header included, determines the size of the chunks in which the file is fetched. Values much smaller than the requests can lead to inefficiencies due to reading and writing many files. See [below for more details](#byte-range-requests-and-caching) |
| `PROXY_CACHE_VALID_OK` | No | | `1h` | Sets caching time for response code 200 and 302 (and 206 for byte-range requests) |
| `PROXY_CACHE_VALID_NOTFOUND` | No | | `1m` | Sets caching time for response code 404 |
| `PROXY_CACHE_VALID_FORBIDDEN` | No | | `30s` | Sets caching time for response code 403 |
Expand Down Expand Up @@ -112,6 +113,17 @@ S3 bucket in a subfolder on an ALB. For example, if you wanted to expose the
root of a bucket under the path "www.mysite.com/somepath", you would set this
variable to "/somepath".

## Byte-Range Requests and Caching
The gateway caches [byte-range requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests) (requests sent with a `Range` header) differently than normal requests.

The gateway is configured to cache such requests in chunks of size `PROXY_CACHE_SLICE_SIZE`. If you don't provide this configuration value it will default to 1 megabyte.

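This means that, with the default slice size, if you request the first 2.5 megabytes of a 1 gigabyte file, the gateway will fetch and cache only the three 1 megabyte slices that cover that range (3 megabytes in total) and nothing else.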

Setting your slice size too small can have performance impacts since NGINX performs a subrequest for each slice. For more details see the [official reference](http://nginx.org/en/docs/http/ngx_http_slice_module.html).

You may make byte-range requests and normal requests for the same file and NGINX will automatically handle them differently. The caches for file chunks and normal file requests are separate on disk.

## Running as a Systemd Service

An [install script](/standalone_ubuntu_oss_install.sh) for the gateway shows
Expand Down
1 change: 1 addition & 0 deletions settings.example
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ PROVIDE_INDEX_PAGE=false
APPEND_SLASH_FOR_POSSIBLE_DIRECTORY=false
DIRECTORY_LISTING_PATH_PREFIX=""
PROXY_CACHE_MAX_SIZE=10g
# Slice size used when caching byte-range requests
PROXY_CACHE_SLICE_SIZE=1m
PROXY_CACHE_INACTIVE=60m
PROXY_CACHE_VALID_OK=1h
PROXY_CACHE_VALID_NOTFOUND=1m
Expand Down
3 changes: 3 additions & 0 deletions standalone_ubuntu_oss_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ echo "Directory Listing Enabled: ${ALLOW_DIRECTORY_LIST}"
# Print the directory-listing prefix and the proxy cache settings that are
# currently in effect.
echo "Directory Listing path prefix: ${DIRECTORY_LISTING_PATH_PREFIX}"
echo "Cache size limit: ${PROXY_CACHE_MAX_SIZE}"
echo "Cache inactive timeout: ${PROXY_CACHE_INACTIVE}"
echo "Size of slice for byte range requests: ${PROXY_CACHE_SLICE_SIZE}"
echo "Proxy Caching Time for Valid Response: ${PROXY_CACHE_VALID_OK}"
echo "Proxy Caching Time for Not Found Response: ${PROXY_CACHE_VALID_NOTFOUND}"
echo "Proxy Caching Time for Forbidden Response: ${PROXY_CACHE_VALID_FORBIDDEN}"
Expand Down Expand Up @@ -167,6 +168,8 @@ DEBUG=${DEBUG:-'false'}
PROXY_CACHE_MAX_SIZE=${PROXY_CACHE_MAX_SIZE:-'10g'}
# Cached data that are not accessed during the time get removed
PROXY_CACHE_INACTIVE=${PROXY_CACHE_INACTIVE:-'60m'}
# Request slice size
PROXY_CACHE_SLICE_SIZE=${PROXY_CACHE_SLICE_SIZE:-'1m'}
# Proxy caching time for response code 200 and 302
PROXY_CACHE_VALID_OK=${PROXY_CACHE_VALID_OK:-'1h'}
# Proxy caching time for response code 404
Expand Down
1 change: 1 addition & 0 deletions test/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ services:
AWS_SIGS_VERSION:
STATIC_SITE_HOSTING:
PROXY_CACHE_MAX_SIZE: "10g"
PROXY_CACHE_SLICE_SIZE: "1m"
PROXY_CACHE_INACTIVE: "60m"
PROXY_CACHE_VALID_OK: "1h"
PROXY_CACHE_VALID_NOTFOUND: "1m"
Expand Down
33 changes: 32 additions & 1 deletion test/integration/test_api.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ if ! [ -x "${checksum_cmd}" ]; then
exit ${no_dep_exit_code}
fi


# The byte-range assertions cut the expected bytes out of local fixture files
# with `dd`, so resolve it once up front and stop early when it is missing.
file_convert_command="$(command -v dd || true)"

[ -x "${file_convert_command}" ] || {
  e "required dependency not found: dd not found in the path or not executable"
  exit ${no_dep_exit_code}
}

# If we are using the `md5` executable
# then use the -r flag which makes it behave the same as `md5sum`
# this is done after the `-x` check for ability to execute
Expand Down Expand Up @@ -140,6 +148,27 @@ assertHttpRequestEquals() {
exit ${test_fail_exit_code}
fi
fi
# Not a real method but better than making a whole new helper or massively refactoring this one
elif [ "${method}" = "GET_RANGE" ]; then
# Call format to check the byte range 30 through 1000 (inclusive):
# assertHttpRequestEquals "GET_RANGE" "a.txt" "data/bucket-1/a.txt" 30 1000 "206"
body_data_path="${test_dir}/$3"
range_start="$4"
range_end="$5"
byte_count=$((range_end - range_start + 1)) # add one since we read through the last byte
expected_response_code="$6"

file_checksum=$(${file_convert_command} if="$body_data_path" bs=1 skip="$range_start" count="$byte_count" 2>/dev/null | ${checksum_cmd})
expected_checksum="${file_checksum:0:${checksum_length}}"

curl_checksum_output="$(${curl_cmd} -X "GET" -r "${range_start}"-"${range_end}" "${uri}" ${extra_arg} | ${checksum_cmd})"
s3_file_checksum="${curl_checksum_output:0:${checksum_length}}"

if [ "${expected_checksum}" != "${s3_file_checksum}" ]; then
e "Checksum doesn't match expectation. Request [GET ${uri} Range: "${range_start}"-"${range_end}"] Expected [${expected_checksum}] Actual [${s3_file_checksum}]"
e "curl command: ${curl_cmd} -X "GET" -r "${range_start}"-"${range_end}" "${uri}" ${extra_arg} | ${checksum_cmd}"
exit ${test_fail_exit_code}
fi
else
e "Method unsupported: [${method}]"
fi
Expand Down Expand Up @@ -175,7 +204,6 @@ if [ -n "${prefix_leading_directory_path}" ]; then
fi

# Ordinary filenames

assertHttpRequestEquals "HEAD" "a.txt" "200"
assertHttpRequestEquals "HEAD" "a.txt?some=param&that=should&be=stripped#aaah" "200"
assertHttpRequestEquals "HEAD" "b/c/d.txt" "200"
Expand All @@ -184,6 +212,9 @@ assertHttpRequestEquals "HEAD" "b/e.txt" "200"
assertHttpRequestEquals "HEAD" "b//e.txt" "200"
assertHttpRequestEquals "HEAD" "a/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.txt" "200"

# Byte range requests
assertHttpRequestEquals "GET_RANGE" 'a/plus%2Bplus.txt' "data/bucket-1/a/plus+plus.txt" 30 1000 "206"

# We try to request URLs that are properly encoded as well as URLs that
# are not properly encoded to understand what works and what does not.

Expand Down
Loading