-
-
Notifications
You must be signed in to change notification settings - Fork 626
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add http proxy support to request_uri() #112
Changes from all commits
0939833
5c96e1f
b0e6fc4
af8b08b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ local tbl_concat = table.concat | |
local tbl_insert = table.insert | ||
local ngx_encode_args = ngx.encode_args | ||
local ngx_re_match = ngx.re.match | ||
local ngx_re_gmatch = ngx.re.gmatch | ||
local ngx_re_sub = ngx.re.sub | ||
local ngx_re_gsub = ngx.re.gsub | ||
local ngx_re_find = ngx.re.find | ||
local ngx_log = ngx.log | ||
|
@@ -787,7 +789,6 @@ function _M.request_pipeline(self, requests) | |
return responses | ||
end | ||
|
||
|
||
function _M.request_uri(self, uri, params) | ||
params = tbl_copy(params or {}) -- Take by value | ||
|
||
|
@@ -800,11 +801,55 @@ function _M.request_uri(self, uri, params) | |
if not params.path then params.path = path end | ||
if not params.query then params.query = query end | ||
|
||
local c, err = self:connect(host, port) | ||
-- See if we should use a proxy to make this request | ||
local proxy_uri = self:get_proxy_uri(scheme, host) | ||
|
||
-- Make the connection either through the proxy or directly | ||
-- to the remote host | ||
local c, err | ||
|
||
if proxy_uri then | ||
c, err = self:connect_proxy(proxy_uri, scheme, host, port) | ||
else | ||
c, err = self:connect(host, port) | ||
end | ||
|
||
if not c then | ||
return nil, err | ||
end | ||
|
||
if proxy_uri then | ||
if scheme == "http" then | ||
-- When a proxy is used, the target URI must be in absolute-form | ||
-- (RFC 7230, Section 5.3.2.). That is, it must be an absolute URI | ||
-- to the remote resource with the scheme, host and an optional port | ||
-- in place. | ||
-- | ||
-- Since _format_request() constructs the request line by concatenating | ||
-- params.path and params.query together, we need to modify the path | ||
-- to also include the scheme, host and port so that the final form | ||
            -- is conformant to RFC 7230. | ||
if port == 80 then | ||
params.path = scheme .. "://" .. host .. path | ||
else | ||
params.path = scheme .. "://" .. host .. ":" .. port .. path | ||
end | ||
end | ||
|
||
if scheme == "https" then | ||
-- don't keep this connection alive as the next request could target | ||
-- any host and re-using the proxy tunnel for that is not possible | ||
self.keepalive = false | ||
end | ||
|
||
        -- self:connect_proxy() set the host and port to point to the proxy server. As | ||
        -- the connection to the proxy has been established, set the host and port | ||
        -- to point to the actual remote endpoint at the other end of the tunnel to | ||
        -- ensure the correct Host header is added to the requests. | ||
self.host = host | ||
self.port = port | ||
end | ||
|
||
if scheme == "https" then | ||
local verify = true | ||
if params.ssl_verify == false then | ||
|
@@ -914,5 +959,106 @@ function _M.proxy_response(self, response, chunksize) | |
until not chunk | ||
end | ||
|
||
--- Store the proxy configuration used by get_proxy_uri().
--
-- The options are copied, so later changes to the caller's table do not
-- affect this client instance. Fields read by get_proxy_uri():
--   * http_proxy  - proxy URI used for requests with the "http" scheme
--   * https_proxy - proxy URI used for requests with the "https" scheme
--   * no_proxy    - comma separated list of hosts that bypass the proxy,
--                   or "*" to bypass it for every host
--
-- @param opts  table of proxy options; nil is treated as an empty table,
--              mirroring how request_uri() handles its params argument
function _M.set_proxy_options(self, opts)
    self.proxy_opts = tbl_copy(opts or {})  -- Take by value
end
|
||
--- Work out which proxy, if any, should be used for a request.
--
-- Consults the options stored in self.proxy_opts:
--   * no_proxy    - comma separated list of hosts/domains that must not be
--                   proxied, or "*" to disable proxying for every host
--   * http_proxy  - returned when scheme is "http"
--   * https_proxy - returned when scheme is "https"
--
-- @param scheme  request scheme, "http" or "https"
-- @param host    host name of the request target
-- @return the proxy URI to use, or nil when the request should be made
--         directly (no options set, host excluded, or no proxy configured
--         for the scheme)
function _M.get_proxy_uri(self, scheme, host)
    local proxy_opts = self.proxy_opts
    if not proxy_opts then
        return nil
    end

    -- Check if the no_proxy option matches this host. Implementation adapted
    -- from lua-http library (https://github.com/daurnimator/lua-http)
    local no_proxy = proxy_opts.no_proxy
    if no_proxy then
        if no_proxy == "*" then
            -- all hosts are excluded
            return nil
        end

        local no_proxy_set = {}
        -- wget allows domains in no_proxy list to be prefixed by "."
        -- e.g. no_proxy=.mit.edu
        --
        -- Whitespace is excluded from the capture so that lists written as
        -- "a.com, b.com" do not store " b.com" (which could never match).
        for host_suffix in ngx_re_gmatch(no_proxy, "\\.?([^,\\s]+)") do
            no_proxy_set[host_suffix[1]] = true
        end

        -- From curl docs:
        -- matched as either a domain which contains the hostname, or the
        -- hostname itself. For example local.com would match local.com,
        -- local.com:80, and www.local.com, but not www.notlocal.com.
        --
        -- Therefore, we keep stripping subdomains from the host, compare
        -- them to the ones in the no_proxy list and continue until we find
        -- a match or until there's only the TLD left
        local candidate = host
        while candidate do
            if no_proxy_set[candidate] then
                return nil
            end

            -- Strip the next level from the domain. Stop when nothing could
            -- be stripped (n == 0): otherwise a host such as ".example.com"
            -- or "a..b" still contains a dot but never gets any shorter,
            -- and the loop would spin forever.
            local stripped, n = ngx_re_sub(candidate, "^[^.]+\\.", "")
            if not stripped or n == 0 then
                break
            end

            -- Only the TLD is left; it is deliberately not checked
            if not ngx_re_find(stripped, "\\.") then
                break
            end

            candidate = stripped
        end
    end

    if scheme == "http" and proxy_opts.http_proxy then
        return proxy_opts.http_proxy
    end

    if scheme == "https" and proxy_opts.https_proxy then
        return proxy_opts.https_proxy
    end

    return nil
end
|
||
|
||
function _M.connect_proxy(self, proxy_uri, scheme, host, port) | ||
-- Parse the provided proxy URI | ||
local parsed_proxy_uri, err = self:parse_uri(proxy_uri, false) | ||
if not parsed_proxy_uri then | ||
return nil, err | ||
end | ||
|
||
-- Check that the scheme is http (https is not supported for | ||
-- connections between the client and the proxy) | ||
local proxy_scheme = parsed_proxy_uri[1] | ||
if proxy_scheme ~= "http" then | ||
return nil, "protocol " .. proxy_scheme .. " not supported for proxy connections" | ||
end | ||
|
||
-- Make the connection to the given proxy | ||
local proxy_host, proxy_port = parsed_proxy_uri[2], parsed_proxy_uri[3] | ||
local c, err = self:connect(proxy_host, proxy_port) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @pintsized @sjakthol It should be possible to create a keepalive pool for each unique destination proxy+host.
So setting that to […] If my thinking is right, I'm happy to propose a patch :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does that solve a real-world problem? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, yes. Any high-performance system has to use keepalives or it is going to run out of ephemeral ports very soon. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Of course, keepalives are vital, but does having a unique pool for the proxied destination solve a real-world problem? Surely the forwarding proxy is responsible for managing keepalives to the destination? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, the proxy will be responsible for connections between the proxy and the upstream. But the issue is between OpenResty and the proxy.
Imagine 100 identical requests being made serially to OpenResty. OpenResty would open 100 connections to the proxy, and the proxy, when using keepalive, would open just one connection to the upstream. Now imagine 3k requests per second. That burns through ~40k ephemeral ports in less than 15 seconds. Then OpenResty will no longer be able to open new connections until the previous ones are recycled. Using keepalives is vital in high-performance servers that connect to external TCP services. |
||
if not c then | ||
return nil, err | ||
end | ||
|
||
if scheme == "https" then | ||
-- Make a CONNECT request to create a tunnel to the destination through | ||
-- the proxy. The request-target and the Host header must be in the | ||
-- authority-form of RFC 7230 Section 5.3.3. See also RFC 7231 Section | ||
-- 4.3.6 for more details about the CONNECT request | ||
local destination = host .. ":" .. port | ||
local res, err = self:request({ | ||
method = "CONNECT", | ||
path = destination, | ||
headers = { | ||
["Host"] = destination | ||
} | ||
}) | ||
|
||
if not res then | ||
return nil, err | ||
end | ||
|
||
if res.status < 200 or res.status > 299 then | ||
return nil, "failed to establish a tunnel through a proxy: " .. res.status | ||
end | ||
end | ||
|
||
return c, nil | ||
end | ||
|
||
return _M |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this function be added to the ToC?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, good spot, please add to the ToC.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added.