From 6152fa06a405708b1ac74a774da1ad41a180b7b3 Mon Sep 17 00:00:00 2001 From: Cliff Brake Date: Tue, 11 Jan 2022 09:38:48 -0500 Subject: [PATCH 1/8] - implement basic redirect - _redirects: add support for 200 rewrite - add support for regex in redirects --- core/corehttp/gateway_handler.go | 80 +++++++++++++++++++++++++++++++- core/corehttp/redirect.go | 69 +++++++++++++++++++++++++++ core/corehttp/redirect_test.go | 37 +++++++++++++++ 3 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 core/corehttp/redirect_test.go diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 32d2eebaef8..6a996947916 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -270,6 +270,7 @@ func (i *gatewayHandler) optionsHandler(w http.ResponseWriter, r *http.Request) func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { begin := time.Now() + urlPath := r.URL.Path logger := log.With("from", r.RequestURI) logger.Debug("http request received") @@ -319,9 +320,25 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } - contentPath := ipath.New(r.URL.Path) + redirects, err := i.searchUpTreeForRedirects(r, urlPath) + if err == nil { + redirected, newPath, err := i.redirect(w, r, redirects) + if err != nil { + // FIXME what to do here with errors ... + } + + if redirected { + return + } + + if newPath != "" { + urlPath = newPath + } + } + + contentPath := ipath.New(urlPath) if pathErr := contentPath.IsValid(); pathErr != nil { - if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { + if fixupSuperfluousNamespace(w, urlPath, r.URL.RawQuery) { // the error was due to redundant namespace, which we were able to fix // by returning error/redirect page, nothing left to do here logger.Debugw("redundant namespace; noop") @@ -410,6 +427,46 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } +// redirect returns redirected, newPath (if rewrite), error +func (i *gatewayHandler) redirect(w http.ResponseWriter, r *http.Request, path ipath.Resolved) (bool, string, error) { + node, err := i.api.Unixfs().Get(r.Context(), path) + if err != nil { + return false, "", fmt.Errorf("could not get redirects file: %v", err) + } + + defer node.Close() + + f, ok := node.(files.File) + + if !ok { + return false, "", fmt.Errorf("redirect, could not convert node to file") + } + + redirs := newRedirs(f) + + // extract "file" part of URL, typically the part after /ipfs/CID/... + g := strings.Split(r.URL.Path, "/") + + if len(g) > 3 { + filePartPath := "/" + strings.Join(g[3:], "/") + + to, code := redirs.search(filePartPath) + if code > 0 { + if code == 200 { + // rewrite + newPath := strings.Join(g[0:3], "/") + "/" + to + return false, newPath, nil + } + + // redirect + http.Redirect(w, r, to, code) + return true, "", nil + } + } + + return false, "", nil +} + func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) if err != nil { @@ -809,6 +866,25 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "", nil, nil } +func (i *gatewayHandler) searchUpTreeForRedirects(r *http.Request, path string) (ipath.Resolved, error) { + pathComponents := strings.Split(path, "/") + + for idx := len(pathComponents); idx >= 3; idx-- { + rdir := gopath.Join(append(pathComponents[0:idx], "_redirects")...) + rdirPath := ipath.New("/" + rdir) + if rdirPath.IsValid() != nil { + break + } + resolvedPath, err := i.api.ResolvePath(r.Context(), rdirPath) + if err != nil { + continue + } + return resolvedPath, nil + } + + return nil, fmt.Errorf("no redirects in any parent folder") +} + func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) if err != nil { diff --git a/core/corehttp/redirect.go b/core/corehttp/redirect.go index e7b961e604e..e7538464777 100644 --- a/core/corehttp/redirect.go +++ b/core/corehttp/redirect.go @@ -1,8 +1,14 @@ package corehttp import ( + "bufio" + "fmt" + "io" "net" "net/http" + "regexp" + "strconv" + "strings" core "github.com/ipfs/go-ipfs/core" ) @@ -26,3 +32,66 @@ type redirectHandler struct { func (i *redirectHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, i.path, 302) } + +type redirLine struct { + matcher string + to string + code int +} + +func (rdl redirLine) match(s string) (bool, error) { + re, err := regexp.Compile(rdl.matcher) + if err != nil { + return false, fmt.Errorf("Failed to compile %v: %v", rdl.matcher, err) + } + + match := re.FindString(s) + if match == "" { + return false, nil + } + + return true, nil +} + +type redirs []redirLine + +func newRedirs(f io.Reader) *redirs { + ret := redirs{} + scanner := bufio.NewScanner(f) + scanner.Split(bufio.ScanLines) + for scanner.Scan() { + t := scanner.Text() + if len(t) > 0 && t[0] == '#' { + // comment, skip line + continue + } + groups := strings.Fields(scanner.Text()) + if len(groups) >= 2 { + matcher := groups[0] + to := groups[1] + // default to 302 (temporary redirect) + code := 302 + if len(groups) >= 3 { + c, err := strconv.Atoi(groups[2]) + if err == nil { + code = c + } + } + ret = append(ret, redirLine{matcher, to, code}) + } + } + + return &ret +} + +// returns "" if no redir +func (r redirs) search(path string) (string, int) { + for _, rdir := range r { + m, err := rdir.match(path) + if m && err == nil { + return rdir.to, rdir.code + } + } + + return "", 0 +} diff --git a/core/corehttp/redirect_test.go b/core/corehttp/redirect_test.go new file mode 100644 index 00000000000..0c9a6255199 --- /dev/null +++ b/core/corehttp/redirect_test.go @@ -0,0 +1,37 @@ +package corehttp + +import ( + "fmt" + "testing" +) + +func TestRedirline(t *testing.T) { + for _, tc := range []struct { + matcher string + s string + exp bool + errExp bool + }{ + {"hi", "hi", true, false}, + {"hi", "hithere", true, false}, + {"^hi$", "hithere", false, false}, + {"^hi$", "hi", true, false}, + {"hi.*", "hithere", true, false}, + {"/hi", "/hi/there", true, false}, + {"^/hi/", "/hi/there/now", true, false}, + {"^/hi/", "/hithere", false, false}, + {"^/hi/(.*", "/hi/there/now", false, true}, + } { + r := redirLine{tc.matcher, "to", 200} + ok, err := r.match(tc.s) + if ok != tc.exp { + t.Errorf("%v %v, expected %v, got %v", tc.matcher, tc.s, tc.exp, + ok) + } + + if err != nil != tc.errExp { + fmt.Printf("regexp error %v\n", err) + t.Errorf("%v %v, expected error %v, got %v", tc.matcher, tc.s, tc.errExp, err == nil) + } + } +} From 0395527156f504c55d401fae7cbe86998ce753f5 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Fri, 1 Apr 2022 16:42:17 -0500 Subject: [PATCH 2/8] - Update getOrHeadHandler to dispatch to getOrHeadHandlerUnixFs for Unixfs reponse format - Write functions for logic in getOrHeadHandler, to make it easier to read and to enable reuse in getOrHeadHandlerUnixfs - Move Unixfs specific functions to gateway_handler_unixfs.go - Check for _redirects file if we have origin isolation --- core/corehttp/gateway_handler.go | 366 ++++++++-------------- core/corehttp/gateway_handler_unixfs.go | 231 +++++++++++++- test/sharness/t0114-gateway-subdomains.sh | 36 +++ 3 files changed, 397 insertions(+), 236 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 6a996947916..ef042782bee 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "html/template" - "io" "mime" "net/http" "net/url" @@ -12,7 +11,6 @@ import ( gopath "path" "regexp" "runtime/debug" - "strconv" "strings" "time" @@ -28,6 +26,7 @@ import ( prometheus "github.com/prometheus/client_golang/prometheus" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" ) const ( @@ -270,147 +269,71 @@ func (i *gatewayHandler) optionsHandler(w http.ResponseWriter, r *http.Request) func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { begin := time.Now() - urlPath := r.URL.Path logger := log.With("from", r.RequestURI) logger.Debug("http request received") - // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) - // TODO: remove this after go-ipfs 0.13 ships - if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { - err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") - webError(w, "unsupported HTTP header", err, http.StatusBadRequest) + if !validateSupportedHeaders(w, r) { return } - // ?uri query param support for requests produced by web browsers - // via navigator.registerProtocolHandler Web API - // https://developer.mozilla.org/en-US/docs/Web/API/Navigator/registerProtocolHandler - // TLDR: redirect /ipfs/?uri=ipfs%3A%2F%2Fcid%3Fquery%3Dval to /ipfs/cid?query=val - if uriParam := r.URL.Query().Get("uri"); uriParam != "" { - u, err := url.Parse(uriParam) - if err != nil { - webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) - return - } - if u.Scheme != "ipfs" && u.Scheme != "ipns" { - webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) - return - } - path := u.Path - if u.RawQuery != "" { // preserve query if present - path = path + "?" + u.RawQuery - } - - redirectURL := gopath.Join("/", u.Scheme, u.Host, path) - logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) - http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + if !protocolHandlerRegistration(w, r, logger) { return } - // Service Worker registration request - if r.Header.Get("Service-Worker") == "script" { - // Disallow Service Worker registration on namespace roots - // https://github.com/ipfs/go-ipfs/issues/4025 - matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) - if matched { - err := fmt.Errorf("registration is not allowed for this scope") - webError(w, "navigator.serviceWorker", err, http.StatusBadRequest) - return - } + if !serviceWorkerRegistration(w, r) { + return } - redirects, err := i.searchUpTreeForRedirects(r, urlPath) - if err == nil { - redirected, newPath, err := i.redirect(w, r, redirects) - if err != nil { - // FIXME what to do here with errors ... - } - - if redirected { - return - } + contentPath := ipath.New(r.URL.Path) + if !fixSuperfluousNamespaces(w, r, contentPath, logger) { + return + } - if newPath != "" { - urlPath = newPath - } + // Detect when explicit Accept header or ?format parameter are present + responseFormat, formatParams, err := customResponseFormat(r) + if err != nil { + webError(w, "error while processing the Accept header", err, http.StatusBadRequest) + return } + trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) - contentPath := ipath.New(urlPath) - if pathErr := contentPath.IsValid(); pathErr != nil { - if fixupSuperfluousNamespace(w, urlPath, r.URL.RawQuery) { - // the error was due to redundant namespace, which we were able to fix - // by returning error/redirect page, nothing left to do here - logger.Debugw("redundant namespace; noop") - return - } - // unable to fix path, returning error - webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) + // For Unixfs, when a path can't be resolved we need to check for redirects and pretty 404 page files. + if responseFormat == "" { + logger.Debugw("dispatching to getOrHeadHandlerUnixfs") + i.getOrHeadHandlerUnixfs(w, r, begin, logger) return } // Resolve path to the final DAG node for the ETag resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) + trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) + switch err { case nil: case coreiface.ErrOffline: webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) return default: - // if Accept is text/html, see if ipfs-404.html is present - if i.servePretty404IfPresent(w, r, contentPath) { - logger.Debugw("serve pretty 404 if present") - return - } - webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) return } - // Detect when explicit Accept header or ?format parameter are present - responseFormat, formatParams, err := customResponseFormat(r) - if err != nil { - webError(w, "error while processing the Accept header", err, http.StatusBadRequest) + if i.finishEarlyForMatchingETag(w, r, resolvedPath) { return } - trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) - trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) - // Finish early if client already has matching Etag - if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { - w.WriteHeader(http.StatusNotModified) + if !i.updateGlobalMetrics(w, r, begin, contentPath, resolvedPath) { return } - // Update the global metric of the time it takes to read the final root block of the requested resource - // NOTE: for legacy reasons this happens before we go into content-type specific code paths - _, err = i.api.Block().Get(r.Context(), resolvedPath) - if err != nil { - webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) - return - } - ns := contentPath.Namespace() - timeToGetFirstContentBlock := time.Since(begin).Seconds() - i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead - i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) - - // HTTP Headers - i.addUserHeaders(w) // ok, _now_ write user's headers. - w.Header().Set("X-Ipfs-Path", contentPath.String()) - - if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { - w.Header().Set("X-Ipfs-Roots", rootCids) - } else { // this should never happen, as we resolved the contentPath already - webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + if !i.setHeaders(w, r, contentPath) { return } // Support custom response formats passed via ?format or Accept HTTP header + // Note that we handle Unixfs (e.g. responseFormat of "") above. switch responseFormat { - case "": // The implicit response format is UnixFS - logger.Debugw("serving unixfs", "path", contentPath) - i.serveUnixFs(w, r, resolvedPath, contentPath, begin, logger) - return case "application/vnd.ipld.raw": logger.Debugw("serving raw block", "path", contentPath) i.serveRawBlock(w, r, resolvedPath, contentPath, begin) @@ -418,7 +341,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request case "application/vnd.ipld.car": logger.Debugw("serving car stream", "path", contentPath) carVersion := formatParams["version"] - i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) + i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) @@ -427,76 +350,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } -// redirect returns redirected, newPath (if rewrite), error -func (i *gatewayHandler) redirect(w http.ResponseWriter, r *http.Request, path ipath.Resolved) (bool, string, error) { - node, err := i.api.Unixfs().Get(r.Context(), path) - if err != nil { - return false, "", fmt.Errorf("could not get redirects file: %v", err) - } - - defer node.Close() - - f, ok := node.(files.File) - - if !ok { - return false, "", fmt.Errorf("redirect, could not convert node to file") - } - - redirs := newRedirs(f) - - // extract "file" part of URL, typically the part after /ipfs/CID/... - g := strings.Split(r.URL.Path, "/") - - if len(g) > 3 { - filePartPath := "/" + strings.Join(g[3:], "/") - - to, code := redirs.search(filePartPath) - if code > 0 { - if code == 200 { - // rewrite - newPath := strings.Join(g[0:3], "/") + "/" + to - return false, newPath, nil - } - - // redirect - http.Redirect(w, r, to, code) - return true, "", nil - } - } - - return false, "", nil -} - -func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { - resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) - if err != nil { - return false - } - - dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path) - if err != nil { - return false - } - defer dr.Close() - - f, ok := dr.(files.File) - if !ok { - return false - } - - size, err := f.Size() - if err != nil { - return false - } - - log.Debugw("using pretty 404 file", "path", contentPath) - w.Header().Set("Content-Type", ctype) - w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) - w.WriteHeader(http.StatusNotFound) - _, err = io.CopyN(w, f, size) - return err == nil -} - func (i *gatewayHandler) postHandler(w http.ResponseWriter, r *http.Request) { p, err := i.api.Unixfs().Add(r.Context(), files.NewReaderFile(r.Body)) if err != nil { @@ -866,67 +719,6 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "", nil, nil } -func (i *gatewayHandler) searchUpTreeForRedirects(r *http.Request, path string) (ipath.Resolved, error) { - pathComponents := strings.Split(path, "/") - - for idx := len(pathComponents); idx >= 3; idx-- { - rdir := gopath.Join(append(pathComponents[0:idx], "_redirects")...) - rdirPath := ipath.New("/" + rdir) - if rdirPath.IsValid() != nil { - break - } - resolvedPath, err := i.api.ResolvePath(r.Context(), rdirPath) - if err != nil { - continue - } - return resolvedPath, nil - } - - return nil, fmt.Errorf("no redirects in any parent folder") -} - -func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { - filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) - if err != nil { - return nil, "", err - } - - pathComponents := strings.Split(contentPath.String(), "/") - - for idx := len(pathComponents); idx >= 3; idx-- { - pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) - parsed404Path := ipath.New("/" + pretty404) - if parsed404Path.IsValid() != nil { - break - } - resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path) - if err != nil { - continue - } - return resolvedPath, ctype, nil - } - - return nil, "", fmt.Errorf("no pretty 404 in any parent folder") -} - -func preferred404Filename(acceptHeaders []string) (string, string, error) { - // If we ever want to offer a 404 file for a different content type - // then this function will need to parse q weightings, but for now - // the presence of anything matching HTML is enough. - for _, acceptHeader := range acceptHeaders { - accepted := strings.Split(acceptHeader, ",") - for _, spec := range accepted { - contentType := strings.SplitN(spec, ";", 1)[0] - switch contentType { - case "*/*", "text/*", "text/html": - return "ipfs-404.html", "text/html", nil - } - } - } - - return "", "", fmt.Errorf("there is no 404 file for the requested content types") -} - // returns unquoted path with all special characters revealed as \u codes func debugStr(path string) string { q := fmt.Sprintf("%+q", path) @@ -965,3 +757,109 @@ func fixupSuperfluousNamespace(w http.ResponseWriter, urlPath string, urlQuery s ErrorMsg: fmt.Sprintf("invalid path: %q should be %q", urlPath, intendedPath.String()), }) == nil } + +func validateSupportedHeaders(w http.ResponseWriter, r *http.Request) bool { + // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) + // TODO: remove this after go-ipfs 0.13 ships + if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { + err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") + webError(w, "unsupported HTTP header", err, http.StatusBadRequest) + return false + } else { + return true + } +} + +func protocolHandlerRegistration(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) bool { + if uriParam := r.URL.Query().Get("uri"); uriParam != "" { + u, err := url.Parse(uriParam) + if err != nil { + webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) + return false + } + if u.Scheme != "ipfs" && u.Scheme != "ipns" { + webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) + return false + } + path := u.Path + if u.RawQuery != "" { // preserve query if present + path = path + "?" + u.RawQuery + } + + redirectURL := gopath.Join("/", u.Scheme, u.Host, path) + logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) + http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + return false + } + + return true +} + +// Disallow Service Worker registration on namespace roots +// https://github.com/ipfs/go-ipfs/issues/4025 +func serviceWorkerRegistration(w http.ResponseWriter, r *http.Request) bool { + if r.Header.Get("Service-Worker") == "script" { + matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) + if matched { + err := fmt.Errorf("registration is not allowed for this scope") + webError(w, "navigator.serviceWorker", err, http.StatusBadRequest) + return false + } + } + + return true +} + +func fixSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) bool { + if pathErr := contentPath.IsValid(); pathErr != nil { + if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { + // the error was due to redundant namespace, which we were able to fix + // by returning error/redirect page, nothing left to do here + logger.Debugw("redundant namespace; noop") + return false + } + // unable to fix path, returning error + webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) + return false + } + return true +} + +func (i *gatewayHandler) finishEarlyForMatchingETag(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved) bool { + // Finish early if client already has matching Etag + if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { + w.WriteHeader(http.StatusNotModified) + return true + } + + return false +} + +// Update the global metric of the time it takes to read the final root block of the requested resource +func (i *gatewayHandler) updateGlobalMetrics(w http.ResponseWriter, r *http.Request, begin time.Time, contentPath ipath.Path, resolvedPath ipath.Resolved) bool { + // NOTE: for legacy reasons this happens before we go into content-type specific code paths + _, err := i.api.Block().Get(r.Context(), resolvedPath) + if err != nil { + webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) + return false + } + ns := contentPath.Namespace() + timeToGetFirstContentBlock := time.Since(begin).Seconds() + i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead + i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) + return true +} + +func (i *gatewayHandler) setHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + i.addUserHeaders(w) // ok, _now_ write user's headers. + w.Header().Set("X-Ipfs-Path", contentPath.String()) + + if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { + w.Header().Set("X-Ipfs-Roots", rootCids) + } else { // this should never happen, as we resolved the contentPath already + webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + return false + } + + return true +} diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go index 2252b3891c6..53c21231202 100644 --- a/core/corehttp/gateway_handler_unixfs.go +++ b/core/corehttp/gateway_handler_unixfs.go @@ -3,20 +3,131 @@ package corehttp import ( "fmt" "html" + "io" "net/http" + gopath "path" + "strconv" + "strings" "time" files "github.com/ipfs/go-ipfs-files" "github.com/ipfs/go-ipfs/tracing" + "github.com/ipfs/go-path/resolver" + coreiface "github.com/ipfs/interface-go-ipfs-core" ipath "github.com/ipfs/interface-go-ipfs-core/path" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) -func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { +func (i *gatewayHandler) getOrHeadHandlerUnixfs(w http.ResponseWriter, r *http.Request, begin time.Time, logger *zap.SugaredLogger) { + urlPath := r.URL.Path + // Only look for _redirects file if we have Origin isolation + if hasOriginIsolation(r) { + // Check for _redirects file and redirect as needed + redirectsFile, err := i.getRedirectsFile(r) + if err != nil { + switch err.(type) { + case resolver.ErrNoLink: + // _redirects files doesn't exist, so don't error + default: + // TODO(JJ): During tests we get multibase.ErrUnsupportedEncoding + // This comes from multibase and I assume is due to a fake or otherwise bad CID being in the test. + // So for now any errors getting the redirect file are silently ignored. + // internalWebError(w, err) + // return + } + } else { + // _redirects file exists, so parse it and redirect + redirected, newPath, err := i.redirect(w, r, redirectsFile) + if err != nil { + // TODO(JJ): How should we handle parse or redirect errors? + internalWebError(w, err) + return + } + + if redirected { + return + } + + // 200 is treated as a rewrite, so update the path and continue + if newPath != "" { + urlPath = newPath + } + } + } + + contentPath := ipath.New(urlPath) + + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) + + switch err { + case nil: + case coreiface.ErrOffline: + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) + return + default: + // if Accept is text/html, see if ipfs-404.html is present + if i.servePretty404IfPresent(w, r, contentPath) { + logger.Debugw("serve pretty 404 if present") + return + } + + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) + return + } + + if i.finishEarlyForMatchingETag(w, r, resolvedPath) { + return + } + + if !i.updateGlobalMetrics(w, r, begin, contentPath, resolvedPath) { + return + } + + if !i.setHeaders(w, r, contentPath) { + return + } + + logger.Debugw("serving unixfs", "path", contentPath) + i.serveUnixfs(w, r, resolvedPath, contentPath, begin, logger) + return +} + +func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) + if err != nil { + return false + } + + dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path) + if err != nil { + return false + } + defer dr.Close() + + f, ok := dr.(files.File) + if !ok { + return false + } + + size, err := f.Size() + if err != nil { + return false + } + + log.Debugw("using pretty 404 file", "path", contentPath) + w.Header().Set("Content-Type", ctype) + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + w.WriteHeader(http.StatusNotFound) + _, err = io.CopyN(w, f, size) + return err == nil +} + +func (i *gatewayHandler) serveUnixfs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { ctx, span := tracing.Span(r.Context(), "Gateway", "ServeUnixFs", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() + // Handling UnixFS dr, err := i.api.Unixfs().Get(ctx, resolvedPath) if err != nil { @@ -35,9 +146,125 @@ func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, res // Handling Unixfs directory dir, ok := dr.(files.Directory) if !ok { - internalWebError(w, fmt.Errorf("unsupported UnixFs type")) + internalWebError(w, fmt.Errorf("unsupported Unixfs type")) return } logger.Debugw("serving unixfs directory", "path", contentPath) i.serveDirectory(w, r, resolvedPath, contentPath, dir, begin, logger) } + +// redirect returns redirected, newPath (if rewrite), error +func (i *gatewayHandler) redirect(w http.ResponseWriter, r *http.Request, path ipath.Resolved) (bool, string, error) { + node, err := i.api.Unixfs().Get(r.Context(), path) + if err != nil { + return false, "", fmt.Errorf("could not get redirects file: %v", err) + } + + defer node.Close() + + f, ok := node.(files.File) + + if !ok { + return false, "", fmt.Errorf("redirect, could not convert node to file") + } + + redirs := newRedirs(f) + + // extract "file" part of URL, typically the part after /ipfs/CID/... + g := strings.Split(r.URL.Path, "/") + + if len(g) > 3 { + filePartPath := "/" + strings.Join(g[3:], "/") + + to, code := redirs.search(filePartPath) + if code > 0 { + if code == 200 { + // rewrite + newPath := strings.Join(g[0:3], "/") + "/" + to + return false, newPath, nil + } + + // redirect + http.Redirect(w, r, to, code) + return true, "", nil + } + } + + return false, "", nil +} + +// Returns a resolved path to the _redirects file located in the root CID path of the requested path +func (i *gatewayHandler) getRedirectsFile(r *http.Request) (ipath.Resolved, error) { + // r.URL.Path is the full ipfs path to the requested resource, + // regardless of whether path or subdomain resolution is used. + rootPath := getRootPath(r.URL.Path) + // TODO(JJ): handle error + path := ipath.New(gopath.Join(rootPath, "_redirects")) + resolvedPath, err := i.api.ResolvePath(r.Context(), path) + if err != nil { + return nil, err + } + return resolvedPath, nil +} + +// Returns the root CID path for the given path +func getRootPath(path string) string { + if strings.HasPrefix(path, ipfsPathPrefix) && strings.Count(gopath.Clean(path), "/") >= 2 { + parts := strings.Split(path, "/") + return gopath.Join(ipfsPathPrefix, parts[2]) + } else { + return "" + } +} + +// TODO(JJ): I was thinking about changing this to just look at the root path as well, but the docs say it searches up +func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { + filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) + if err != nil { + return nil, "", err + } + + pathComponents := strings.Split(contentPath.String(), "/") + + for idx := len(pathComponents); idx >= 3; idx-- { + pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) + parsed404Path := ipath.New("/" + pretty404) + if parsed404Path.IsValid() != nil { + break + } + resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path) + if err != nil { + continue + } + return resolvedPath, ctype, nil + } + + return nil, "", fmt.Errorf("no pretty 404 in any parent folder") +} + +func preferred404Filename(acceptHeaders []string) (string, string, error) { + // If we ever want to offer a 404 file for a different content type + // then this function will need to parse q weightings, but for now + // the presence of anything matching HTML is enough. + for _, acceptHeader := range acceptHeaders { + accepted := strings.Split(acceptHeader, ",") + for _, spec := range accepted { + contentType := strings.SplitN(spec, ";", 1)[0] + switch contentType { + case "*/*", "text/*", "text/html": + return "ipfs-404.html", "text/html", nil + } + } + } + + return "", "", fmt.Errorf("there is no 404 file for the requested content types") +} + +// TODO(JJ): Pretty sure this is incorrect. Validate the correct approach. +func hasOriginIsolation(r *http.Request) bool { + if _, ok := r.Context().Value("gw-hostname").(string); ok { + return true + } else { + return false + } +} diff --git a/test/sharness/t0114-gateway-subdomains.sh b/test/sharness/t0114-gateway-subdomains.sh index 41abd4a8774..75f55cfe033 100755 --- a/test/sharness/t0114-gateway-subdomains.sh +++ b/test/sharness/t0114-gateway-subdomains.sh @@ -144,6 +144,42 @@ test_expect_success 'start daemon with empty config for Gateway.PublicGateways' test_launch_ipfs_daemon_without_network ' +## ============================================================================ +## Test _redirects file support +## ============================================================================ + +# Directory tree crafted to test _redirects file support +test_expect_success "Add the _redirects file test directory" ' + mkdir -p testredirect/ && + echo "index.html" > testredirect/index.html && + echo "one.html" > testredirect/one.html && + echo "two.html" > testredirect/two.html && + echo "^/301-redirect-one$ /one.html 301" > testredirect/_redirects && + echo "^/302-redirect-two$ /two.html 302" >> testredirect/_redirects && + echo "^/200-index$ /index.html 200" >> testredirect/_redirects && + REDIRECTS_DIR_CID=$(ipfs add -Qr --cid-version 1 testredirect) +' + +REDIRECTS_DIR_HOSTNAME="${REDIRECTS_DIR_CID}.ipfs.localhost:$GWAY_PORT" + +test_expect_success "_redirects - /301-redirect-one" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/301-redirect-one" > response && + test_should_contain "one.html" response && + test_should_contain "301 Moved Permanently" response +' + +test_expect_success "_redirects - /302-redirect-two" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/302-redirect-two" > response && + test_should_contain "two.html" response && + test_should_contain "302 Found" response +' + +test_expect_success "_redirects - /200-index" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/200-index" > response && + test_should_contain "index.html" response && + test_should_contain "200 OK" response +' + ## ============================================================================ ## Test path-based requests to a local gateway with default config ## (forced redirects to http://*.localhost) From 39e82fb0084a6653caf52d1bca7a527561f70510 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Mon, 4 Apr 2022 15:26:22 -0500 Subject: [PATCH 3/8] - More descriptive sharness test names - Add test for no redirect due to no origin isolation --- test/sharness/t0114-gateway-subdomains.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/test/sharness/t0114-gateway-subdomains.sh b/test/sharness/t0114-gateway-subdomains.sh index 75f55cfe033..036c4eeefc8 100755 --- a/test/sharness/t0114-gateway-subdomains.sh +++ b/test/sharness/t0114-gateway-subdomains.sh @@ -162,24 +162,34 @@ test_expect_success "Add the _redirects file test directory" ' REDIRECTS_DIR_HOSTNAME="${REDIRECTS_DIR_CID}.ipfs.localhost:$GWAY_PORT" -test_expect_success "_redirects - /301-redirect-one" ' +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/301-redirect-one redirects with 301, per _redirects file" ' curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/301-redirect-one" > response && test_should_contain "one.html" response && test_should_contain "301 Moved Permanently" response ' -test_expect_success "_redirects - /302-redirect-two" ' +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/302-redirect-two redirects with 302, per _redirects file" ' curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/302-redirect-two" > response && test_should_contain "two.html" response && test_should_contain "302 Found" response ' -test_expect_success "_redirects - /200-index" ' +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/200-index returns 200, per _redirects file" ' curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/200-index" > response && test_should_contain "index.html" response && test_should_contain "200 OK" response ' +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/has-no-redirects-entry returns 404, since not in _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/has-no-redirects-entry" > response && + test_should_contain "404 Not Found" response +' + +test_expect_success "request for http://127.0.0.1:$GWAY_PORT/ipfs/$REDIRECTS_DIR_CID/301-redirect-one returns 404, no _redirects since no origin isolation" ' + curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$REDIRECTS_DIR_CID/301-redirect-one" > response && + test_should_contain "404 Not Found" response +' + ## ============================================================================ ## Test path-based requests to a local gateway with default config ## (forced redirects to http://*.localhost) From ce9e6cf98e2930d6fcd4b2abfb6c2541d1a93109 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Mon, 4 Apr 2022 19:01:12 -0500 Subject: [PATCH 4/8] - More error handling - Use http.* for status codes instead of hardcoded numbers - Test default of no status code in redirects --- core/corehttp/gateway_handler_unixfs.go | 17 +++++++++++------ core/corehttp/redirect.go | 6 +++--- test/sharness/t0114-gateway-subdomains.sh | 9 ++++++++- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go index 53c21231202..8c7a7d034a1 100644 --- a/core/corehttp/gateway_handler_unixfs.go +++ b/core/corehttp/gateway_handler_unixfs.go @@ -1,6 +1,7 @@ package corehttp import ( + "errors" "fmt" "html" "io" @@ -22,6 +23,7 @@ import ( func (i *gatewayHandler) getOrHeadHandlerUnixfs(w http.ResponseWriter, r *http.Request, begin time.Time, logger *zap.SugaredLogger) { urlPath := r.URL.Path + // Only look for _redirects file if we have Origin isolation if hasOriginIsolation(r) { // Check for _redirects file and redirect as needed @@ -178,7 +180,7 @@ func (i *gatewayHandler) redirect(w http.ResponseWriter, r *http.Request, path i to, code := redirs.search(filePartPath) if code > 0 { - if code == 200 { + if code == http.StatusOK { // rewrite newPath := strings.Join(g[0:3], "/") + "/" + to return false, newPath, nil @@ -197,8 +199,11 @@ func (i *gatewayHandler) redirect(w http.ResponseWriter, r *http.Request, path i func (i *gatewayHandler) getRedirectsFile(r *http.Request) (ipath.Resolved, error) { // r.URL.Path is the full ipfs path to the requested resource, // regardless of whether path or subdomain resolution is used. - rootPath := getRootPath(r.URL.Path) - // TODO(JJ): handle error + rootPath, err := getRootPath(r.URL.Path) + if err != nil { + return nil, err + } + path := ipath.New(gopath.Join(rootPath, "_redirects")) resolvedPath, err := i.api.ResolvePath(r.Context(), path) if err != nil { @@ -208,12 +213,12 @@ func (i *gatewayHandler) getRedirectsFile(r *http.Request) (ipath.Resolved, erro } // Returns the root CID path for the given path -func getRootPath(path string) string { +func getRootPath(path string) (string, error) { if strings.HasPrefix(path, ipfsPathPrefix) && strings.Count(gopath.Clean(path), "/") >= 2 { parts := strings.Split(path, "/") - return gopath.Join(ipfsPathPrefix, parts[2]) + return gopath.Join(ipfsPathPrefix, parts[2]), nil } else { - return "" + return "", errors.New("failed to get root CID path") } } diff --git a/core/corehttp/redirect.go b/core/corehttp/redirect.go index e7538464777..c8245bab7f6 100644 --- a/core/corehttp/redirect.go +++ b/core/corehttp/redirect.go @@ -30,7 +30,7 @@ type redirectHandler struct { } func (i *redirectHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - http.Redirect(w, r, i.path, 302) + http.Redirect(w, r, i.path, http.StatusFound) } type redirLine struct { @@ -42,7 +42,7 @@ type redirLine struct { func (rdl redirLine) match(s string) (bool, error) { re, err := regexp.Compile(rdl.matcher) if err != nil { - return false, fmt.Errorf("Failed to compile %v: %v", rdl.matcher, err) + return false, fmt.Errorf("failed to compile %v: %v", rdl.matcher, err) } match := re.FindString(s) @@ -70,7 +70,7 @@ func newRedirs(f io.Reader) *redirs { matcher := groups[0] to := groups[1] // default to 302 (temporary redirect) - code := 302 + code := http.StatusFound if len(groups) >= 3 { c, err := strconv.Atoi(groups[2]) if err == nil { diff --git a/test/sharness/t0114-gateway-subdomains.sh b/test/sharness/t0114-gateway-subdomains.sh index 036c4eeefc8..90d082405b3 100755 --- a/test/sharness/t0114-gateway-subdomains.sh +++ b/test/sharness/t0114-gateway-subdomains.sh @@ -154,7 +154,8 @@ test_expect_success "Add the _redirects file test directory" ' echo "index.html" > testredirect/index.html && echo "one.html" > testredirect/one.html && echo "two.html" > testredirect/two.html && - echo "^/301-redirect-one$ /one.html 301" > testredirect/_redirects && + echo "^/redirect-one$ /one.html" > testredirect/_redirects && + echo "^/301-redirect-one$ /one.html 301" >> testredirect/_redirects && echo "^/302-redirect-two$ /two.html 302" >> testredirect/_redirects && echo "^/200-index$ /index.html 200" >> testredirect/_redirects && REDIRECTS_DIR_CID=$(ipfs add -Qr --cid-version 1 testredirect) @@ -162,6 +163,12 @@ test_expect_success "Add the _redirects file test directory" ' REDIRECTS_DIR_HOSTNAME="${REDIRECTS_DIR_CID}.ipfs.localhost:$GWAY_PORT" +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/redirect-one redirects with default of 302, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/redirect-one" > response && + test_should_contain "one.html" response && + test_should_contain "302 Found" response +' + test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/301-redirect-one redirects with 301, per _redirects file" ' curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/301-redirect-one" > response && test_should_contain "one.html" response && From 6a855f89a972e7f45d2926f0b1df118ea08ba1d5 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Wed, 6 Apr 2022 21:05:50 -0500 Subject: [PATCH 5/8] Cleanup function names --- core/corehttp/gateway_handler.go | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index ef042782bee..293dcfdbcb1 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -273,20 +273,20 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger := log.With("from", r.RequestURI) logger.Debug("http request received") - if !validateSupportedHeaders(w, r) { + if handleUnsupportedHeaders(w, r) { return } - if !protocolHandlerRegistration(w, r, logger) { + if handleProtocolHandlerRedirect(w, r, logger) { return } - if !serviceWorkerRegistration(w, r) { + if handleInvalidServiceWorkerRegistration(w, r) { return } contentPath := ipath.New(r.URL.Path) - if !fixSuperfluousNamespaces(w, r, contentPath, logger) { + if handleSuperfluousNamespaces(w, r, contentPath, logger) { return } @@ -758,28 +758,28 @@ func fixupSuperfluousNamespace(w http.ResponseWriter, urlPath string, urlQuery s }) == nil } -func validateSupportedHeaders(w http.ResponseWriter, r *http.Request) bool { +func handleUnsupportedHeaders(w http.ResponseWriter, r *http.Request) bool { // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) // TODO: remove this after go-ipfs 0.13 ships if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") webError(w, "unsupported HTTP header", err, http.StatusBadRequest) - return false - } else { return true + } else { + return false } } -func protocolHandlerRegistration(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) bool { +func handleProtocolHandlerRedirect(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) bool { if uriParam := r.URL.Query().Get("uri"); uriParam != "" { u, err := url.Parse(uriParam) if err != nil { webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) - return false + return true } if u.Scheme != "ipfs" && u.Scheme != "ipns" { webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) - return false + return true } path := u.Path if u.RawQuery != "" { // preserve query if present @@ -789,28 +789,28 @@ func protocolHandlerRegistration(w http.ResponseWriter, r *http.Request, logger redirectURL := gopath.Join("/", u.Scheme, u.Host, path) logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) - return false + return true } - return true + return false } // Disallow Service Worker registration on namespace roots // https://github.com/ipfs/go-ipfs/issues/4025 -func serviceWorkerRegistration(w http.ResponseWriter, r *http.Request) bool { +func handleInvalidServiceWorkerRegistration(w http.ResponseWriter, r *http.Request) bool { if r.Header.Get("Service-Worker") == "script" { matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) if matched { err := fmt.Errorf("registration is not allowed for this scope") webError(w, "navigator.serviceWorker", err, http.StatusBadRequest) - return false + return true } } - return true + return false } -func fixSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) bool { +func handleSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) bool { if pathErr := contentPath.IsValid(); pathErr != nil { if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { // the error was due to redundant namespace, which we were able to fix @@ -820,9 +820,9 @@ func fixSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPat } // unable to fix path, returning error webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) - return false + return true } - return true + return false } func (i *gatewayHandler) finishEarlyForMatchingETag(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved) bool { From b7422d7b95b4ce6e19843359c8699b10f8126a3a Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Wed, 6 Apr 2022 21:12:45 -0500 Subject: [PATCH 6/8] Wrap error with valuable context before returning --- core/corehttp/gateway_handler_unixfs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go index 8c7a7d034a1..1a9d41c74fd 100644 --- a/core/corehttp/gateway_handler_unixfs.go +++ b/core/corehttp/gateway_handler_unixfs.go @@ -43,7 +43,7 @@ func (i *gatewayHandler) getOrHeadHandlerUnixfs(w http.ResponseWriter, r *http.R // _redirects file exists, so parse it and redirect redirected, newPath, err := i.redirect(w, r, redirectsFile) if err != nil { - // TODO(JJ): How should we handle parse or redirect errors? + err = fmt.Errorf("invalid _redirects file at %q: %w", redirectsFile.String(), err) internalWebError(w, err) return } From c47eb7b78ec095ddef0d4f3a3f9093ff87d1d8e6 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Thu, 7 Apr 2022 06:38:53 -0500 Subject: [PATCH 7/8] More cleanup --- core/corehttp/gateway_handler.go | 26 ++++++++++++------------- core/corehttp/gateway_handler_unixfs.go | 10 ++++++---- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 293dcfdbcb1..6cda134438f 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -273,20 +273,20 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger := log.With("from", r.RequestURI) logger.Debug("http request received") - if handleUnsupportedHeaders(w, r) { + if handledUnsupportedHeaders(w, r) { return } - if handleProtocolHandlerRedirect(w, r, logger) { + if handledProtocolHandlerRedirect(w, r, logger) { return } - if handleInvalidServiceWorkerRegistration(w, r) { + if handledInvalidServiceWorkerRegistration(w, r) { return } contentPath := ipath.New(r.URL.Path) - if handleSuperfluousNamespaces(w, r, contentPath, logger) { + if handledSuperfluousNamespaces(w, r, contentPath, logger) { return } @@ -319,11 +319,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - if i.finishEarlyForMatchingETag(w, r, resolvedPath) { + if i.returnedNotModifiedForMatchingETag(w, r, resolvedPath) { return } - if !i.updateGlobalMetrics(w, r, begin, contentPath, resolvedPath) { + if !i.updateFirstContentBlockMetrics(w, r, begin, contentPath, resolvedPath) { return } @@ -758,7 +758,7 @@ func fixupSuperfluousNamespace(w http.ResponseWriter, urlPath string, urlQuery s }) == nil } -func handleUnsupportedHeaders(w http.ResponseWriter, r *http.Request) bool { +func handledUnsupportedHeaders(w http.ResponseWriter, r *http.Request) bool { // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) // TODO: remove this after go-ipfs 0.13 ships if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { @@ -770,7 +770,7 @@ func handleUnsupportedHeaders(w http.ResponseWriter, r *http.Request) bool { } } -func handleProtocolHandlerRedirect(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) bool { +func handledProtocolHandlerRedirect(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) bool { if uriParam := r.URL.Query().Get("uri"); uriParam != "" { u, err := url.Parse(uriParam) if err != nil { @@ -797,7 +797,7 @@ func handleProtocolHandlerRedirect(w http.ResponseWriter, r *http.Request, logge // Disallow Service Worker registration on namespace roots // https://github.com/ipfs/go-ipfs/issues/4025 -func handleInvalidServiceWorkerRegistration(w http.ResponseWriter, r *http.Request) bool { +func handledInvalidServiceWorkerRegistration(w http.ResponseWriter, r *http.Request) bool { if r.Header.Get("Service-Worker") == "script" { matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) if matched { @@ -810,13 +810,13 @@ func handleInvalidServiceWorkerRegistration(w http.ResponseWriter, r *http.Reque return false } -func handleSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) bool { +func handledSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) bool { if pathErr := contentPath.IsValid(); pathErr != nil { if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { // the error was due to redundant namespace, which we were able to fix // by returning error/redirect page, nothing left to do here logger.Debugw("redundant namespace; noop") - return false + return true } // unable to fix path, returning error webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) @@ -825,7 +825,7 @@ func handleSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, content return false } -func (i *gatewayHandler) finishEarlyForMatchingETag(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved) bool { +func (i *gatewayHandler) returnedNotModifiedForMatchingETag(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved) bool { // Finish early if client already has matching Etag if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { w.WriteHeader(http.StatusNotModified) @@ -836,7 +836,7 @@ func (i *gatewayHandler) finishEarlyForMatchingETag(w http.ResponseWriter, r *ht } // Update the global metric of the time it takes to read the final root block of the requested resource -func (i *gatewayHandler) updateGlobalMetrics(w http.ResponseWriter, r *http.Request, begin time.Time, contentPath ipath.Path, resolvedPath ipath.Resolved) bool { +func (i *gatewayHandler) updateFirstContentBlockMetrics(w http.ResponseWriter, r *http.Request, begin time.Time, contentPath ipath.Path, resolvedPath ipath.Resolved) bool { // NOTE: for legacy reasons this happens before we go into content-type specific code paths _, err := i.api.Block().Get(r.Context(), resolvedPath) if err != nil { diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go index 1a9d41c74fd..e2ed4996e7c 100644 --- a/core/corehttp/gateway_handler_unixfs.go +++ b/core/corehttp/gateway_handler_unixfs.go @@ -41,7 +41,7 @@ func (i *gatewayHandler) getOrHeadHandlerUnixfs(w http.ResponseWriter, r *http.R } } else { // _redirects file exists, so parse it and redirect - redirected, newPath, err := i.redirect(w, r, redirectsFile) + redirected, newPath, err := i.handleRedirectsFile(w, r, redirectsFile) if err != nil { err = fmt.Errorf("invalid _redirects file at %q: %w", redirectsFile.String(), err) internalWebError(w, err) @@ -79,14 +79,16 @@ func (i *gatewayHandler) getOrHeadHandlerUnixfs(w http.ResponseWriter, r *http.R return } - if i.finishEarlyForMatchingETag(w, r, resolvedPath) { + if i.returnedNotModifiedForMatchingETag(w, r, resolvedPath) { return } - if !i.updateGlobalMetrics(w, r, begin, contentPath, resolvedPath) { + // TODO(JJ): Rename to avoid negation? + if !i.updateFirstContentBlockMetrics(w, r, begin, contentPath, resolvedPath) { return } + // TODO(JJ): Rename to avoid negation? if !i.setHeaders(w, r, contentPath) { return } @@ -156,7 +158,7 @@ func (i *gatewayHandler) serveUnixfs(w http.ResponseWriter, r *http.Request, res } // redirect returns redirected, newPath (if rewrite), error -func (i *gatewayHandler) redirect(w http.ResponseWriter, r *http.Request, path ipath.Resolved) (bool, string, error) { +func (i *gatewayHandler) handleRedirectsFile(w http.ResponseWriter, r *http.Request, path ipath.Resolved) (bool, string, error) { node, err := i.api.Unixfs().Get(r.Context(), path) if err != nil { return false, "", fmt.Errorf("could not get redirects file: %v", err) From 9ef18636d60e0541b39be69bdaad23616baa48f1 Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Thu, 7 Apr 2022 07:41:42 -0500 Subject: [PATCH 8/8] Separate redirects logic from legacy redirect code --- .../gateway_handler_unixfs_redirects.go | 74 +++++++++++++++++++ ... gateway_handler_unixfs_redirects_test.go} | 0 core/corehttp/redirect.go | 69 ----------------- 3 files changed, 74 insertions(+), 69 deletions(-) create mode 100644 core/corehttp/gateway_handler_unixfs_redirects.go rename core/corehttp/{redirect_test.go => gateway_handler_unixfs_redirects_test.go} (100%) diff --git a/core/corehttp/gateway_handler_unixfs_redirects.go b/core/corehttp/gateway_handler_unixfs_redirects.go new file mode 100644 index 00000000000..ae30d4d4d03 --- /dev/null +++ b/core/corehttp/gateway_handler_unixfs_redirects.go @@ -0,0 +1,74 @@ +package corehttp + +import ( + "bufio" + "fmt" + "io" + "net/http" + "regexp" + "strconv" + "strings" +) + +type redirLine struct { + matcher string + to string + code int +} + +func (rdl redirLine) match(s string) (bool, error) { + re, err := regexp.Compile(rdl.matcher) + if err != nil { + return false, fmt.Errorf("failed to compile %v: %v", rdl.matcher, err) + } + + match := re.FindString(s) + if match == "" { + return false, nil + } + + return true, nil +} + +type redirs []redirLine + +func newRedirs(f io.Reader) *redirs { + ret := redirs{} + scanner := bufio.NewScanner(f) + scanner.Split(bufio.ScanLines) + for scanner.Scan() { + t := scanner.Text() + if len(t) > 0 && t[0] == '#' { + // comment, skip line + continue + } + groups := strings.Fields(scanner.Text()) + if len(groups) >= 2 { + matcher := groups[0] + to := groups[1] + // default to 302 (temporary redirect) + code := http.StatusFound + if len(groups) >= 3 { + c, err := strconv.Atoi(groups[2]) + if err == nil { + code = c + } + } + ret = append(ret, redirLine{matcher, to, code}) + } + } + + return &ret +} + +// returns "" if no redir +func (r redirs) search(path string) (string, int) { + for _, rdir := range r { + m, err := rdir.match(path) + if m && err == nil { + return rdir.to, rdir.code + } + } + + return "", 0 +} diff --git a/core/corehttp/redirect_test.go b/core/corehttp/gateway_handler_unixfs_redirects_test.go similarity index 100% rename from core/corehttp/redirect_test.go rename to core/corehttp/gateway_handler_unixfs_redirects_test.go diff --git a/core/corehttp/redirect.go b/core/corehttp/redirect.go index c8245bab7f6..121bb6a8e2a 100644 --- a/core/corehttp/redirect.go +++ b/core/corehttp/redirect.go @@ -1,14 +1,8 @@ package corehttp import ( - "bufio" - "fmt" - "io" "net" "net/http" - "regexp" - "strconv" - "strings" core "github.com/ipfs/go-ipfs/core" ) @@ -32,66 +26,3 @@ type redirectHandler struct { func (i *redirectHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, i.path, http.StatusFound) } - -type redirLine struct { - matcher string - to string - code int -} - -func (rdl redirLine) match(s string) (bool, error) { - re, err := regexp.Compile(rdl.matcher) - if err != nil { - return false, fmt.Errorf("failed to compile %v: %v", rdl.matcher, err) - } - - match := re.FindString(s) - if match == "" { - return false, nil - } - - return true, nil -} - -type redirs []redirLine - -func newRedirs(f io.Reader) *redirs { - ret := redirs{} - scanner := bufio.NewScanner(f) - scanner.Split(bufio.ScanLines) - for scanner.Scan() { - t := scanner.Text() - if len(t) > 0 && t[0] == '#' { - // comment, skip line - continue - } - groups := strings.Fields(scanner.Text()) - if len(groups) >= 2 { - matcher := groups[0] - to := groups[1] - // default to 302 (temporary redirect) - code := http.StatusFound - if len(groups) >= 3 { - c, err := strconv.Atoi(groups[2]) - if err == nil { - code = c - } - } - ret = append(ret, redirLine{matcher, to, code}) - } - } - - return &ret -} - -// returns "" if no redir -func (r redirs) search(path string) (string, int) { - for _, rdir := range r { - m, err := rdir.match(path) - if m && err == nil { - return rdir.to, rdir.code - } - } - - return "", 0 -}