diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 32d2eebaef8..6cda134438f 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "html/template" - "io" "mime" "net/http" "net/url" @@ -12,7 +11,6 @@ import ( gopath "path" "regexp" "runtime/debug" - "strconv" "strings" "time" @@ -28,6 +26,7 @@ import ( prometheus "github.com/prometheus/client_golang/prometheus" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" ) const ( @@ -274,126 +273,67 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger := log.With("from", r.RequestURI) logger.Debug("http request received") - // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) - // TODO: remove this after go-ipfs 0.13 ships - if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { - err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") - webError(w, "unsupported HTTP header", err, http.StatusBadRequest) + if handledUnsupportedHeaders(w, r) { return } - // ?uri query param support for requests produced by web browsers - // via navigator.registerProtocolHandler Web API - // https://developer.mozilla.org/en-US/docs/Web/API/Navigator/registerProtocolHandler - // TLDR: redirect /ipfs/?uri=ipfs%3A%2F%2Fcid%3Fquery%3Dval to /ipfs/cid?query=val - if uriParam := r.URL.Query().Get("uri"); uriParam != "" { - u, err := url.Parse(uriParam) - if err != nil { - webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) - return - } - if u.Scheme != "ipfs" && u.Scheme != "ipns" { - webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) - return - } - path := u.Path - if u.RawQuery != "" { // preserve query if present - path = path + "?" + u.RawQuery - } - - redirectURL := gopath.Join("/", u.Scheme, u.Host, path) - logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) - http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + if handledProtocolHandlerRedirect(w, r, logger) { return } - // Service Worker registration request - if r.Header.Get("Service-Worker") == "script" { - // Disallow Service Worker registration on namespace roots - // https://github.com/ipfs/go-ipfs/issues/4025 - matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) - if matched { - err := fmt.Errorf("registration is not allowed for this scope") - webError(w, "navigator.serviceWorker", err, http.StatusBadRequest) - return - } + if handledInvalidServiceWorkerRegistration(w, r) { + return } contentPath := ipath.New(r.URL.Path) - if pathErr := contentPath.IsValid(); pathErr != nil { - if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { - // the error was due to redundant namespace, which we were able to fix - // by returning error/redirect page, nothing left to do here - logger.Debugw("redundant namespace; noop") - return - } - // unable to fix path, returning error - webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) + if handledSuperfluousNamespaces(w, r, contentPath, logger) { + return + } + + // Detect when explicit Accept header or ?format parameter are present + responseFormat, formatParams, err := customResponseFormat(r) + if err != nil { + webError(w, "error while processing the Accept header", err, http.StatusBadRequest) + return + } + trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) + + // For Unixfs, when a path can't be resolved we need to check for redirects and pretty 404 page files. + if responseFormat == "" { + logger.Debugw("dispatching to getOrHeadHandlerUnixfs") + i.getOrHeadHandlerUnixfs(w, r, begin, logger) return } // Resolve path to the final DAG node for the ETag resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) + trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) + switch err { case nil: case coreiface.ErrOffline: webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) return default: - // if Accept is text/html, see if ipfs-404.html is present - if i.servePretty404IfPresent(w, r, contentPath) { - logger.Debugw("serve pretty 404 if present") - return - } - webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) return } - // Detect when explicit Accept header or ?format parameter are present - responseFormat, formatParams, err := customResponseFormat(r) - if err != nil { - webError(w, "error while processing the Accept header", err, http.StatusBadRequest) + if i.returnedNotModifiedForMatchingETag(w, r, resolvedPath) { return } - trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) - trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) - // Finish early if client already has matching Etag - if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { - w.WriteHeader(http.StatusNotModified) - return - } - - // Update the global metric of the time it takes to read the final root block of the requested resource - // NOTE: for legacy reasons this happens before we go into content-type specific code paths - _, err = i.api.Block().Get(r.Context(), resolvedPath) - if err != nil { - webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) + if !i.updateFirstContentBlockMetrics(w, r, begin, contentPath, resolvedPath) { return } - ns := contentPath.Namespace() - timeToGetFirstContentBlock := time.Since(begin).Seconds() - i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead - i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) - // HTTP Headers - i.addUserHeaders(w) // ok, _now_ write user's headers. - w.Header().Set("X-Ipfs-Path", contentPath.String()) - - if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { - w.Header().Set("X-Ipfs-Roots", rootCids) - } else { // this should never happen, as we resolved the contentPath already - webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + if !i.setHeaders(w, r, contentPath) { return } // Support custom response formats passed via ?format or Accept HTTP header + // Note that we handle Unixfs (e.g. responseFormat of "") above. switch responseFormat { - case "": // The implicit response format is UnixFS - logger.Debugw("serving unixfs", "path", contentPath) - i.serveUnixFs(w, r, resolvedPath, contentPath, begin, logger) - return case "application/vnd.ipld.raw": logger.Debugw("serving raw block", "path", contentPath) i.serveRawBlock(w, r, resolvedPath, contentPath, begin) @@ -401,7 +341,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request case "application/vnd.ipld.car": logger.Debugw("serving car stream", "path", contentPath) carVersion := formatParams["version"] - i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) + i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) @@ -410,36 +350,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } -func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { - resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) - if err != nil { - return false - } - - dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path) - if err != nil { - return false - } - defer dr.Close() - - f, ok := dr.(files.File) - if !ok { - return false - } - - size, err := f.Size() - if err != nil { - return false - } - - log.Debugw("using pretty 404 file", "path", contentPath) - w.Header().Set("Content-Type", ctype) - w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) - w.WriteHeader(http.StatusNotFound) - _, err = io.CopyN(w, f, size) - return err == nil -} - func (i *gatewayHandler) postHandler(w http.ResponseWriter, r *http.Request) { p, err := i.api.Unixfs().Add(r.Context(), files.NewReaderFile(r.Body)) if err != nil { @@ -809,48 +719,6 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "", nil, nil } -func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { - filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) - if err != nil { - return nil, "", err - } - - pathComponents := strings.Split(contentPath.String(), "/") - - for idx := len(pathComponents); idx >= 3; idx-- { - pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) - parsed404Path := ipath.New("/" + pretty404) - if parsed404Path.IsValid() != nil { - break - } - resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path) - if err != nil { - continue - } - return resolvedPath, ctype, nil - } - - return nil, "", fmt.Errorf("no pretty 404 in any parent folder") -} - -func preferred404Filename(acceptHeaders []string) (string, string, error) { - // If we ever want to offer a 404 file for a different content type - // then this function will need to parse q weightings, but for now - // the presence of anything matching HTML is enough. - for _, acceptHeader := range acceptHeaders { - accepted := strings.Split(acceptHeader, ",") - for _, spec := range accepted { - contentType := strings.SplitN(spec, ";", 1)[0] - switch contentType { - case "*/*", "text/*", "text/html": - return "ipfs-404.html", "text/html", nil - } - } - } - - return "", "", fmt.Errorf("there is no 404 file for the requested content types") -} - // returns unquoted path with all special characters revealed as \u codes func debugStr(path string) string { q := fmt.Sprintf("%+q", path) @@ -889,3 +757,109 @@ func fixupSuperfluousNamespace(w http.ResponseWriter, urlPath string, urlQuery s ErrorMsg: fmt.Sprintf("invalid path: %q should be %q", urlPath, intendedPath.String()), }) == nil } + +func handledUnsupportedHeaders(w http.ResponseWriter, r *http.Request) bool { + // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) + // TODO: remove this after go-ipfs 0.13 ships + if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { + err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") + webError(w, "unsupported HTTP header", err, http.StatusBadRequest) + return true + } else { + return false + } +} + +func handledProtocolHandlerRedirect(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) bool { + if uriParam := r.URL.Query().Get("uri"); uriParam != "" { + u, err := url.Parse(uriParam) + if err != nil { + webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) + return true + } + if u.Scheme != "ipfs" && u.Scheme != "ipns" { + webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) + return true + } + path := u.Path + if u.RawQuery != "" { // preserve query if present + path = path + "?" + u.RawQuery + } + + redirectURL := gopath.Join("/", u.Scheme, u.Host, path) + logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) + http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + return true + } + + return false +} + +// Disallow Service Worker registration on namespace roots +// https://github.com/ipfs/go-ipfs/issues/4025 +func handledInvalidServiceWorkerRegistration(w http.ResponseWriter, r *http.Request) bool { + if r.Header.Get("Service-Worker") == "script" { + matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) + if matched { + err := fmt.Errorf("registration is not allowed for this scope") + webError(w, "navigator.serviceWorker", err, http.StatusBadRequest) + return true + } + } + + return false +} + +func handledSuperfluousNamespaces(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) bool { + if pathErr := contentPath.IsValid(); pathErr != nil { + if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { + // the error was due to redundant namespace, which we were able to fix + // by returning error/redirect page, nothing left to do here + logger.Debugw("redundant namespace; noop") + return true + } + // unable to fix path, returning error + webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) + return true + } + return false +} + +func (i *gatewayHandler) returnedNotModifiedForMatchingETag(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved) bool { + // Finish early if client already has matching Etag + if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { + w.WriteHeader(http.StatusNotModified) + return true + } + + return false +} + +// Update the global metric of the time it takes to read the final root block of the requested resource +func (i *gatewayHandler) updateFirstContentBlockMetrics(w http.ResponseWriter, r *http.Request, begin time.Time, contentPath ipath.Path, resolvedPath ipath.Resolved) bool { + // NOTE: for legacy reasons this happens before we go into content-type specific code paths + _, err := i.api.Block().Get(r.Context(), resolvedPath) + if err != nil { + webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) + return false + } + ns := contentPath.Namespace() + timeToGetFirstContentBlock := time.Since(begin).Seconds() + i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead + i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) + return true +} + +func (i *gatewayHandler) setHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + i.addUserHeaders(w) // ok, _now_ write user's headers. + w.Header().Set("X-Ipfs-Path", contentPath.String()) + + if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { + w.Header().Set("X-Ipfs-Roots", rootCids) + } else { // this should never happen, as we resolved the contentPath already + webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + return false + } + + return true +} diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go index 2252b3891c6..e2ed4996e7c 100644 --- a/core/corehttp/gateway_handler_unixfs.go +++ b/core/corehttp/gateway_handler_unixfs.go @@ -1,22 +1,137 @@ package corehttp import ( + "errors" "fmt" "html" + "io" "net/http" + gopath "path" + "strconv" + "strings" "time" files "github.com/ipfs/go-ipfs-files" "github.com/ipfs/go-ipfs/tracing" + "github.com/ipfs/go-path/resolver" + coreiface "github.com/ipfs/interface-go-ipfs-core" ipath "github.com/ipfs/interface-go-ipfs-core/path" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) -func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { +func (i *gatewayHandler) getOrHeadHandlerUnixfs(w http.ResponseWriter, r *http.Request, begin time.Time, logger *zap.SugaredLogger) { + urlPath := r.URL.Path + + // Only look for _redirects file if we have Origin isolation + if hasOriginIsolation(r) { + // Check for _redirects file and redirect as needed + redirectsFile, err := i.getRedirectsFile(r) + if err != nil { + switch err.(type) { + case resolver.ErrNoLink: + // _redirects files doesn't exist, so don't error + default: + // TODO(JJ): During tests we get multibase.ErrUnsupportedEncoding + // This comes from multibase and I assume is due to a fake or otherwise bad CID being in the test. + // So for now any errors getting the redirect file are silently ignored. + // internalWebError(w, err) + // return + } + } else { + // _redirects file exists, so parse it and redirect + redirected, newPath, err := i.handleRedirectsFile(w, r, redirectsFile) + if err != nil { + err = fmt.Errorf("invalid _redirects file at %q: %w", redirectsFile.String(), err) + internalWebError(w, err) + return + } + + if redirected { + return + } + + // 200 is treated as a rewrite, so update the path and continue + if newPath != "" { + urlPath = newPath + } + } + } + + contentPath := ipath.New(urlPath) + + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) + + switch err { + case nil: + case coreiface.ErrOffline: + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) + return + default: + // if Accept is text/html, see if ipfs-404.html is present + if i.servePretty404IfPresent(w, r, contentPath) { + logger.Debugw("serve pretty 404 if present") + return + } + + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) + return + } + + if i.returnedNotModifiedForMatchingETag(w, r, resolvedPath) { + return + } + + // TODO(JJ): Rename to avoid negation? + if !i.updateFirstContentBlockMetrics(w, r, begin, contentPath, resolvedPath) { + return + } + + // TODO(JJ): Rename to avoid negation? + if !i.setHeaders(w, r, contentPath) { + return + } + + logger.Debugw("serving unixfs", "path", contentPath) + i.serveUnixfs(w, r, resolvedPath, contentPath, begin, logger) + return +} + +func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) + if err != nil { + return false + } + + dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path) + if err != nil { + return false + } + defer dr.Close() + + f, ok := dr.(files.File) + if !ok { + return false + } + + size, err := f.Size() + if err != nil { + return false + } + + log.Debugw("using pretty 404 file", "path", contentPath) + w.Header().Set("Content-Type", ctype) + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + w.WriteHeader(http.StatusNotFound) + _, err = io.CopyN(w, f, size) + return err == nil +} + +func (i *gatewayHandler) serveUnixfs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { ctx, span := tracing.Span(r.Context(), "Gateway", "ServeUnixFs", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() + // Handling UnixFS dr, err := i.api.Unixfs().Get(ctx, resolvedPath) if err != nil { @@ -35,9 +150,128 @@ func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, res // Handling Unixfs directory dir, ok := dr.(files.Directory) if !ok { - internalWebError(w, fmt.Errorf("unsupported UnixFs type")) + internalWebError(w, fmt.Errorf("unsupported Unixfs type")) return } logger.Debugw("serving unixfs directory", "path", contentPath) i.serveDirectory(w, r, resolvedPath, contentPath, dir, begin, logger) } + +// redirect returns redirected, newPath (if rewrite), error +func (i *gatewayHandler) handleRedirectsFile(w http.ResponseWriter, r *http.Request, path ipath.Resolved) (bool, string, error) { + node, err := i.api.Unixfs().Get(r.Context(), path) + if err != nil { + return false, "", fmt.Errorf("could not get redirects file: %v", err) + } + + defer node.Close() + + f, ok := node.(files.File) + + if !ok { + return false, "", fmt.Errorf("redirect, could not convert node to file") + } + + redirs := newRedirs(f) + + // extract "file" part of URL, typically the part after /ipfs/CID/... + g := strings.Split(r.URL.Path, "/") + + if len(g) > 3 { + filePartPath := "/" + strings.Join(g[3:], "/") + + to, code := redirs.search(filePartPath) + if code > 0 { + if code == http.StatusOK { + // rewrite + newPath := strings.Join(g[0:3], "/") + "/" + to + return false, newPath, nil + } + + // redirect + http.Redirect(w, r, to, code) + return true, "", nil + } + } + + return false, "", nil +} + +// Returns a resolved path to the _redirects file located in the root CID path of the requested path +func (i *gatewayHandler) getRedirectsFile(r *http.Request) (ipath.Resolved, error) { + // r.URL.Path is the full ipfs path to the requested resource, + // regardless of whether path or subdomain resolution is used. + rootPath, err := getRootPath(r.URL.Path) + if err != nil { + return nil, err + } + + path := ipath.New(gopath.Join(rootPath, "_redirects")) + resolvedPath, err := i.api.ResolvePath(r.Context(), path) + if err != nil { + return nil, err + } + return resolvedPath, nil +} + +// Returns the root CID path for the given path +func getRootPath(path string) (string, error) { + if strings.HasPrefix(path, ipfsPathPrefix) && strings.Count(gopath.Clean(path), "/") >= 2 { + parts := strings.Split(path, "/") + return gopath.Join(ipfsPathPrefix, parts[2]), nil + } else { + return "", errors.New("failed to get root CID path") + } +} + +// TODO(JJ): I was thinking about changing this to just look at the root path as well, but the docs say it searches up +func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { + filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) + if err != nil { + return nil, "", err + } + + pathComponents := strings.Split(contentPath.String(), "/") + + for idx := len(pathComponents); idx >= 3; idx-- { + pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) + parsed404Path := ipath.New("/" + pretty404) + if parsed404Path.IsValid() != nil { + break + } + resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path) + if err != nil { + continue + } + return resolvedPath, ctype, nil + } + + return nil, "", fmt.Errorf("no pretty 404 in any parent folder") +} + +func preferred404Filename(acceptHeaders []string) (string, string, error) { + // If we ever want to offer a 404 file for a different content type + // then this function will need to parse q weightings, but for now + // the presence of anything matching HTML is enough. + for _, acceptHeader := range acceptHeaders { + accepted := strings.Split(acceptHeader, ",") + for _, spec := range accepted { + contentType := strings.SplitN(spec, ";", 1)[0] + switch contentType { + case "*/*", "text/*", "text/html": + return "ipfs-404.html", "text/html", nil + } + } + } + + return "", "", fmt.Errorf("there is no 404 file for the requested content types") +} + +// TODO(JJ): Pretty sure this is incorrect. Validate the correct approach. +func hasOriginIsolation(r *http.Request) bool { + if _, ok := r.Context().Value("gw-hostname").(string); ok { + return true + } else { + return false + } +} diff --git a/core/corehttp/gateway_handler_unixfs_redirects.go b/core/corehttp/gateway_handler_unixfs_redirects.go new file mode 100644 index 00000000000..ae30d4d4d03 --- /dev/null +++ b/core/corehttp/gateway_handler_unixfs_redirects.go @@ -0,0 +1,74 @@ +package corehttp + +import ( + "bufio" + "fmt" + "io" + "net/http" + "regexp" + "strconv" + "strings" +) + +type redirLine struct { + matcher string + to string + code int +} + +func (rdl redirLine) match(s string) (bool, error) { + re, err := regexp.Compile(rdl.matcher) + if err != nil { + return false, fmt.Errorf("failed to compile %v: %v", rdl.matcher, err) + } + + match := re.FindString(s) + if match == "" { + return false, nil + } + + return true, nil +} + +type redirs []redirLine + +func newRedirs(f io.Reader) *redirs { + ret := redirs{} + scanner := bufio.NewScanner(f) + scanner.Split(bufio.ScanLines) + for scanner.Scan() { + t := scanner.Text() + if len(t) > 0 && t[0] == '#' { + // comment, skip line + continue + } + groups := strings.Fields(scanner.Text()) + if len(groups) >= 2 { + matcher := groups[0] + to := groups[1] + // default to 302 (temporary redirect) + code := http.StatusFound + if len(groups) >= 3 { + c, err := strconv.Atoi(groups[2]) + if err == nil { + code = c + } + } + ret = append(ret, redirLine{matcher, to, code}) + } + } + + return &ret +} + +// returns "" if no redir +func (r redirs) search(path string) (string, int) { + for _, rdir := range r { + m, err := rdir.match(path) + if m && err == nil { + return rdir.to, rdir.code + } + } + + return "", 0 +} diff --git a/core/corehttp/gateway_handler_unixfs_redirects_test.go b/core/corehttp/gateway_handler_unixfs_redirects_test.go new file mode 100644 index 00000000000..0c9a6255199 --- /dev/null +++ b/core/corehttp/gateway_handler_unixfs_redirects_test.go @@ -0,0 +1,37 @@ +package corehttp + +import ( + "fmt" + "testing" +) + +func TestRedirline(t *testing.T) { + for _, tc := range []struct { + matcher string + s string + exp bool + errExp bool + }{ + {"hi", "hi", true, false}, + {"hi", "hithere", true, false}, + {"^hi$", "hithere", false, false}, + {"^hi$", "hi", true, false}, + {"hi.*", "hithere", true, false}, + {"/hi", "/hi/there", true, false}, + {"^/hi/", "/hi/there/now", true, false}, + {"^/hi/", "/hithere", false, false}, + {"^/hi/(.*", "/hi/there/now", false, true}, + } { + r := redirLine{tc.matcher, "to", 200} + ok, err := r.match(tc.s) + if ok != tc.exp { + t.Errorf("%v %v, expected %v, got %v", tc.matcher, tc.s, tc.exp, + ok) + } + + if err != nil != tc.errExp { + fmt.Printf("regexp error %v\n", err) + t.Errorf("%v %v, expected error %v, got %v", tc.matcher, tc.s, tc.errExp, err == nil) + } + } +} diff --git a/core/corehttp/redirect.go b/core/corehttp/redirect.go index e7b961e604e..121bb6a8e2a 100644 --- a/core/corehttp/redirect.go +++ b/core/corehttp/redirect.go @@ -24,5 +24,5 @@ type redirectHandler struct { } func (i *redirectHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - http.Redirect(w, r, i.path, 302) + http.Redirect(w, r, i.path, http.StatusFound) } diff --git a/test/sharness/t0114-gateway-subdomains.sh b/test/sharness/t0114-gateway-subdomains.sh index 41abd4a8774..90d082405b3 100755 --- a/test/sharness/t0114-gateway-subdomains.sh +++ b/test/sharness/t0114-gateway-subdomains.sh @@ -144,6 +144,59 @@ test_expect_success 'start daemon with empty config for Gateway.PublicGateways' test_launch_ipfs_daemon_without_network ' +## ============================================================================ +## Test _redirects file support +## ============================================================================ + +# Directory tree crafted to test _redirects file support +test_expect_success "Add the _redirects file test directory" ' + mkdir -p testredirect/ && + echo "index.html" > testredirect/index.html && + echo "one.html" > testredirect/one.html && + echo "two.html" > testredirect/two.html && + echo "^/redirect-one$ /one.html" > testredirect/_redirects && + echo "^/301-redirect-one$ /one.html 301" >> testredirect/_redirects && + echo "^/302-redirect-two$ /two.html 302" >> testredirect/_redirects && + echo "^/200-index$ /index.html 200" >> testredirect/_redirects && + REDIRECTS_DIR_CID=$(ipfs add -Qr --cid-version 1 testredirect) +' + +REDIRECTS_DIR_HOSTNAME="${REDIRECTS_DIR_CID}.ipfs.localhost:$GWAY_PORT" + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/redirect-one redirects with default of 302, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/redirect-one" > response && + test_should_contain "one.html" response && + test_should_contain "302 Found" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/301-redirect-one redirects with 301, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/301-redirect-one" > response && + test_should_contain "one.html" response && + test_should_contain "301 Moved Permanently" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/302-redirect-two redirects with 302, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/302-redirect-two" > response && + test_should_contain "two.html" response && + test_should_contain "302 Found" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/200-index returns 200, per _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/200-index" > response && + test_should_contain "index.html" response && + test_should_contain "200 OK" response +' + +test_expect_success "request for $REDIRECTS_DIR_HOSTNAME/has-no-redirects-entry returns 404, since not in _redirects file" ' + curl -sD - --resolve $REDIRECTS_DIR_HOSTNAME:127.0.0.1 "http://$REDIRECTS_DIR_HOSTNAME/has-no-redirects-entry" > response && + test_should_contain "404 Not Found" response +' + +test_expect_success "request for http://127.0.0.1:$GWAY_PORT/ipfs/$REDIRECTS_DIR_CID/301-redirect-one returns 404, no _redirects since no origin isolation" ' + curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$REDIRECTS_DIR_CID/301-redirect-one" > response && + test_should_contain "404 Not Found" response +' + ## ============================================================================ ## Test path-based requests to a local gateway with default config ## (forced redirects to http://*.localhost)